I have to work with multiple XML files (and some JSON, but for now XML) of different depths and variable structure. Much of the information I need is stored in attributes. I am looking for the most elegant solution that can be adapted by users who are not going to go deep into R programming. They would know the basic XML sections and key attributes, and would use the resulting data frame exported to Excel (or perhaps rendered in Shiny or an R Markdown document).
library(tidyverse)
library(rlang)
library(xml2)
library(reprex)
# Sample document kept inline so the example is self-contained. Despite its
# name, `my_xml_file` holds the XML text itself rather than a file path.
my_xml_file <- "<results>
<result name='math' status='pass' person='winkin'>90</result>
<result name='lit' status='fail' person='blinkin'>60</result>
<result name='music' status='incl' person='nod'>absent</result>
</results>"
# Parse the text into an XML document.
my_xml <- read_xml(my_xml_file)
# Collect every <result> node, wherever it sits in the tree.
my_xml_list <- xml_find_all(my_xml, "//result")
# process xml as in Jenny Bryan example:
# one row per <result> node, with the node itself kept in `nodeset`
rows_df <- tibble(row = seq_along(my_xml_list), nodeset = my_xml_list)
# extract info from xml to build more usable dataframe
# can (most of) the following be done using tidyeval?
# Each attribute becomes its own character column; `nodeset` is dropped at the
# end so that only plain columns remain.
cells_df <- rows_df %>%
  mutate(
    name = map_chr(nodeset, ~ xml_attr(.x, "name")),
    status = map_chr(nodeset, ~ xml_attr(.x, "status")),
    person = map_chr(nodeset, ~ xml_attr(.x, "person"))
  ) %>%
  select(-nodeset)
# want a function to build 1 mutate statement to get each enumerated var
# how much tidyeval to use - is cement() from Lionel Henry webinar useful
# Pull one or more XML attributes out of the `nodeset` column of `.data`.
#
# Arguments:
#   .data  A data frame with a `nodeset` column whose elements are XML nodes
#          (for example the result of xml_find_all()).
#   var    Attribute to extract, given as a bare name or as a string.
#   ...    Optional further attributes, given the same way.
#
# Returns `.data` with one character column per requested attribute (named
# after the attribute, NA where a node lacks it) and with `nodeset` removed.
my_fun <- function(.data, var, ...) {
  if (!"nodeset" %in% names(.data)) {
    stop("`.data` must contain a `nodeset` column.", call. = FALSE)
  }
  # Defuse every argument and turn each into a plain string, so callers can
  # write my_fun(df, name, status) as well as my_fun(df, "name", "status").
  attr_names <- unname(map_chr(enquos(var, ...), as_name))
  # `.data` is also rlang's pronoun inside dplyr verbs, so read the column
  # here, outside mutate(), where `.data` is unambiguously the argument.
  nodes <- .data[["nodeset"]]
  # Named list: one character vector per attribute, in the order requested.
  attr_cols <- map(set_names(attr_names), function(attr_name) {
    map_chr(nodes, function(node) xml_attr(node, attr_name))
  })
  # Splice the list so each element becomes a `name = values` mutate argument.
  .data %>%
    mutate(!!!attr_cols) %>%
    select(-nodeset)
}
# That works, but I would like it to be
# 1) more elegant - proper use of tidyeval functions and
# 2) dplyr/purrr - take multiple vars and keep results in 1 dataframe
# Run the helper once per attribute; the `#>` lines are the reprex output.
rows_df %>% my_fun(status)
#> # A tibble: 3 x 2
#>     row status
#>   <int> <chr>
#> 1     1 pass
#> 2     2 fail
#> 3     3 incl
rows_df %>% my_fun(name)
#> # A tibble: 3 x 2
#>     row name
#>   <int> <chr>
#> 1     1 math
#> 2     2 lit
#> 3     3 music
rows_df %>% my_fun(person)
#> # A tibble: 3 x 2
#>     row person
#>   <int> <chr>
#> 1     1 winkin
#> 2     2 blinkin
#> 3     3 nod
# looking to have something like this:
# tbl_of_xml_nodesets %>% get_identified_attributes(attribute1, attribute2, attribute3, etc.)