Hi,
I don't have much experience with XML, but I got intrigued and played with the functions until I found something that might be quicker. I noticed that the first xml_add_child call was the bottleneck, and I found out that xml_add_sibling works way faster (don't ask me why).
In order for a sibling to be added, a child already needs to exist, so I created an empty placeholder child that I remove at the end. This led me to rewrite the function like this:
library(xml2)
library(dplyr)
# Build an XML document of the form <codeBook><dataDscr>...</dataDscr></codeBook>
# holding one <var> element per variable. Each <var> receives one <catgry> per
# entry of `values`, followed by a <labl> carrying the variable label.
#
# Nodes are attached with xml_add_sibling() relative to a temporary placeholder
# node instead of calling xml_add_child() on <dataDscr> for every variable.

# Codes that count as "missing"; matching categories get missing="Y".
missing <- c(-1, -2, -3)
# Category codes (vector values) and their labels (vector names).
values <- c("Very weak" = 1, "Weak" = 2, "Middle" = 3, "Strong" = 4, "Very strong" = 5, "Don't know" = -1)
# Number of <var> elements to generate.
n_vars <- 1000

root <- xml_new_document()
codeBook <- xml_add_child(root, "codeBook")
dataDscr <- xml_add_child(codeBook, "dataDscr")

# Add empty child: xml_add_sibling() needs an existing node to anchor on.
empty <- xml_add_child(dataDscr, "empty")

for (i in seq_len(n_vars)) {
  # Inserting before the placeholder keeps the <var> nodes in ascending order.
  var <- xml_add_sibling(empty, "var", name = paste("V", i, sep = "_"), .where = "before")
  if (TRUE) { # something needs to be checked here, as an example
    xml_attr(var, "nature") <- "ordinal"
    xml_attr(var, "representationType") <- "text"
  }

  labl1 <- xml_add_child(var, "labl")
  xml_text(labl1) <- paste("Variable label for V", i, sep = "_")

  # seq_along() yields an empty sequence when `values` is empty, whereas
  # seq(length(values)) would iterate over c(1, 0).
  for (v in seq_along(values)) {
    ismiss <- values[[v]] %in% missing

    # Each category goes immediately before the variable label, so the
    # categories end up in the order of `values`, with <labl> last.
    catgry <- xml_add_sibling(labl1, "catgry", .where = "before")
    if (ismiss) xml_attr(catgry, "missing") <- "Y"

    catValu <- xml_add_child(catgry, "catValu")
    xml_text(catValu) <- as.character(values[v])

    # The category label is placed before <catValu> inside <catgry>.
    labl <- xml_add_sibling(catValu, "labl", .where = "before")
    xml_text(labl) <- names(values)[v]
  }
}

# Get rid of empty child
xml_remove(empty)
This code runs much faster now.
While working towards this result, I also tried combining the xml2 functions with paste, and found a way to create the whole set almost instantaneously (though this code is much muddier, so I'm not a huge fan):
# Alternative construction: the category markup is pasted together as a string
# and parsed with read_xml() instead of being built node by node.
# Relies on `empty`, `values` and `missing` already being defined.

# The category markup is the same for every variable, so build it once.
# A single wrapping <catgrys> element is used because read_xml() needs one
# root node; that wrapper is what gets attached to each <var>.
missing_attr <- ifelse(values %in% missing, ' missing="Y"', "")
catgrys_xml <- paste0(
  "<catgrys>",
  paste0(
    "<catgry", missing_attr,
    "><catValu>", values, "</catValu><labl>", names(values), "</labl></catgry>",
    collapse = ""
  ),
  "</catgrys>"
)

for (i in seq_len(1000)) {
  # Add sibling to empty child (very fast)
  empty %>%
    xml_add_sibling(
      "var",
      name = paste("V", i, sep = "_"),
      # `if (TRUE)` stands in for a real per-variable check.
      nature = if (TRUE) "ordinal",
      # Attribute name matches the node-by-node version of this script; the
      # earlier `xml_attr = ...` created an attribute literally called
      # "xml_attr".
      representationType = if (TRUE) "text",
      .where = "before"
    ) %>%
    xml_add_child("labl", paste0("Variable label for V_", i)) %>%
    xml_add_sibling(read_xml(catgrys_xml))
}
Let me know if you find other ways!
PJ