Converting an XML file to CSV

Hi,
I was trying to convert an XML file into CSV, but I am getting the error message shown below.

# Attempt to download an XML market report and flatten it into a data frame.
# httr supplies GET(); XML supplies xmlParse() and xmlToList().
library(httr)
library(XML)
url <-"https://docs.misoenergy.org/marketreports/MISOdaily0022020.xml"

# Request the report over HTTP; `r` holds the response returned by GET().
r = GET(url)
doc <- xmlParse(r)
# Show the class of the parsed document.
class(doc)
# Parse the response a second time, now with useInternalNodes = TRUE spelled
# out, and print the resulting document.
doc = xmlParse(r,useInternalNodes = TRUE)
doc
# Convert the parsed XML document into a nested R list.
xL<-xmlToList(doc)
# NOTE(review): ldply() is not provided by httr or XML -- presumably plyr is
# attached elsewhere in the session; confirm.
# This is the call that fails: data.frame() is applied to each element of xL
# and stops with "arguments imply differing number of rows".
data<-ldply(xL,data.frame)
head(data)

```r
> data<-ldply(xL,data.frame)
Error in (function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE,  :
  arguments imply differing number of rows: 2, 5
```
Thanks .

Krofox

I did not know the dataset, so the first thing I did was look at it in a web browser.
There I saw that the dataset contains several different Data_Type values.

In the code below I handle only the 'NG' type.
The others (most probably necessary to interpret the results of the 'NG' type) I leave as an exercise for the reader :wink:


library(xml2)

# Location of the XML report to read.
url <- "https://docs.misoenergy.org/marketreports/MISOdaily0022020.xml"

# Read the document and strip its default namespace so that the plain XPath
# expressions below match the elements.
doc <- xml_ns_strip(read_xml(url))

# Which Data_Type values occur among the PostingHeader elements?
x1 <- xml_find_all(doc, "//PostingHeader")
x2 <- purrr::map_chr(x1, function(header) xml_attr(header, "Data_Type"))
unique(x2)
#> [1] "UTC0" "UTC1" "UTC2" "DF"   "FLOW" "UTCD" "D"    "NG"

# Build a one-row tibble (PostedValue, Hour) from a single
# HourlyIndicatedValue node; both attributes are converted to numeric.
hourly_row <- function(value_node) {
  tibble::tibble(
    PostedValue = as.numeric(xml_attr(value_node, "PostedValue")),
    Hour = as.numeric(xml_attr(value_node, "Hour"))
  )
}

# Build the rows for a single PostingHeader node: its Data_Code and Data_Date
# attributes combined with one row per HourlyIndicatedValue child.
header_rows <- function(header) {
  hourly_nodes <- xml_find_all(header, "HourlyIndicatedValue")
  hourly <- purrr::map_dfr(hourly_nodes, hourly_row)
  header_info <- tibble::tibble(
    Data_Code = xml_attr(header, "Data_Code"),
    Data_Date = xml_attr(header, "Data_Date")
  )
  # cbind() repeats the single header row alongside every hourly row.
  cbind(header_info, hourly)
}

# Retrieve the data for Data_Type "NG" and stack the per-header results.
x3 <- xml_find_all(doc, '//PostingHeader[@Data_Type ="NG"]')
x4 <- purrr::map_dfr(x3, header_rows)

# First rows of the result.
head(x4)
#>   Data_Code Data_Date PostedValue Hour
#> 1       COL   0012020       17664    1
#> 2       COL   0012020       16336    2
#> 3       COL   0012020       16046    3
#> 4       COL   0012020       15712    4
#> 5       COL   0012020       14526    5
#> 6       COL   0012020       14674    6

Created on 2020-07-07 by the reprex package (v0.3.0)

1 Like

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.