library(xml2)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Address of the page to scrape (unemployment-insurance data, per the URL).
url <- "https://www.esd.wa.gov/labormarketinfo/unemployment-insurance-data"

# Download and parse the page as HTML.
doc1 <- xml2::read_html(url)

# Collect every <a> element anywhere in the parsed document.
doc1_a <- xml2::xml_find_all(doc1, "//a")
#' Extract the text and `href` target of anchor node(s).
#'
#' @param doc An xml2 node (or nodeset) to read from.
#' @return A tibble with one row per node and two character columns:
#'   `c1`, the node text, and `c2`, the value of the `href` attribute.
#'   Missing values in either column are returned as `""`.
unpack_a <- function(doc) {
  c1 <- xml2::xml_text(doc)
  c2 <- xml2::xml_attr(doc, "href")

  # xml2 reports a missing text/attribute as NA, not NULL, so an is.null()
  # guard never fires. Replace NA element-wise instead; unlike a scalar
  # ifelse(), this also keeps every element when `doc` is a nodeset.
  c1[is.na(c1)] <- ""
  c2[is.na(c2)] <- ""

  tibble::tibble(c1 = c1, c2 = c2)
}
# Turn each anchor into a (c1 = text, c2 = href) row, keep the row(s) whose
# text is exactly the label below, and extract the matching href value(s).
xx1 <- purrr::map_dfr(doc1_a, unpack_a) %>%
  dplyr::filter(c1 == "initial claims by County") %>%
  dplyr::pull(c2)

# Print the resulting link(s).
xx1
#> [1] "https://esdorchardstorage.blob.core.windows.net/esdwa/Default/ESDWAGOV/labor-market-info/Libraries/Regional-reports/UI-Claims-Karen/COVID19%20Docs/County%20weekly%20initial%20claims%20for%202020%20(21).xlsx"
# Created on 2020-06-09 by the reprex package (v0.3.0)