Extracting from nested list


#1

I'm using the googleway package to collect geolocation data for a long list of locations, and I want to figure out how to do it better, e.g. with purrr.

Here's a single example:

library(googleway)
# you'll need an API key
loc <- google_geocode("151 third st. sf, ca", key = my_key)

Here's the output I get:

loc <- structure(list(results = structure(list(address_components = list(
    structure(list(long_name = c("151", "3rd Street", "South Beach", 
    "San Francisco", "San Francisco County", "California", "United States", 
    "94103", "3107"), short_name = c("151", "3rd St", "South Beach", 
    "SF", "San Francisco County", "CA", "US", "94103", "3107"
    ), types = list("street_number", "route", c("neighborhood", 
    "political"), c("locality", "political"), c("administrative_area_level_2", 
    "political"), c("administrative_area_level_1", "political"
    ), c("country", "political"), "postal_code", "postal_code_suffix")), .Names = c("long_name", 
    "short_name", "types"), class = "data.frame", row.names = c(NA, 
    9L))), formatted_address = "151 3rd St, San Francisco, CA 94103, USA", 
    geometry = structure(list(location = structure(list(lat = 37.7859072, 
        lng = -122.4008003), .Names = c("lat", "lng"), class = "data.frame", row.names = 1L), 
        location_type = "ROOFTOP", viewport = structure(list(
            northeast = structure(list(lat = 37.7872561802915, 
                lng = -122.399451319709), .Names = c("lat", "lng"
            ), class = "data.frame", row.names = 1L), southwest = structure(list(
                lat = 37.7845582197085, lng = -122.402149280291), .Names = c("lat", 
            "lng"), class = "data.frame", row.names = 1L)), .Names = c("northeast", 
        "southwest"), class = "data.frame", row.names = 1L)), .Names = c("location", 
    "location_type", "viewport"), class = "data.frame", row.names = 1L), 
    place_id = "ChIJ23gaZn2AhYARH-8DeKxyEw0", plus_code = structure(list(
        compound_code = "QHPX+9M South of Market, San Francisco, CA, United States", 
        global_code = "849VQHPX+9M"), .Names = c("compound_code", 
    "global_code"), class = "data.frame", row.names = 1L), types = list(
        "street_address")), .Names = c("address_components", 
"formatted_address", "geometry", "place_id", "plus_code", "types"
), class = "data.frame", row.names = 1L), status = "OK"), .Names = c("results", 
"status"))

I'm particularly interested in a few pieces, like
loc[["results"]][["geometry"]][["location"]] and
loc[["results"]][["geometry"]][["viewport"]].

Let's say I have a list that combines multiple of these:
locs <- list(loc, loc, loc)

How could I extract all the "locations" and "viewports" from every component of the list?


#2

Hi,

You can use pluck to extract a single element, use purrr's extraction shorthand via map(l, "somestring"), or even write a function that extracts what you want from a single loc list and then map that function over the long list. You'll have to deal with the returned type depending on how you want to organize your results at the end (list, data.frame, tibble, nested tibble, single vectors...).

Here is an example:

library(purrr)
# results and geometry are data.frames, 
# location is a data.frame and 
# viewport, a data.frame with nested data.frame.
# not easy to handle...
loc %>% str(4)
#> List of 2
#>  $ results:'data.frame': 1 obs. of  6 variables:
#>   ..$ address_components:List of 1
#>   .. ..$ :'data.frame':  9 obs. of  3 variables:
#>   .. .. ..$ long_name : chr [1:9] "151" "3rd Street" "South Beach" "San Francisco" ...
#>   .. .. ..$ short_name: chr [1:9] "151" "3rd St" "South Beach" "SF" ...
#>   .. .. ..$ types     :List of 9
#>   ..$ formatted_address : chr "151 3rd St, San Francisco, CA 94103, USA"
#>   ..$ geometry          :'data.frame':   1 obs. of  3 variables:
#>   .. ..$ location     :'data.frame': 1 obs. of  2 variables:
#>   .. .. ..$ lat: num 37.8
#>   .. .. ..$ lng: num -122
#>   .. ..$ location_type: chr "ROOFTOP"
#>   .. ..$ viewport     :'data.frame': 1 obs. of  2 variables:
#>   .. .. ..$ northeast:'data.frame':  1 obs. of  2 variables:
#>   .. .. ..$ southwest:'data.frame':  1 obs. of  2 variables:
#>   ..$ place_id          : chr "ChIJ23gaZn2AhYARH-8DeKxyEw0"
#>   ..$ plus_code         :'data.frame':   1 obs. of  2 variables:
#>   .. ..$ compound_code: chr "QHPX+9M South of Market, San Francisco, CA, United States"
#>   .. ..$ global_code  : chr "849VQHPX+9M"
#>   ..$ types             :List of 1
#>   .. ..$ : chr "street_address"
#>  $ status : chr "OK"
# work on a list
loc %>%
  pluck("results", "geometry") %>% str(1)
#> 'data.frame':    1 obs. of  3 variables:
#>  $ location     :'data.frame':   1 obs. of  2 variables:
#>  $ location_type: chr "ROOFTOP"
#>  $ viewport     :'data.frame':   1 obs. of  2 variables:
# build the long list
long_list <- list(loc, loc, loc)
# if you just want to subset what is inside the list
map(long_list, pluck, 'results', 'geometry') %>% str(2)
#> List of 3
#>  $ :'data.frame':    1 obs. of  3 variables:
#>   ..$ location     :'data.frame':    1 obs. of  2 variables:
#>   ..$ location_type: chr "ROOFTOP"
#>   ..$ viewport     :'data.frame':    1 obs. of  2 variables:
#>  $ :'data.frame':    1 obs. of  3 variables:
#>   ..$ location     :'data.frame':    1 obs. of  2 variables:
#>   ..$ location_type: chr "ROOFTOP"
#>   ..$ viewport     :'data.frame':    1 obs. of  2 variables:
#>  $ :'data.frame':    1 obs. of  3 variables:
#>   ..$ location     :'data.frame':    1 obs. of  2 variables:
#>   ..$ location_type: chr "ROOFTOP"
#>   ..$ viewport     :'data.frame':    1 obs. of  2 variables:

# organize the result the way you want

## Extraction function
#' Extract one geometry column from a single geocoding result
#'
#' Walks `l` -> `results` -> `geometry` -> `col`. Every level is looked up
#' with `[[`, which matches names exactly; `$` would partial-match, so a
#' response holding only a similarly named element (e.g. `results_raw`)
#' could be picked up silently.
#'
#' @param l A single geocode response: a list whose `results` element
#'   contains a `geometry` element.
#' @param col Name of the geometry column to extract, e.g. `"location"` or
#'   `"viewport"`.
#' @return The requested element, or `NULL` when a name along the path is
#'   absent from the list / data frame being indexed.
extract_col <- function(l, col) {
  l[["results"]][["geometry"]][[col]]
}
# One data frame per piece of geometry, with one row per element of long_list:
# the point location plus the two corners of the viewport.
list(
  location = map_df(long_list, extract_col, "location"),
  viewport_southwest = map_df(
    map(long_list, extract_col, "viewport"),
    "southwest"
  ),
  viewport_northeast = map_df(
    map(long_list, extract_col, "viewport"),
    "northeast"
  )
)
#> $location
#>        lat       lng
#> 1 37.78591 -122.4008
#> 2 37.78591 -122.4008
#> 3 37.78591 -122.4008
#> 
#> $viewport_southwest
#>        lat       lng
#> 1 37.78456 -122.4021
#> 2 37.78456 -122.4021
#> 3 37.78456 -122.4021
#> 
#> $viewport_northeast
#>        lat       lng
#> 1 37.78726 -122.3995
#> 2 37.78726 -122.3995
#> 3 37.78726 -122.3995

Created on 2018-10-25 by the reprex package (v0.2.1)

I'll let you play with that. :wink:

PS: And many thanks for the reprex! It really helped!


#3

Adding a bit to @cderv's already great answer: you don't even need to write a function to extract your elements yourself. Internally, purrr will convert it to a very similar function anyway if you provide it with a character vector of the names you want to extract, like so:

map(l, c("results", "geometry", "location"))
map(l, c("results", "geometry", "viewport"))

The result is just as easy to handle:

loc <- structure(list(results = structure(list(address_components = list(
  structure(list(long_name = c("151", "3rd Street", "South Beach", 
                               "San Francisco", "San Francisco County", "California", "United States", 
                               "94103", "3107"), short_name = c("151", "3rd St", "South Beach", 
                                                                "SF", "San Francisco County", "CA", "US", "94103", "3107"
                               ), types = list("street_number", "route", c("neighborhood", 
                                                                           "political"), c("locality", "political"), c("administrative_area_level_2", 
                                                                                                                       "political"), c("administrative_area_level_1", "political"
                                                                                                                       ), c("country", "political"), "postal_code", "postal_code_suffix")), .Names = c("long_name", 
                                                                                                                                                                                                       "short_name", "types"), class = "data.frame", row.names = c(NA, 
                                                                                                                                                                                                                                                                   9L))), formatted_address = "151 3rd St, San Francisco, CA 94103, USA", 
  geometry = structure(list(location = structure(list(lat = 37.7859072, 
                                                      lng = -122.4008003), .Names = c("lat", "lng"), class = "data.frame", row.names = 1L), 
                            location_type = "ROOFTOP", viewport = structure(list(
                              northeast = structure(list(lat = 37.7872561802915, 
                                                         lng = -122.399451319709), .Names = c("lat", "lng"
                                                         ), class = "data.frame", row.names = 1L), southwest = structure(list(
                                                           lat = 37.7845582197085, lng = -122.402149280291), .Names = c("lat", 
                                                                                                                        "lng"), class = "data.frame", row.names = 1L)), .Names = c("northeast", 
                                                                                                                                                                                   "southwest"), class = "data.frame", row.names = 1L)), .Names = c("location", 
                                                                                                                                                                                                                                                    "location_type", "viewport"), class = "data.frame", row.names = 1L), 
  place_id = "ChIJ23gaZn2AhYARH-8DeKxyEw0", plus_code = structure(list(
    compound_code = "QHPX+9M South of Market, San Francisco, CA, United States", 
    global_code = "849VQHPX+9M"), .Names = c("compound_code", 
                                             "global_code"), class = "data.frame", row.names = 1L), types = list(
                                               "street_address")), .Names = c("address_components", 
                                                                              "formatted_address", "geometry", "place_id", "plus_code", "types"
                                               ), class = "data.frame", row.names = 1L), status = "OK"), .Names = c("results", 
                                                                                                                    "status"))
l <- list(loc, loc, loc)

library(purrr)

map(l, c("results", "geometry", "location"))
#> [[1]]
#>        lat       lng
#> 1 37.78591 -122.4008
#> 
#> [[2]]
#>        lat       lng
#> 1 37.78591 -122.4008
#> 
#> [[3]]
#>        lat       lng
#> 1 37.78591 -122.4008
map(l, c("results", "geometry", "viewport"))
#> [[1]]
#>   northeast.lat northeast.lng southwest.lat southwest.lng
#> 1      37.78726     -122.3995      37.78456     -122.4021
#> 
#> [[2]]
#>   northeast.lat northeast.lng southwest.lat southwest.lng
#> 1      37.78726     -122.3995      37.78456     -122.4021
#> 
#> [[3]]
#>   northeast.lat northeast.lng southwest.lat southwest.lng
#> 1      37.78726     -122.3995      37.78456     -122.4021

Created on 2018-10-25 by the reprex package (v0.2.1)


#4

Many thanks @cderv and @mishabalyasin! I really appreciate you walking through it and showing multiple alternatives. I had spent a while digging through purrr tutorials and trying different combinations of parameters with pluck, but hadn't figured out quite the right syntax.