Transform list into a long dataframe

Hi,
I have a dataset that looks like the below. I don't have much experience with lists so this would be a good chance to tackle it. Basically each year spreadsheet with information for those respective years.

library(tidyverse)
# I have a large list that contains the data as below:
y_list <- list(
  
y_2008 <- tibble(Zipcode = c("90045","90050","90048","90064","90045"),
       Industry = c("Agriculture", "Construction", "Mining","Wage","Wholesale"),
       Employment = c(1106322,1106322,1106328,1106388,1106322),
       Code = c(NA,NA,321,123,NA)),


y_2009 <- tibble(Zipcode = c("90045","90050","90048","90064","90045"),
                 Industry = c("Agriculture", "Construction", "Mining","Wage","Wholesale"),
                 Employment = c(1106322,1106322,1106328,1106388,1106322),
                 Code = c(NA,NA,321,123,NA)),


y_2010 <- tibble(Zipcode = c("93345","93350","93348","93364","93345"),
                 Industry = c("Agriculture", "Construction", "Mining","Wage","Wholesale"),
                 Employment = c(1106322,1106322,1106328,1106388,1106322),
                 Code = c(NA,NA,321,123,NA)),


y_2011 <- tibble(Zipcode = c("98745","98750","98748","98764","98745"),
                 Industry = c("Agriculture", "Construction", "Mining","Wage","Wholesale"),
                 Employment = c(1106322,1106322,1106328,1106388,1106322),
                 Code = c(NA,NA,321,"****",NA)),

y_2012 <- tibble(Zipcode = c("98745","98750","98748","98764","98745"),
                 Industry = c("Agriculture", "Construction", "Mining","Wage","Wholesale"),
                 Employment = c(1106322,1106322,1106328,1106388,1106322),
                 Code = c(NA,NA,321,"****",NA)),

y_2013 <- tibble(Zipcode = c("98745","98750","98748","98764","98745"),
                 Industry = c("Agriculture", "Construction", "Mining","Wage","Wholesale"),
                 Employment = c(1106322,1106322,1106328,1106388,1106322),
                 Code = c(NA,NA,321,"****",NA))
)

# Setting object names for the list:
names(y_list) <- c("y_2008","y_2009","y_2010","y_2011", "y_20012","y_2013")

Firstly, I would like to convert this into a long dataframe with a year column.

# How to transform into a single dataframe with a column for year.
y_list %>% unnest(`cols = c(y_2008, y_2009, y_2010, y_2011, y_20012, y_2013)` )

Not sure as well how I would handle applying functions when in list form for example, its unclear for me how to instruct R to apply for each list object, on column "Code" apply str_replace()...

# Not used to handling lists, how to apply the string replace to all "****" to zero?
y_list %>% map(Code,str_replace("****",0))

The examples I found weren't clear and I could not make it work.

Thanks for your help in advance.

Here is how to do it

library(tidyverse)
# I have a large list that contains the data as below:
y_list <- list(
    
    y_2008 = tibble(Zipcode = c("90045","90050","90048","90064","90045"),
                     Industry = c("Agriculture", "Construction", "Mining","Wage","Wholesale"),
                     Employment = c(1106322,1106322,1106328,1106388,1106322),
                     Code = c(NA,NA,321,123,NA)),
    
    
    y_2009 = tibble(Zipcode = c("90045","90050","90048","90064","90045"),
                     Industry = c("Agriculture", "Construction", "Mining","Wage","Wholesale"),
                     Employment = c(1106322,1106322,1106328,1106388,1106322),
                     Code = c(NA,NA,321,123,NA)),
    
    
    y_2010 = tibble(Zipcode = c("93345","93350","93348","93364","93345"),
                     Industry = c("Agriculture", "Construction", "Mining","Wage","Wholesale"),
                     Employment = c(1106322,1106322,1106328,1106388,1106322),
                     Code = c(NA,NA,321,123,NA)),
    
    
    y_2011 = tibble(Zipcode = c("98745","98750","98748","98764","98745"),
                     Industry = c("Agriculture", "Construction", "Mining","Wage","Wholesale"),
                     Employment = c(1106322,1106322,1106328,1106388,1106322),
                     Code = c(NA,NA,321,"****",NA)),
    
    y_2012 = tibble(Zipcode = c("98745","98750","98748","98764","98745"),
                     Industry = c("Agriculture", "Construction", "Mining","Wage","Wholesale"),
                     Employment = c(1106322,1106322,1106328,1106388,1106322),
                     Code = c(NA,NA,321,"****",NA)),
    
    y_2013 = tibble(Zipcode = c("98745","98750","98748","98764","98745"),
                     Industry = c("Agriculture", "Construction", "Mining","Wage","Wholesale"),
                     Employment = c(1106322,1106322,1106328,1106388,1106322),
                     Code = c(NA,NA,321,"****",NA))
)

# Setting object names for the list:
names(y_list) <- c("y_2008","y_2009","y_2010","y_2011", "y_20012","y_2013")

map(y_list, .f = ~ mutate(., Code = as.numeric(str_replace_all(Code, '\\*{4}', '0')))) %>% 
    map_dfr(.f = ~., .id = 'Year') %>% 
    mutate(Year = as.numeric(str_remove(Year, "y_")))
#> # A tibble: 30 × 5
#>     Year Zipcode Industry     Employment  Code
#>    <dbl> <chr>   <chr>             <dbl> <dbl>
#>  1  2008 90045   Agriculture     1106322    NA
#>  2  2008 90050   Construction    1106322    NA
#>  3  2008 90048   Mining          1106328   321
#>  4  2008 90064   Wage            1106388   123
#>  5  2008 90045   Wholesale       1106322    NA
#>  6  2009 90045   Agriculture     1106322    NA
#>  7  2009 90050   Construction    1106322    NA
#>  8  2009 90048   Mining          1106328   321
#>  9  2009 90064   Wage            1106388   123
#> 10  2009 90045   Wholesale       1106322    NA
#> # … with 20 more rows

Created on 2021-09-23 by the reprex package (v2.0.1)

1 Like

Hi @andresrcs ,

Thanks for the assistance!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.