Combine multiple data frames into list to run through loop

Hello!

I have a set of 12 dataframes that I want to compile into a list so that I can iterate each dataframe through a function via a loop.

Here is the function:

# Reduce one site-year data frame to its Date column and its discharge
# column, with the discharge column renamed to Discharge_cfs.
# Requires dplyr to be attached for select() and rename().
f <- function(siteyear){
  kept <- select(siteyear, Date, X_00060_00003)
  rename(kept, Discharge_cfs = X_00060_00003)
}

Here's an example of how I've tried to store that data as a list:

# NOTE(review): as.list() coerces only its first argument (gra15); stj05 and
# stj10 are not added to the result, so this does not build a list of data frames.
dfs <- as.list(gra15, stj05, stj10)

But the resulting output doesn't contain all of the data frames. Maybe a list isn't what I need, but I need to store these data frames in some sort of object that lets me pass each of them through the above function.

Here are some of the dataframes (scaled way down):

gra15 <- structure(list(agency_cd = c("USGS", "USGS", "USGS", "USGS", 
"USGS", "USGS"), site_no = c("04119400", "04119400", "04119400", 
"04119400", "04119400", "04119400"), Date = structure(c(16436, 
16437, 16438, 16439, 16440, 16441), class = "Date"), X_00060_00003 = c(4800, 
4570, 4650, 4790, 4610, 3860), X_00060_00003_cd = c("A e", "A", 
"A e", "A e", "A e", "A e")), url = "https://waterservices.usgs.gov/nwis/dv/?site=04119400&format=waterml,1.1&ParameterCd=00060&StatCd=00003&startDT=2015-01-01&endDT=2015-12-31", siteInfo = structure(list(
    station_nm = "GRAND RIVER NEAR EASTMANVILLE, MI", site_no = "04119400", 
    agency_cd = "USGS", timeZoneOffset = "-05:00", timeZoneAbbreviation = "EST", 
    dec_lat_va = 43.0241884, dec_lon_va = -86.0264354, srs = "EPSG:4326", 
    siteTypeCd = "ST", hucCd = "04050006", stateCd = "26", countyCd = "26139", 
    network = "NWIS"), row.names = c(NA, -1L), class = "data.frame"), variableInfo = structure(list(
    variableCode = "00060", variableName = "Streamflow, ft&#179;/s", 
    variableDescription = "Discharge, cubic feet per second", 
    valueType = "Derived Value", unit = "ft3/s", options = "Mean", 
    noDataValue = NA), row.names = c(NA, -1L), class = "data.frame"), disclaimer = "Provisional data are subject to revision. Go to http://waterdata.usgs.gov/nwis/help/?provisional for more information.", statisticInfo = structure(list(
    statisticCd = "00003", statisticName = "Mean"), class = "data.frame", row.names = c(NA, 
-1L)), queryTime = structure(1631203394.95933, class = c("POSIXct", 
"POSIXt")), row.names = c(NA, 6L), class = "data.frame")
> 

stj05 <- structure(list(agency_cd = c("USGS", "USGS", "USGS", "USGS", 
"USGS", "USGS"), site_no = c("04101500", "04101500", "04101500", 
"04101500", "04101500", "04101500"), Date = structure(c(12784, 
12785, 12786, 12787, 12788, 12789), class = "Date"), X_00060_00003 = c(4330, 
4100, 4480, 5570, 5760, 5520), X_00060_00003_cd = c("A", "A", 
"A", "A", "A", "A")), url = "https://waterservices.usgs.gov/nwis/dv/?site=04101500&format=waterml,1.1&ParameterCd=00060&StatCd=00003&startDT=2005-01-01&endDT=2005-12-31", siteInfo = structure(list(
    station_nm = "ST. JOSEPH RIVER AT NILES, MI", site_no = "04101500", 
    agency_cd = "USGS", timeZoneOffset = "-05:00", timeZoneAbbreviation = "EST", 
    dec_lat_va = 41.8292138, dec_lon_va = -86.2597325, srs = "EPSG:4326", 
    siteTypeCd = "ST", hucCd = "04050001", stateCd = "26", countyCd = "26021", 
    network = "NWIS"), row.names = c(NA, -1L), class = "data.frame"), variableInfo = structure(list(
    variableCode = "00060", variableName = "Streamflow, ft&#179;/s", 
    variableDescription = "Discharge, cubic feet per second", 
    valueType = "Derived Value", unit = "ft3/s", options = "Mean", 
    noDataValue = NA), row.names = c(NA, -1L), class = "data.frame"), disclaimer = "Provisional data are subject to revision. Go to http://waterdata.usgs.gov/nwis/help/?provisional for more information.", statisticInfo = structure(list(
    statisticCd = "00003", statisticName = "Mean"), class = "data.frame", row.names = c(NA, 
-1L)), queryTime = structure(1631201435.34722, class = c("POSIXct", 
"POSIXt")), row.names = c(NA, 6L), class = "data.frame")

stj10 <- structure(list(agency_cd = c("USGS", "USGS", "USGS", "USGS", 
"USGS", "USGS"), site_no = c("04101500", "04101500", "04101500", 
"04101500", "04101500", "04101500"), Date = structure(c(14610, 
14611, 14612, 14613, 14614, 14615), class = "Date"), X_00060_00003 = c(4500, 
4270, 3670, 3900, 4000, 4180), X_00060_00003_cd = c("A", "A", 
"A", "A", "A", "A")), url = "https://waterservices.usgs.gov/nwis/dv/?site=04101500&format=waterml,1.1&ParameterCd=00060&StatCd=00003&startDT=2010-01-01&endDT=2010-12-31", siteInfo = structure(list(
    station_nm = "ST. JOSEPH RIVER AT NILES, MI", site_no = "04101500", 
    agency_cd = "USGS", timeZoneOffset = "-05:00", timeZoneAbbreviation = "EST", 
    dec_lat_va = 41.8292138, dec_lon_va = -86.2597325, srs = "EPSG:4326", 
    siteTypeCd = "ST", hucCd = "04050001", stateCd = "26", countyCd = "26021", 
    network = "NWIS"), row.names = c(NA, -1L), class = "data.frame"), variableInfo = structure(list(
    variableCode = "00060", variableName = "Streamflow, ft&#179;/s", 
    variableDescription = "Discharge, cubic feet per second", 
    valueType = "Derived Value", unit = "ft3/s", options = "Mean", 
    noDataValue = NA), row.names = c(NA, -1L), class = "data.frame"), disclaimer = "Provisional data are subject to revision. Go to http://waterdata.usgs.gov/nwis/help/?provisional for more information.", statisticInfo = structure(list(
    statisticCd = "00003", statisticName = "Mean"), class = "data.frame", row.names = c(NA, 
-1L)), queryTime = structure(1631201470.81118, class = c("POSIXct", 
"POSIXt")), row.names = c(NA, 6L), class = "data.frame")

Any help would be so greatly appreciated! Thanks so much!

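I think you need to use the list() function, not as.list(). as.list() converts a single object into a list (here it turns gra15 into a list of its columns and ignores the other arguments), whereas list() builds a list whose elements are the objects you pass to it. If you want to replace each data frame with the result of the function, you can do it like this.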

library(dplyr)  # provides %>%, select() and rename()

# Collect the data frames into a list; list() keeps each one as its own element.
dfs <- list(gra15, stj05, stj10)

# Keep only Date and the discharge column, renaming the latter to Discharge_cfs.
f <- function(siteyear){
  siteyear %>% select(Date, X_00060_00003) %>% rename(Discharge_cfs = X_00060_00003)
}

# seq_along() visits every element, however many data frames the list holds
# (a hard-coded 1:3 would silently skip the rest of a longer list).
# The results overwrite the elements of dfs; gra15, stj05 and stj10 themselves
# are not modified.
for (i in seq_along(dfs)) {
  dfs[[i]] <- f(dfs[[i]])
}
1 Like

First of all, thanks so much for helping!

Second, the list() function absolutely worked; however, iterating over the list did not. When I first run the loop, it seems as though nothing happens: the data frames don't change. When I run it again, it says:

Error: Can't subset columns that don't exist.
x Column `X_00060_00003` doesn't exist.

It is as though the data frames DID change and the columns were already renamed; however, when I check them, they remain as they were before I ran them through the function.

What are you doing that leads to you saying "it seems as though nothing happens"?
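Here is what I get with the small example data sets you posted. The function certainly changes the data frames. Note that the loop updates the copies stored in dfs; the original objects gra15, stj05, and stj10 are left untouched, so inspect dfs[[1]] rather than gra15 to see the result.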

library(dplyr)
gra15 <- structure(list(agency_cd = c("USGS", "USGS", "USGS", "USGS", 
                                      "USGS", "USGS"), 
                        site_no = c("04119400", "04119400", "04119400", 
                                    "04119400", "04119400", "04119400"), 
                        Date = structure(c(16436,16437, 16438, 16439, 16440, 16441), class = "Date"), 
                        X_00060_00003 = c(4800,4570, 4650, 4790, 4610, 3860), 
                        X_00060_00003_cd = c("A e", "A", "A e", "A e", "A e", "A e")), 
                   url = "https://waterservices.usgs.gov/nwis/dv/?site=04119400&format=waterml,1.1&ParameterCd=00060&StatCd=00003&startDT=2015-01-01&endDT=2015-12-31", 
                   siteInfo = structure(list(station_nm = "GRAND RIVER NEAR EASTMANVILLE, MI", site_no = "04119400", agency_cd = "USGS", timeZoneOffset = "-05:00", timeZoneAbbreviation = "EST", dec_lat_va = 43.0241884, dec_lon_va = -86.0264354, srs = "EPSG:4326", siteTypeCd = "ST", hucCd = "04050006", stateCd = "26", countyCd = "26139", network = "NWIS"), row.names = c(NA, -1L), class = "data.frame"), 
                   variableInfo = structure(list(variableCode = "00060", variableName = "Streamflow, ft&#179;/s", variableDescription = "Discharge, cubic feet per second", valueType = "Derived Value", unit = "ft3/s", options = "Mean", noDataValue = NA), row.names = c(NA, -1L), class = "data.frame"), disclaimer = "Provisional data are subject to revision. Go to http://waterdata.usgs.gov/nwis/help/?provisional for more information.", 
                   statisticInfo = structure(list(statisticCd = "00003", statisticName = "Mean"), class = "data.frame", row.names = c(NA, -1L)), queryTime = structure(1631203394.95933, class = c("POSIXct", "POSIXt")), row.names = c(NA, 6L), class = "data.frame")

  
  stj05 <- structure(list(agency_cd = c("USGS", "USGS", "USGS", "USGS", 
                                        "USGS", "USGS"), 
                          site_no = c("04101500", "04101500", "04101500", 
                                                                     "04101500", "04101500", "04101500"), 
                          Date = structure(c(12784, 12785, 12786, 12787, 12788, 12789), class = "Date"), 
                          X_00060_00003 = c(4330, 4100, 4480, 5570, 5760, 5520), 
                          X_00060_00003_cd = c("A", "A", "A", "A", "A", "A")), url = "https://waterservices.usgs.gov/nwis/dv/?site=04101500&format=waterml,1.1&ParameterCd=00060&StatCd=00003&startDT=2005-01-01&endDT=2005-12-31", siteInfo = structure(list(
                                                         station_nm = "ST. JOSEPH RIVER AT NILES, MI", site_no = "04101500", 
                                                         agency_cd = "USGS", timeZoneOffset = "-05:00", timeZoneAbbreviation = "EST", 
                                                         dec_lat_va = 41.8292138, dec_lon_va = -86.2597325, srs = "EPSG:4326", 
                                                         siteTypeCd = "ST", hucCd = "04050001", stateCd = "26", countyCd = "26021", 
                                                         network = "NWIS"), row.names = c(NA, -1L), class = "data.frame"), variableInfo = structure(list(
                                                         variableCode = "00060", variableName = "Streamflow, ft&#179;/s", 
                                                         variableDescription = "Discharge, cubic feet per second", 
                                                         valueType = "Derived Value", unit = "ft3/s", options = "Mean", 
                                                         noDataValue = NA), row.names = c(NA, -1L), class = "data.frame"), disclaimer = "Provisional data are subject to revision. Go to http://waterdata.usgs.gov/nwis/help/?provisional for more information.", statisticInfo = structure(list(
                                                         statisticCd = "00003", statisticName = "Mean"), class = "data.frame", row.names = c(NA, 
                                                         -1L)), queryTime = structure(1631201435.34722, class = c("POSIXct", 
                                                         "POSIXt")), row.names = c(NA, 6L), class = "data.frame")

stj10 <- structure(list(agency_cd = c("USGS", "USGS", "USGS", "USGS", 
                                      "USGS", "USGS"), 
                        site_no = c("04101500", "04101500", "04101500", 
                                                                   "04101500", "04101500", "04101500"), Date = structure(c(14610, 
                        14611, 14612, 14613, 14614, 14615), class = "Date"), 
                        X_00060_00003 = c(4500, 4270, 3670, 3900, 4000, 4180), 
                        X_00060_00003_cd = c("A", "A", "A", "A", "A", "A")), url = "https://waterservices.usgs.gov/nwis/dv/?site=04101500&format=waterml,1.1&ParameterCd=00060&StatCd=00003&startDT=2010-01-01&endDT=2010-12-31", siteInfo = structure(list(
                        station_nm = "ST. JOSEPH RIVER AT NILES, MI", site_no = "04101500", 
                        agency_cd = "USGS", timeZoneOffset = "-05:00", timeZoneAbbreviation = "EST", 
                        dec_lat_va = 41.8292138, dec_lon_va = -86.2597325, srs = "EPSG:4326", 
                        siteTypeCd = "ST", hucCd = "04050001", stateCd = "26", countyCd = "26021", 
                        network = "NWIS"), row.names = c(NA, -1L), class = "data.frame"), variableInfo = structure(list(
                        variableCode = "00060", variableName = "Streamflow, ft&#179;/s", 
                        variableDescription = "Discharge, cubic feet per second", 
                        valueType = "Derived Value", unit = "ft3/s", options = "Mean", 
                        noDataValue = NA), row.names = c(NA, -1L), class = "data.frame"), disclaimer = "Provisional data are subject to revision. Go to http://waterdata.usgs.gov/nwis/help/?provisional for more information.", statisticInfo = structure(list(
                        statisticCd = "00003", statisticName = "Mean"), class = "data.frame", row.names = c(NA, 
                        -1L)), queryTime = structure(1631201470.81118, class = c("POSIXct", 
                        "POSIXt")), row.names = c(NA, 6L), class = "data.frame")
# Collect the data frames into a list; list() keeps each one as its own element.
dfs <- list(gra15, stj05, stj10)

# Keep only Date and the discharge column, renaming the latter to Discharge_cfs.
f <- function(siteyear){
  siteyear %>% select(Date, X_00060_00003) %>% rename(Discharge_cfs = X_00060_00003)
}

# seq_along() visits every element, however many data frames the list holds
# (a hard-coded 1:3 would silently skip the rest of a longer list).
# Only the elements of dfs are replaced; the original data frames are unchanged.
for (i in seq_along(dfs)) {
  dfs[[i]] <- f(dfs[[i]])
}
dfs[[1]]
#>         Date Discharge_cfs
#> 1 2015-01-01          4800
#> 2 2015-01-02          4570
#> 3 2015-01-03          4650
#> 4 2015-01-04          4790
#> 5 2015-01-05          4610
#> 6 2015-01-06          3860
dfs[[2]]
#>         Date Discharge_cfs
#> 1 2005-01-01          4330
#> 2 2005-01-02          4100
#> 3 2005-01-03          4480
#> 4 2005-01-04          5570
#> 5 2005-01-05          5760
#> 6 2005-01-06          5520
dfs[[3]]
#>         Date Discharge_cfs
#> 1 2010-01-01          4500
#> 2 2010-01-02          4270
#> 3 2010-01-03          3670
#> 4 2010-01-04          3900
#> 5 2010-01-05          4000
#> 6 2010-01-06          4180

Created on 2021-09-09 by the reprex package (v0.3.0)

Yes, you are correct, it does! Some weird R stuff on my end, thank you so much for your help!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.