Iteratively assign variable names for calculations done on list within a list in R?

0

I have been having an issue in RStudio for a while now. I would like to preface this by saying that I am still fairly new to R and don't yet know all the "fastest" ways of doing things, so any help and patience would be awesome.

I am a researcher having to calculate skill scores for weather models at different weather stations and have thousands of data points to iterate over and I figured the easiest method to do this was read in the files and store each file as a separate list in a list.

After doing this, I would loop over each list in the list and calculate different error scores (normal error, RMSE, etc.), saving me from having to copy and paste about 121 different loops. Either A) this is clearly not a good method, or B) I do not have a firm grasp on the best way of doing this.

So, below is my code I have created:

# Building blocks of the file names: models, parameters and city codes,
# plus the directory that holds the files
model_list <- list("NBS", "GFS", "HRRR", "PFM", "NAM", "verif")
param_list <- list("maxT", "minT", "precip", "wind")
city_list <- list("KGRR", "KGFL", "PAJN", "KMAF", "KMLB")
path <- "Downloads/Total_Model_Data/"

# Create one empty (NULL) variable per model/parameter pair, e.g. NBSmaxT
for (i in model_list) {
  for (j in param_list) {
    listname <- paste0(i, j)
    assign(listname, NULL)
  }
}

# Perform "parameter matching", e.g. read in all maxT's, all minT etc...
# For every model/param/city combination the matching branch builds the file
# name "<city>_<model>_<param>.txt", prefixes it with `path`, reads it with
# read_csv() wrapped in try(silent = TRUE), and appends the result with c() to
# the variable named <model><param> (e.g. GFSmaxT).
# Only verif, NBS and HRRR have a precip branch; GFS, NAM and PFM precip fall
# through to the final else and print a "doesnt exist" message.
# NOTE(review): read_csv() is presumably readr::read_csv -- no library() call
# is visible in this snippet. c() on a data frame concatenates its columns as
# separate list elements, so the per-city boundaries are not kept; when the
# read fails, the "try-error" value returned by try() is appended instead.
for (model in model_list){
  for (param in param_list){
    for (city in city_list){
      
# Separate maxT Data
      if (model == 'verif' && param == 'maxT'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        verifmaxT <- c(verifmaxT,try(read_csv(localpath), silent = TRUE))
             
      }else if (model == 'GFS' && param == 'maxT'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        GFSmaxT <- c(GFSmaxT,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'NBS' && param == 'maxT'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        NBSmaxT <- c(NBSmaxT,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'NAM' && param == 'maxT'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        NAMmaxT <- c(NAMmaxT,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'PFM' && param == 'maxT'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        PFMmaxT <- c(PFMmaxT,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'HRRR' && param == 'maxT'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        HRRRmaxT <- c(HRRRmaxT,try(read_csv(localpath), silent = TRUE))
        
# Separate minT data
      }else if (model == 'verif' && param == 'minT'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        verifminT <- c(verifminT,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'GFS' && param == 'minT'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        GFSminT <- c(GFSminT,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'NBS' && param == 'minT'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        NBSminT <- c(NBSminT,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'NAM' && param == 'minT'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        NAMminT <- c(NAMminT,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'PFM' && param == 'minT'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        PFMminT <- c(PFMminT,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'HRRR' && param == 'minT'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        HRRRminT <- c(HRRRminT,try(read_csv(localpath), silent = TRUE))
        
# Separate wind Data        
      }else if (model == 'verif' && param == 'wind'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        verifwind <- c(verifwind,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'GFS' && param == 'wind'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        GFSwind <- c(GFSwind,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'NBS' && param == 'wind'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        NBSwind <- c(NBSwind,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'NAM' && param == 'wind'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        NAMwind <- c(NAMwind,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'PFM' && param == 'wind'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        PFMwind <- c(PFMwind,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'HRRR' && param == 'wind'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        HRRRwind <- c(HRRRwind,try(read_csv(localpath), silent = TRUE))

# Separate Precip data
      }else if (model == 'verif' && param == 'precip'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        verifprecip <- c(verifprecip,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'NBS' && param == 'precip'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        NBSprecip <- c(NBSprecip,try(read_csv(localpath), silent = TRUE))
        
      }else if (model == 'HRRR' && param == 'precip'){
        var_name <- paste(city,"_",model,"_",param,".txt", sep = "") 
        localpath = paste(path,var_name,sep = "")
        HRRRprecip <- c(HRRRprecip,try(read_csv(localpath), silent = TRUE))
        
      }else{
        print(paste(city,model,param,'doesnt exist'))
        
      }
    }
  }
}

#Perform calculation(s)... 
# NOTE: the nested loops pair every element of GFSmaxT with every element of
# verifmaxT, and each pass overwrites GFSmaxT_KGRRerror, so only the result of
# the last pair is left once the loops finish.
for (val in GFSmaxT){
  for (ob in verifmaxT){
    GFSmaxT_KGRRerror <- val - ob # calculate normal error... <- Problem point!
  }
}

So here is my one problem to make this a simple Q/A...

  1. Is there a simple method for assigning unique variable names to the results of these list-within-a-list calculations? I know I can use assign(), but that throws another wrench into my code: I would have to create a couple more for loops over the lists I initially created to iteratively build the different substrings of the variable name. I have tried that and can't get it to work.

I see two approaches that could make your life easier: a nested list or a data frame.

Nested list

A direct generalization of what you already do is to use a multi-level list (or nested list, or list-of-list). Indeed, a list can contain any element, including another list. So, you could actually rewrite:

model_list <- list("NBS", "GFS", "HRRR", "PFM", "NAM", "verif")
param_list <- list("maxT", "minT", "precip", "wind")
city_list <- list("KGRR", "KGFL", "PAJN", "KMAF", "KMLB")
path <- "Downloads/Total_Model_Data/"

# Two-level nested list addressed by name: all_listnames[[model]][[param]].
# something() is a placeholder for whatever should be stored per pair.
all_listnames <- list()
for (i in model_list) {
  # Build the inner list for this model first, then attach it in one step
  per_param <- list()
  for (j in param_list) {
    per_param[[j]] <- something()
  }
  all_listnames[[i]] <- per_param
}

Or:

# Read every "<city>_<model>_<param>.txt" file into a three-level nested list
# addressed by name: all_res[[model]][[param]][[city]].
#
# Each level is pre-allocated *with names*. Assigning by name into an unnamed
# pre-allocated list would append new elements after the empty slots instead
# of filling them.
model_names <- unlist(model_list)
param_names <- unlist(param_list)
city_names <- unlist(city_list)

all_res <- setNames(vector("list", length(model_names)), model_names)
for (model in model_names) {
  all_res[[model]] <- setNames(vector("list", length(param_names)), param_names)
  for (param in param_names) {
    all_res[[model]][[param]] <-
      setNames(vector("list", length(city_names)), city_names)
    for (city in city_names) {
      localpath <- paste0(path, city, "_", model, "_", param, ".txt")
      # Some model/param/city files may not exist: report the failure and
      # leave the pre-allocated slot as NULL rather than stopping the loop
      station_data <- tryCatch(
        read_csv(localpath),
        error = function(e) {
          message("Skipping ", localpath, ": ", conditionMessage(e))
          NULL
        }
      )
      if (!is.null(station_data)) {
        all_res[[model]][[param]][[city]] <- station_data
      }
    }
  }
}

That already avoids typing all the if/else manually. Then your calculations would be along these lines:

# Normal error (model minus verification) for every forecast model, parameter
# and city, stored as normal_errors[[model]][[param]][[city]].
# "verif" is the reference the other models are compared against, so it is
# excluded from the models that are looped over.
forecast_models <- setdiff(unlist(model_list), "verif")
param_names <- unlist(param_list)
city_names <- unlist(city_list)

# Pre-allocate every level with names so that assignment by name fills the
# existing slots instead of appending new ones
normal_errors <-
  setNames(vector("list", length(forecast_models)), forecast_models)

for (model in forecast_models) {
  normal_errors[[model]] <-
    setNames(vector("list", length(param_names)), param_names)
  for (param in param_names) {
    normal_errors[[model]][[param]] <-
      setNames(vector("list", length(city_names)), city_names)
    for (city in city_names) {
      val <- all_res[[model]][[param]][[city]]
      ob <- all_res[["verif"]][[param]][[city]]
      # Skip combinations where either data set is missing or failed to load
      if (!is.data.frame(val) || !is.data.frame(ob)) {
        next
      }
      # `column` is a placeholder for the name of the data column in the files
      normal_errors[[model]][[param]][[city]] <- val$column - ob$column
    }
  }
}

So that method is better in that it already automates all the looping, and it keeps all the parameters of interest in a single structure. But it's still unwieldy and error-prone (I'm sure I made mistakes above). And sooo many [[[[[[]]!

Data frame

The power of R really comes with its data frames. If you can fit your data in such a structure, it will simplify every computation. There is just one choice to make: what goes in the rows, and what goes in the columns. Here, you can use this approach:

model_list <- list("NBS", "GFS", "HRRR", "PFM", "NAM", "verif")
param_list <- list("maxT", "minT", "precip", "wind")
city_list <- list("KGRR", "KGFL", "PAJN", "KMAF", "KMLB")
path <- "Downloads/Total_Model_Data/"

# One row per model/param/city combination.
# expand.grid() is given plain character vectors: passing the lists directly
# would produce list-columns, which are awkward to group and compare on.
# stringsAsFactors = FALSE keeps the columns as character instead of factor.
dat <- expand.grid(model = unlist(model_list),
                   param = unlist(param_list),
                   city = unlist(city_list),
                   stringsAsFactors = FALSE)

Magic! You automatically have a data frame with 120 rows representing all combinations. Then, all you want to do should be expressed as an operation on a column that creates a new column. We can use the dplyr package to simplify this, and the pipe %>% to chain operations.

library(tidyverse) # contains dplyr, purrr and readr

# read_csv() raises an error for a file it cannot read; possibly() turns that
# error into a NULL result so one missing file does not abort the whole read
read_csv_or_null <- possibly(read_csv, otherwise = NULL)

dat <- dat %>%
  mutate(
    # The column is called var_name so that the next line can refer to it
    var_name = paste0(city, "_", model, "_", param, ".txt"),
    localpath = paste0(path, var_name),
    # map() reads one file per row; the result is a list-column of data frames
    raw_data = map(localpath, read_csv_or_null),
    # `column` is a placeholder for the name of the data column in the files;
    # rows whose file could not be read get NULL here
    my_column = map(raw_data, function(file_data) file_data$column)
  )

Then, if I understand the calculation, you want to subtract verif from each other model. That is not very easy, we will have to use more advanced functions of the tidyverse: the ability to nest() data in a column.

# For each city/param group, subtract the "verif" values from every model's
# values. The result is a list-column `subtraction` with one element per row.
dat %>%
  group_by(city, param) %>%
  nest() %>%
  mutate(subtraction = map(data, function(sub_df) {
    # Position of the verification row inside this group (NA when absent)
    verif_row <- match("verif", as.character(sub_df$model))
    verif_values <- sub_df$my_column[[verif_row]]
    # One difference per model row; `[[` and map() work whether my_column is
    # a plain vector or a list-column
    map(sub_df$my_column, function(model_values) model_values - verif_values)
  })) %>%
  unnest(everything()) %>%
  ungroup()

This is harder to understand, but once you get it it's very powerful, and, I think, will solve all your problems.

1 Like

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.