Obtaining summary estimates across elements of a nested list (or at different levels)

Hi,
I am having some difficulty obtaining summary estimates across elements of a nested list. See the very simplified example below. Essentially, I want to access the elements within a list and manipulate them easily (e.g. get means across the lists, etc.). Sometimes the elements are in the same position and sometimes they are in different positions. I am pretty sure there is a much easier and more straightforward way to do this in the tidyverse (or base R), but I can't seem to find it. I would be grateful if you could help me solve this issue. Thank you.


library(reprex)
library(tidyverse)

# 1) A function that runs one simulation.
#
# Takes no arguments. Draws one simulated mean age for females
# (normal, mean 27, sd 1) and one for males (normal, mean 30, sd 1).
# Returns a list with a single element `sim`: a 1 x 2 numeric matrix with
# columns `mean_age_female` and `mean_age_male`.
to_estimate <- function() {
  # The female value is drawn first, then the male value; this order
  # determines which random numbers each one receives under a fixed seed.
  female_draw <- rnorm(1, mean = 27, sd = 1)
  male_draw <- rnorm(1, mean = 30, sd = 1)

  list(
    sim = cbind(mean_age_female = female_draw, mean_age_male = male_draw)
  )
}

# Single demonstration call. No seed has been set at this point in the
# script, so the values printed below will differ from run to run.
to_estimate()
#> $sim
#>      mean_age_female mean_age_male
#> [1,]        26.35325      31.04636


# 2) Replicating these simulations and obtaining estimates
#    (mean, sd, percentile)
#
# `res` is a list with one element per replicate; each element is the list
# returned by `to_estimate()`.
rep <- 4 # number of replicates
set.seed(123)
# seq_len() gives an empty sequence when `rep` is 0, whereas `1:rep` would
# iterate over c(1, 0). The replicates are still generated in order, so the
# random draws match the original loop for the same seed.
res <- lapply(seq_len(rep), function(i) to_estimate())
res
#> [[1]]
#> [[1]]$sim
#>      mean_age_female mean_age_male
#> [1,]        26.43952      29.76982
#> 
#> 
#> [[2]]
#> [[2]]$sim
#>      mean_age_female mean_age_male
#> [1,]        28.55871      30.07051
#> 
#> 
#> [[3]]
#> [[3]]$sim
#>      mean_age_female mean_age_male
#> [1,]        27.12929      31.71506
#> 
#> 
#> [[4]]
#> [[4]]$sim
#>      mean_age_female mean_age_male
#> [1,]        27.46092      28.73494


# Summarizing the results ----
#
# `mean()` averages only its first argument (`x`); additional positional
# arguments are matched to `trim` and `na.rm`. Passing the four draws as
# separate arguments therefore returned just the first replicate's value.
# The draws are combined into one vector with `c()` before summarising,
# as `sd()` and `quantile()` already required.

# Female draws from each of the four replicates stored in `res`.
draws_female <- c(res[[1]]$sim[, "mean_age_female"],
                  res[[2]]$sim[, "mean_age_female"],
                  res[[3]]$sim[, "mean_age_female"],
                  res[[4]]$sim[, "mean_age_female"])

means_female <- mean(draws_female)
sd_female <- sd(draws_female)
upper_female <- quantile(draws_female, 0.975)
lower_female <- quantile(draws_female, 0.025)

res_female <- cbind(means_female, sd_female, lower_female, upper_female)
# Drop the row name picked up from the named quantile() results.
rownames(res_female) <- NULL

res_female
#>      means_female sd_female lower_female upper_female
#> [1,]     27.39711 0.8835687     26.49126     28.47637


# Male draws from each of the four replicates stored in `res`.
draws_male <- c(res[[1]]$sim[, "mean_age_male"],
                res[[2]]$sim[, "mean_age_male"],
                res[[3]]$sim[, "mean_age_male"],
                res[[4]]$sim[, "mean_age_male"])

means_male <- mean(draws_male)
sd_male <- sd(draws_male)
upper_male <- quantile(draws_male, 0.975)
lower_male <- quantile(draws_male, 0.025)

res_male <- cbind(means_male, sd_male, lower_male, upper_male)
# Drop the row name picked up from the named quantile() results.
rownames(res_male) <- NULL


# Reshape to one row per gender. Column names have the form
# "<statistic>_<gender>", so splitting on "_" sends the first part to the
# output column names (".value") and the second part to `gender`.
# `values_to` is omitted: tidyr ignores it when `names_to` contains ".value".
cbind(res_female, res_male) %>%
  data.frame() %>%
  pivot_longer(cols = everything(),
               names_to = c(".value", "gender"),
               names_sep = "_")
#> # A tibble: 2 x 5
#>   gender means    sd lower upper
#>   <chr>  <dbl> <dbl> <dbl> <dbl>
#> 1 female  27.4 0.884  26.5  28.5
#> 2 male    30.1 1.24   28.8  31.6

#is there a function in tidyverse that can do this automatically such as
# in purrr package with map or modify?

It's simply easier to do such work in data.frames.
data.frames are, after all, lists of equal-length columns, but the organising principle of keeping those columns in relation to each other adds so much.
The code below is perfectly analogous to the much longer code you provided, in terms of the data processed and the calculations conducted, but it is much shorter and needs far less typing.


library(tidyverse)

# 1) A function that runs one simulation.
#
# Accepts and ignores any arguments (`...`), so it can be called once per
# element by a mapping function. Returns a one-row data.frame with columns
# `meanagefemale` (normal draw, mean 27, sd 1) and `meanagemale`
# (normal draw, mean 30, sd 1).
to_estimate <- function(...) {
  # The female value is drawn before the male value.
  female_draw <- rnorm(1, mean = 27, sd = 1)
  male_draw <- rnorm(1, mean = 30, sd = 1)

  data.frame(meanagefemale = female_draw, meanagemale = male_draw)
}


# 2) Replicating these simulations and obtaining estimates
#    (mean, sd, percentile)
#
# Run the simulation four times and row-bind the one-row data.frames into a
# single data frame with one row per replicate. The index passed to
# `to_estimate()` is ignored by it.
set.seed(123)
as_df <- map_dfr(seq_len(4), function(i) to_estimate(i))

# Apply each summary function to every column of `as_df`. The result is a
# one-row data frame whose columns are named "<column>_<function>",
# e.g. "meanagefemale_mean".
all_sum <- summarise_all(
  as_df,
  .funs = list(
    mean = ~ mean(.),
    sd = ~ sd(.),
    lower = ~ quantile(., probs = .025),
    upper = ~ quantile(., probs = .975)
  )
)
all_sum


# Reshape to one row per gender and one column per statistic.
# Column names in `all_sum` look like "meanagefemale_mean": strip the
# "meanage" prefix, then split on "_" so the first part becomes `gender`
# and the second part (".value") becomes the output column name.
# `values_to` is omitted: tidyr ignores it when `names_to` contains ".value".
all_sum %>% pivot_longer(
  cols = everything(),
  names_to = c("gender", ".value"),
  names_prefix = "meanage",
  names_sep = "_"
)
#> # A tibble: 2 x 5
#>   gender  mean    sd lower upper
#>   <chr>  <dbl> <dbl> <dbl> <dbl>
#> 1 female  27.4 0.884  26.5  28.5
#> 2 male    30.1 1.24   28.8  31.6
1 Like

Thank you so much @nirgrahamuk. It worked perfectly. I appreciate it. Best

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.