Combining Multiple Lists From Nested Output in dplyr


#1

How do you pull nested data from the same source to make a tibble?

women %>% lm(height~1+.,.) %>% .$fitted.values # THIS WORKS
women %>% lm(height~1+.,.) %>% .$model # THIS WORKS
women %>% lm(height~1+.,.) %>% as_tibble(.$model, .$fitted.values) # THIS DOESN'T WORK

#2

Pipes generally put the Left-Hand Side into the Right-Hand Side as the first argument, and that’s Not Ideal in this case. If you use the data pronoun . to manually specify position, the pipe sometimes doesn’t implicitly include it as the first argument—but exactly when that happens is a little tricky.

if you enclose that last part of the pipe in braces, you can ensure that the pronoun isn’t placed first:

women %>% lm(height~1+.,.) %>% {as_tibble(.$model, .$fitted.values)}

This way the pipe evaluates to as_tibble(.$model, .$fitted.value) and not as_tibble(., .$model, .$fitted.value). (I only just learnt this trick reading your post! Wish I’d known it earlier :sweat_smile:)


#3

An alternative to @rensa 's answer is to use the %$% operator from magrittr. It exposes only the variables from the LHS object in the RHS object (Note that model is a data.frame and fitted.values is a vector so you need to turn fitted.values into a data.frame to bind them).

library(dplyr)
library(magrittr)

women %>% 
  lm(height ~ 1 + ., data = .) %$% 
  bind_cols(model, tibble(fitted.values))
#>    height weight fitted.values
#> 1      58    115      58.75712
#> 2      59    117      59.33162
#> 3      60    120      60.19336
#> 4      61    123      61.05511
#> 5      62    126      61.91686
#> 6      63    129      62.77861
#> 7      64    132      63.64035
#> 8      65    135      64.50210
#> 9      66    139      65.65110
#> 10     67    142      66.51285
#> 11     68    146      67.66184
#> 12     69    150      68.81084
#> 13     70    154      69.95984
#> 14     71    159      71.39608
#> 15     72    164      72.83233

But of course the easiest way to do this specific example might be to use broom::augment().

women %>% 
  lm(height ~ 1 + ., data = .) %>%
  broom::augment()
#>    height weight  .fitted   .se.fit      .resid       .hat    .sigma
#> 1      58    115 58.75712 0.2002641 -0.75711680 0.20712077 0.3866824
#> 2      59    117 59.33162 0.1879640 -0.33161526 0.18245976 0.4456021
#> 3      60    120 60.19336 0.1703858 -0.19336294 0.14992863 0.4539882
#> 4      61    123 61.05511 0.1541707 -0.05511062 0.12274998 0.4576921
#> 5      62    126 61.91686 0.1397938  0.08314170 0.10092380 0.4573072
#> 6      63    129 62.77861 0.1278767  0.22139402 0.08445008 0.4531106
#> 7      64    132 63.64035 0.1191595  0.35964634 0.07332884 0.4451279
#> 8      65    135 64.50210 0.1143764  0.49789866 0.06756007 0.4331457
#> 9      66    139 65.65110 0.1149121  0.34890175 0.06819443 0.4459639
#> 10     67    142 66.51285 0.1204412  0.48715407 0.07491476 0.4340420
#> 11     68    146 67.66184 0.1336164  0.33815716 0.09220125 0.4464007
#> 12     69    150 68.81084 0.1517996  0.18916026 0.11900325 0.4542973
#> 13     70    154 69.95984 0.1734227  0.04016335 0.15532075 0.4578334
#> 14     71    159 71.39608 0.2036096 -0.39608278 0.21409880 0.4394719
#> 15     72    164 72.83233 0.2360451 -0.83232892 0.28774483 0.3587713
#>         .cooksd  .std.resid
#> 1  0.4876664537 -1.93227145
#> 2  0.0775185018 -0.83346757
#> 3  0.0200312397 -0.47660030
#> 4  0.0012509293 -0.13371569
#> 5  0.0022285683  0.19926427
#> 6  0.0127513262  0.52581582
#> 7  0.0285208433  0.84902718
#> 8  0.0497413772  1.17176170
#> 9  0.0246883563  0.82138974
#> 10 0.0536443631  1.15102361
#> 11 0.0330356154  0.80655238
#> 12 0.0141663366  0.45798528
#> 13 0.0009067618  0.09930995
#> 14 0.1404228626 -1.01533829
#> 15 1.0146442939 -2.24122754

Created on 2018-01-31 by the reprex package (v0.1.1.9000).


#4

As an alternative to @davis answer, you can also use some of the tidyverse tools to get all this workflow done keeping the structure of a table.

library(tidyverse)
tab <- women %>%
  as_tibble() %>%
  nest() %>%
  mutate(model = map(data, ~ lm(height ~ 1 + ., data = .x)),
         augmented = map(model, broom::augment))
tab
#> # A tibble: 1 x 3
#>   data              model    augmented            
#>   <list>            <list>   <list>               
#> 1 <tibble [15 x 2]> <S3: lm> <data.frame [15 x 9]>

You can see that everything is stored in the tibble, inside list column. Notice
the model column that contains the lm objects in a list.
you can then use unnest to get the result table you want.

tab %>%
  unnest(augmented)
#> # A tibble: 15 x 9
#>    height weight .fitted .se.fit  .resid   .hat .sigma  .cooksd .std.resid
#>     <dbl>  <dbl>   <dbl>   <dbl>   <dbl>  <dbl>  <dbl>    <dbl>      <dbl>
#>  1   58.0    115    58.8   0.200 -0.757  0.207   0.387 0.488       -1.93  
#>  2   59.0    117    59.3   0.188 -0.332  0.182   0.446 0.0775      -0.833 
#>  3   60.0    120    60.2   0.170 -0.193  0.150   0.454 0.0200      -0.477 
#>  4   61.0    123    61.1   0.154 -0.0551 0.123   0.458 0.00125     -0.134 
#>  5   62.0    126    61.9   0.140  0.0831 0.101   0.457 0.00223      0.199 
#>  6   63.0    129    62.8   0.128  0.221  0.0845  0.453 0.0128       0.526 
#>  7   64.0    132    63.6   0.119  0.360  0.0733  0.445 0.0285       0.849 
#>  8   65.0    135    64.5   0.114  0.498  0.0676  0.433 0.0497       1.17  
#>  9   66.0    139    65.7   0.115  0.349  0.0682  0.446 0.0247       0.821 
#> 10   67.0    142    66.5   0.120  0.487  0.0749  0.434 0.0536       1.15  
#> 11   68.0    146    67.7   0.134  0.338  0.0922  0.446 0.0330       0.807 
#> 12   69.0    150    68.8   0.152  0.189  0.119   0.454 0.0142       0.458 
#> 13   70.0    154    70.0   0.173  0.0402 0.155   0.458 0.000907     0.0993
#> 14   71.0    159    71.4   0.204 -0.396  0.214   0.439 0.140       -1.02  
#> 15   72.0    164    72.8   0.236 -0.832  0.288   0.359 1.01        -2.24

or without using broom

women %>%
  as_tibble() %>%
  nest() %>%
  mutate(lm = map(data, ~ lm(height ~ 1 + ., data = .x)),
         fitted = map(lm, "fitted.values")) %>%
  unnest(data, fitted)
#> # A tibble: 15 x 3
#>    fitted height weight
#>     <dbl>  <dbl>  <dbl>
#>  1   58.8   58.0    115
#>  2   59.3   59.0    117
#>  3   60.2   60.0    120
#>  4   61.1   61.0    123
#>  5   61.9   62.0    126
#>  6   62.8   63.0    129
#>  7   63.6   64.0    132
#>  8   64.5   65.0    135
#>  9   65.7   66.0    139
#> 10   66.5   67.0    142
#> 11   67.7   68.0    146
#> 12   68.8   69.0    150
#> 13   70.0   70.0    154
#> 14   71.4   71.0    159
#> 15   72.8   72.0    164

This kind of workflow allow to apply model by group for example

# dummy dataset
women %>%
  as_tibble() %>%
  mutate(type = "C1") %>%
  bind_rows(women %>% mutate(type = "C2")) %>%
  # nesting by type, get list column of one element per type
  nest(-type) %>%
  # applying function using map to iterate through list column
  mutate(model = map(data, ~ lm(height ~ 1 + ., data = .x)),
         augmented = map(model, broom::augment))
#> # A tibble: 2 x 4
#>   type  data              model    augmented            
#>   <chr> <list>            <list>   <list>               
#> 1 C1    <tibble [15 x 2]> <S3: lm> <data.frame [15 x 9]>
#> 2 C2    <tibble [15 x 2]> <S3: lm> <data.frame [15 x 9]>

Created on 2018-01-31 by the reprex package (v0.1.1.9000).