Trouble using group_by and map2 together

I am trying to use the group_by function from dplyr alongside map2 from the purrr package and can't seem to figure out how to make it work.

Here is my sample data:

library(tidyverse)
library(rstatix)

# Sample data: 38 observations with an integer id, two two-level character
# columns (edge: "S"/"D", trt: "C"/"T") and 12 numeric measurement columns
# (nl through lacmd). Row 2 is NA in every measurement column and row 14 is
# NA in all of them except md and c.
df <- tibble::tribble(
  ~id, ~edge, ~trt,         ~nl,          ~lm,         ~md,           ~c,        ~mgg,       ~mgcm,          ~p,         ~sp,         ~ap,         ~la,       ~lacm,      ~lacmd,
   1L,   "S",  "C", 1.802500944, -1.126394361, 1.747757193, -0.302911966, 2.942376992,  1.01978392, 1.603508872, 1166.214587, 1.104182097, 3.630403855, 0.925433649, 2.083967271,
   2L,   "S",  "T",          NA,           NA,          NA,           NA,          NA,          NA,          NA,          NA,          NA,          NA,          NA,          NA,
   3L,   "D",  "C",  1.59505822, -1.554475881, 1.173922711, -0.340665184, 1.854642163, 0.787036727,  1.40878277,  663.125567, 0.898799413, 3.332281129, 0.803131628, 1.841247752,
   4L,   "D",  "T", 1.342572531,  -2.21548947, 0.961702527, -0.331617331, 1.645569808, 0.750246559,  0.67441638, 63.63830862, 0.542966439, 2.416127169, 0.574963833, 1.529239129,
   5L,   "S",  "C", 1.802500944, -0.165110402, 2.162768585, -0.285160482, 3.136984959, 1.093832043, 2.209960854,   495.84715, 1.164570199, 4.799142774, 0.994423991, 2.034565573,
   6L,   "S",  "T", 2.141481291,  0.250369615, 2.439863935, -0.218925863, 3.358891011, 1.163094493,  2.30563155, 910.5631088, 1.198833277, 5.219802305, 1.097357732, 2.149000371,
   7L,   "S",  "C", 2.419988191,  0.137237105, 2.207129987, -0.272668696, 2.994211062, 1.115777647, 1.816124079, 646.4053085, 1.230485336, 4.712448496, 1.040577686,  2.24411723,
   8L,   "S",  "T", 2.544210664,  0.863058759, 2.421522773, -0.253972926, 2.850969229,  1.16769711, 2.491440334,    727.8533, 1.527643265, 5.056660602, 1.050339825, 2.255789124,
   9L,   "D",  "C", 2.660725071,   0.19152912, 2.386815891,  -0.24551055, 3.303287045, 1.239226175, 1.885908231, 1086.513581, 1.205152636, 3.988855225, 0.936746188, 2.079213139,
  10L,   "D",  "T", 1.000000043,   -1.6879396, 1.975281747, -0.294636707,  4.03267281, 0.995491642, 2.388872583, 1144.241477, 1.416744746, 5.304303716, 1.132988861, 2.310946879,
  11L,   "S",  "C", 2.141481291,  -0.74849056, 1.274560633, -0.322967401, 1.705223669, 0.851859637, 1.592147228, 950.0054511, 1.466445721, 3.109025081, 0.839443953, 2.205810179,
  12L,   "S",  "T", 2.286476118, -0.290218486, 3.250075892, -0.082343235, 5.791439422, 1.572320436, 1.881082365, 986.5403759, 1.643689034, 3.757771205, 0.929345145, 2.334078973,
  13L,   "D",  "C",  1.59505822, -0.191039326, 2.504620142,  -0.22271534, 3.893795023, 1.133908309, 2.875731301, 1206.865724, 1.899459741, 6.064683327, 1.251683566, 2.603789187,
  14L,   "D",  "T",          NA,           NA, 1.572801946, -0.327462881,          NA,          NA,          NA,          NA,          NA,          NA,          NA,          NA,
  15L,   "S",  "C", 1.342572531, -1.697176281, 2.408643972, -0.468159588, 5.366091529, 1.520750286, 1.138225013, 111.4449683, 0.738816027, 2.033130123, 0.656435751, 1.614656879,
  16L,   "S",  "T", 1.802500944, -0.732303688, 2.420710317, -0.344370078, 4.246182243, 1.247083106, 1.806157277,  832.702169, 1.013302882, 4.040063799, 0.971004704, 2.006970051,
  17L,   "D",  "C", 1.342572531, -1.854059857, 1.803770473, -0.286815219, 3.692115749, 1.028534333, 0.824815252, 95.99426329, 0.500215288, 3.807243365, 0.784481642, 1.670080345,
  18L,   "D",  "T",  1.59505822, -1.804236383, 1.997985528, -0.288746162, 4.219977817, 1.241483915, 1.231843099, 545.3200946, 0.658181237, 2.574010051, 0.752951513, 1.632449327,
  19L,   "S",  "C", 1.342572531, -0.930642984,  1.03242075, -0.503640583, 1.320804746, 0.656743456,  2.76719333, 1789.699746, 1.326705241, 4.513433003, 1.105230957, 2.138136014,
  20L,   "S",  "T", 1.802500944, -1.081754877, 1.466031332, -0.631448078, 2.263589014, 0.898784301,  1.72608042, 936.4750912,  0.96056337,  3.63967137, 0.877613994, 1.881034282,
  21L,   "D",  "C", 1.000000043, -1.728784107, 1.697957298, -0.479575779, 3.282216435, 0.982733355, 2.427019294, 1327.285158, 1.221209356, 3.838064089, 0.914446077, 1.929598324,
  22L,   "D",  "T", 1.000000043, -1.837593377, 1.750266035, -0.039879239, 3.522133032, 1.034483709,  1.73758575, 404.0302053, 0.971611663, 3.470045517, 0.708356917, 1.674374006,
  23L,   "S",  "C", 2.141481291,  0.191363967, 1.805933831, -0.317752965, 2.217889147, 1.005032455,  2.14788367, 669.9874584, 1.219174303, 4.130536487, 0.938941835, 2.020832148,
  24L,   "S",  "T", 1.802500944, -0.120135688, 2.100619249, -0.259378795, 2.975361781, 1.097003809, 2.315129457, 698.7286322,  1.28335495, 4.427268796, 1.001421538, 2.094467904,
  25L,   "D",  "C", 1.802500944, -0.520875791, 2.539247754, -0.396236469, 4.312245708, 1.295020117, 1.979774157, 967.5562875, 1.067153537, 3.991306135, 0.878947043, 1.890897171,
  26L,   "D",  "T", 1.802500944, -0.370483994, 2.584270669, -0.312059668, 4.258720006, 1.296184349, 2.355128628, 1207.368392, 1.201754292, 4.152649306, 0.979084355,  2.00770456,
  27L,   "S",  "C", 1.981811095, -1.861752198, 1.920984275, -0.541176118, 4.047354641, 1.220211223, 1.167983617, 1617.992364, 0.941976564, 2.481297153, 0.787796557, 1.948593082,
  28L,   "S",  "T", 1.981811095,  -2.98776212, 1.897759527, -0.495455681, 5.270839188, 1.246415667, 0.489932575, 997.7861814,  0.51986096, 2.241169632, 0.653211663, 1.755986019,
  29L,   "D",  "C",  1.59505822, -2.428147182, 1.757692508, -0.576181278, 4.107251379, 1.128487377, 1.162635546,  1469.96656, 0.963771422,  2.60839009, 0.815017088, 2.005683173,
  30L,   "D",  "T", 1.000000043, -2.205457475, 1.864854558, -0.555249134, 4.227564743, 0.969926235, 1.489406214, 440.6697575, 1.141758545, 5.039286549, 1.124115287, 2.397962673,
  31L,   "S",  "C", 1.981811095, -0.876829423, 1.671004518, -0.553861125, 2.592626965, 1.013137476, 1.611116267, 1112.571559, 1.141133018, 3.328469719, 0.865311069, 2.030677821,
  32L,   "S",  "T",  1.59505822, -1.497448963, 1.468900707, -0.597667427, 2.518512621, 0.930701863, 1.450795751, 869.3145195, 1.062161593, 3.247858627, 0.889035313, 2.061422444,
  33L,   "D",  "C", 1.802500944, -1.521426577, 2.128307219, -0.438535061, 4.303356207, 1.461910157, 1.419284617, 1367.673961, 1.046322175, 1.721569081, 0.715512853, 1.827162709,
  34L,   "D",  "T", 2.875093886,  0.254797323, 1.823846553, -0.587334268, 2.213992598, 1.033290886, 1.682901812,  748.573141,  1.17568147, 3.850099022, 0.920017562, 2.101029065,
  35L,   "S",  "C", 1.981811095,  -0.88940516, 2.415534679, -0.414364457, 4.402788148, 1.283594144, 1.391677585, 602.7375088, 1.036605687, 3.643517268, 0.885312293, 2.059875887,
  36L,   "S",  "T", 1.802500944, -1.596027732, 2.292753199, -0.408480571, 4.876213553, 1.325179463, 1.165981267, 680.9242961, 0.918404542, 2.880179112, 0.736713011, 1.859981968,
  37L,   "D",  "C", 2.419988191,  0.206135816, 2.733507433, -0.148265596, 3.994893274, 1.273992769, 1.821244418, 462.5763752, 1.203128298, 5.047062376, 1.018372432, 2.196664948,
  38L,   "D",  "T", 1.981811095, -0.597473249, 2.137170588, -0.518691744, 3.436122945, 1.129616086, 1.775977004, 1070.890919, 1.182586476, 4.179006868,  0.97310094, 2.141865994
  )
head(df)
#> # A tibble: 6 x 15
#>      id edge  trt      nl     lm     md      c   mgg   mgcm      p     sp     ap
#>   <int> <chr> <chr> <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#> 1     1 S     C      1.80 -1.13   1.75  -0.303  2.94  1.02   1.60  1166.   1.10 
#> 2     2 S     T     NA    NA     NA     NA     NA    NA     NA       NA   NA    
#> 3     3 D     C      1.60 -1.55   1.17  -0.341  1.85  0.787  1.41   663.   0.899
#> 4     4 D     T      1.34 -2.22   0.962 -0.332  1.65  0.750  0.674   63.6  0.543
#> 5     5 S     C      1.80 -0.165  2.16  -0.285  3.14  1.09   2.21   496.   1.16 
#> 6     6 S     T      2.14  0.250  2.44  -0.219  3.36  1.16   2.31   911.   1.20 
#> # ... with 3 more variables: la <dbl>, lacm <dbl>, lacmd <dbl>

I have managed to successfully create a list of 12 models as follows:

# Fit one linear model per measurement column (columns 4 to 15 of df), each
# with edge, trt and their interaction as predictors. The resulting list
# carries the column names of df[, 4:15].
models_1 <- df[, 4:15] %>%
  map(function(.x) lm(.x ~ edge * trt, data = df))

Now I would like to run the following code, but keep getting an error that the vector lengths do not agree:

# NOTE: `%>%` supplies the grouped data frame as map2()'s first argument, so
# it becomes `.x` (15 columns), df[, 4:15] becomes `.y` (12 columns), and
# models_1 lands in the function slot -- which is why map2() reports
# mismatched lengths of 15 and 12.
sim <- df %>%
  group_by(edge) %>%
  map2(df[, 4:15], models_1, ~ anova_test(.x, ~ df$trt, error = .y, type = 3))

The number of columns I call (4:15) is equal to the length of models_1, so I'm not sure how to fix this error:

Error: Mapped vectors must have consistent lengths:
* `.x` has length 15
* `.y` has length 12

I don't think I can simply subset my dataframe to only include cols 4:15, as I need cols 2:3 in order to group_by and compare against (i.e. I use df$trt when using the map2 function). I'm sure it's a relatively easy fix, but I haven't been able to find a working solution. Any thoughts would be greatly appreciated!

Here is a working example, starting with creating the initial model, for a single variable:

# Full model for a single response: nl explained by edge, trt and their
# interaction.
model <- lm(nl ~ edge*trt, data = df)

# anova_test() is called on the data grouped by edge with the formula
# nl ~ trt; the full model fitted above is passed through `error`.
df %>%                            #These three lines are what I need help running in a loop/map2 function
  group_by(edge) %>%
  anova_test(nl ~ trt, error = model, type = 3)

Created on 2020-05-19 by the reprex package (v0.3.0)

Haven't you just missed a comma?

# Same call with the comma after df[, 4:15] in place. The comma is not the
# cause of the length error: the piped, grouped data frame still occupies
# map2()'s first argument.
sim <- df %>%
  group_by(edge) %>%
  map2(df[, 4:15], models_1, ~ anova_test(.x, ~ df$trt, error = .y, type = 3))

Hey @martin.R - Thanks! I still get the same error about vector lengths disagreeing though. I've fixed the comma error in my original post :+1: - I seem to have accidentally dropped it while making my reprex

Could you write a standalone, handcrafted anova_test function call that would work for the first of your list of 12 models?

models_1[[1]] ?

@nirgrahamuk - I'm not sure I follow you. The second model is different and only uses models_1 for the error rate. I may just be missing your point though so I'm all ears if you could elaborate a bit more!

what would you type here

 anova_test(various parameters)

so as to perform your anova_test on the "nl" model, i.e. your first one.

Before you try to do things a dozen times with mapping, try to do them successfully once, directly, without mapping. This is a general technique for mapping successfully.

Here's a working example for a single variable (starting from the top, so creating the model and then proceeding):

# Single-response version of the models stored in models_1: nl explained by
# edge, trt and their interaction.
model <- lm(nl ~ edge*trt, data = df)

# One hand-written anova_test() call on the data grouped by edge; `model` is
# supplied via `error`. This is the call that needs to be repeated for every
# response column.
df %>%                            #These three lines are what I need help running in a loop/map2 function
  group_by(edge) %>%
  anova_test(nl ~ trt, error = model, type = 3)

thanks.
I first lifted out the hardcoded model variable target name

# Same grouped call, but the formula is assembled from a string so that the
# response name ("nl") can later be replaced by a variable.
df %>%
  group_by(edge) %>%
  anova_test(as.formula(paste0("nl","~ trt")), error = model, type = 3)

This worked fine. Next, I made the parameters explicit, which to my taste is clearer to follow: since map wants a sequence to iterate over and a function to apply at each iteration, I like to make the function-call part obvious.


  # Pipe removed and every argument named, so the whole analysis is a single
  # expression that can serve as the body of a mapped function.
  anova_test(data=group_by(df,edge),
             formula = as.formula(paste0("nl","~ trt")),
             error = model,
             type = 3)
  

then we can map, we use the names of the models_1 list


  # Run the grouped ANOVA once for every fitted model. imap() hands each
  # model to the function as `.x` and its list name (the response column) as
  # `.y`, and keeps those names on the result, so each ANOVA table can be
  # looked up by response (e.g. result$nl) instead of by position.
  imap(models_1,
       ~ anova_test(data = group_by(df, edge),
                    formula = as.formula(paste0(.y, "~ trt")),
                    error = .x,
                    type = 3))
1 Like

Amazing!!! Thanks @nirgrahamuk!! You have no idea how much of a pain this has been for me to figure out. Thanks for working it through with me :+1:

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.