Group_by not working

I am trying to run a rolling regression with the new "roll" package. I need to run it on grouped data (in other words run the rolling regression per stock). Yet, grouping by stock-identifier (LPERMNO) does not work for some reason, although I've used it several times before.
This issue remains even if I run the dplyr package right before running the code, so other conflicting packages should not be an issue.

roll <-data %>%
group_by(LPERMNO) %>%
do(as.data.frame(coefficients( roll_lm(data$mktrf, data$exc_ret, 252, intercept = TRUE,
min_obs = 200, complete_obs = TRUE, na_restore = FALSE))))

here is some sample data created with dput

structure(list(date = structure(c(17021, 17022, 17023, 17024,
17025, 17028, 17029, 17030, 17031, 17032, 17035, 17036, 17037,
17038, 17039, 17042, 17043, 17044, 17045, 17046, 17050, 17051,
17052, 17053, 17056, 17057, 17058, 17059, 17060, 17063, 17064,
17065, 17066, 17067, 17070, 17071, 17072, 17073, 17074, 17077,
17078, 17079, 17080, 17081, 17084, 17085, 17086, 17087, 17088,
17091, 17092, 17093, 17094, 17095, 17098, 17099, 17100, 17101,
17102, 17105, 17106, 17107, 17108, 17109, 17112, 17113, 17114,
17115, 17116, 17119, 17120, 17121, 17122, 17123, 17126, 17127,
17128, 17130, 17133, 17134, 17135, 17136, 17137, 17140, 17141,
17142, 17143, 17144, 17147, 17148, 17149, 17150, 17151, 17154,
17155, 17156, 17157, 17158, 17162, 17163, 17164, 17165, 17169,
17170, 17171, 17172, 17175, 17176, 17177, 17178, 17179, 17183,
17184, 17185, 17186, 16804, 16805, 16806, 16807, 16808, 16811,
16812, 16813, 16814, 16815, 16819, 16820, 16821, 16822, 16825,
16826, 16827, 16828, 16829, 16832, 16833, 16834, 16835, 16836,
16839, 16840, 16841, 16842, 16843, 16847, 16848, 16849, 16850,
16853, 16854, 16855, 16856, 16857, 16860, 16861, 16862, 16863,
16864, 16867, 16868, 16869, 16870, 16871, 16874, 16875, 16876,
16877, 16878, 16881, 16882, 16883, 16884, 16888, 16889, 16890,
16891, 16892, 16895, 16896, 16897, 16898, 16899, 16902, 16903,
16904, 16905, 16906, 16909, 16910, 16911, 16912, 16913, 16916,
16917, 16918, 16919, 16920, 16923, 16924, 16925, 16926), class = "Date"),
LPERMNO = c(10025L, 10025L, 10025L, 10025L, 10025L, 10025L,
10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L,
10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L,
10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L,
10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L,
10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L,
10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L,
10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L,
10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L,
10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L,
10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L,
10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L,
10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L,
10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L, 10025L,
10025L, 10025L, 10025L, 10025L, 10025L, 10026L, 10026L, 10026L,
10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L,
10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L,
10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L,
10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L,
10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L,
10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L,
10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L,
10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L,
10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L,
10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L, 10026L,
10026L, 10026L, 10026L), mktrf = c(-6e-04, 5e-04, -0.0029,
0.0054, -6e-04, 0.0039, -0.0057, 0.0012, 0.0032, -0.0011,
-2e-04, 0.0027, -0.0056, -8e-04, -0.0015, 0.0053, -0.0014,
-0.0024, 3e-04, 0.0053, 0.0026, 9e-04, -0.0018, -0.0247,
0.0144, -0.0148, -8e-04, 0.0107, -0.0036, 5e-04, -2e-04,
0.0112, 0.007, -0.006, -0.0088, 0.0064, 0.0056, -0.0098,
0.0088, -0.0026, -0.0046, 0.0058, -6e-04, -0.0038, 0.0052,
-0.013, 6e-04, -0.0042, 1e-04, -0.0029, 0.006, 0.0025, -0.0016,
2e-04, 0.0054, -0.0046, -0.0023, -0.0033, -0.0029, 2e-04,
-0.0068, -0.0073, -0.004, -0.0012, 0.0223, 0.004, 0.0146,
0.0032, 0.0018, 0.0021, 0.008, -0.0012, 0.0056, -0.0016,
0.0077, 0.0031, 0.0016, 0.004, -0.0064, 0.0011, -0.0025,
-0.0036, 0, 0.0075, 0.0048, 0.0126, 0.0036, 0.0046, -0.003,
0.006, -0.0082, 0.0046, -0.0022, 0.0022, 0.0044, -0.0024,
-0.003, 0.0019, 0.0027, -0.0087, -4e-04, -0.0052, 0.0083,
0.0079, -0.0021, 0.0029, -0.0037, 0.0016, 0.0031, -0.003,
0.0029, -0.0049, 0.0024, -0.0038, 0.0033, -0.0159, 0.0012,
-0.0135, -0.0244, -0.0111, -6e-04, 0.0071, -0.0267, 0.0165,
-0.0214, -0.002, -0.0094, 0.0045, 0.0208, -0.0171, 0.0152,
-0.0111, 0.0049, 0.0257, -4e-04, -0.02, 0.0046, 0.0027, -0.0206,
-0.0151, -0.001, 1e-04, -0.0117, 0.0198, 0.0178, 0.0175,
-0.0051, 6e-04, 0.0143, -0.0122, 0.0053, 0.011, -1e-04, -0.0069,
0.0234, 0.0054, 0.0051, 0.0037, 0.0021, -0.0127, 0.005, -0.0011,
0.0169, -0.0012, -0.0036, 0.0063, 0.0072, 0.0054, 0.001,
-5e-04, -0.0086, 0, 4e-04, 0.0107, 0.0041, -0.0011, 0.0064,
-0.0041, -0.0094, 0.0114, -0.0123, 0.0029, -0.0028, 0.0099,
0.0123, 3e-04, -4e-04, 0.0065, 0.0029, 0.0015, -0.0047, 0.0012,
-0.0024, 0.0029, 0.0021, -0.0096, -0.005, 0.0078, -0.0105,
-0.0066, -8e-04), exc_ret = c(0.00620354298757293, -0.0137466099558914,
0.000117779197546474, -1e-05, -0.0182800907116391, 0.0318744351900052,
-0.0379719119687225, 0.00208753539590984, 0.0177817320774463,
-0.00926449871465289, -0.0141511520498184, 0.0085437570732992,
0.00429584551148217, 0.442889831103027, -0.00658302359085188,
0.00270911538112923, 0.00143626231582756, -0.00172495622348594,
0.00179831826401451, 0.00531490974729249, -0.0021645919741448,
0.00421852001799368, -0.0139859899659559, -0.0169098727966564,
0.00775340110905727, 0.00099880410858406, -0.00605672469079259,
0.0111431016683566, -0.00265357338195087, 0.00465136550589535,
0.00235535662299864, 0.0133318043202034, -0.00323436184505148,
0.00124797466079614, -0.002881758054384, 0.004310043200432,
-0.00296725423425042, -0.013851452453712, -0.0031999380240612,
-0.00695888909207281, -0.00599471595617356, 0.00730752500926275,
0.00228885057471257, -0.00514761467889906, 0.00607631501291024,
-0.00367636113657193, 0.010385584176633, -0.00246834471455886,
0.00774830595107707, 0.00171085861787874, 0.00677119349005435,
0.00537841490794795, -0.000456627958910151, -1e-05, 0.00624558534405712,
-0.0106671936056837, -0.0197586535008977, -0.000467875457875456,
-0.000468085203848047, 0.00411465627864345, -0.000466412596987699,
-0.00594607305936084, 0.00320543408360125, -0.0032151282051283,
0.0100956499770327, 0.0131778126421101, 0.0152503231597845,
-0.0238826790450927, 0.0117653623188406, 0.00222813786929283,
0.00668941938365337, 0.0026519343389529, 0.000432477876106073,
-0.00177912870411328, 0.00663599025254753, 0.000430140845070491,
-0.00308963044434678, 0.00263783759929399, -0.00441140845070425,
0.041988231653404, -1e-05, -0.0144351166737377, 0.0034338226431337,
-0.00215500214500213, 0.00901837489251929, -1e-05, 0.0102158201959948,
0.00125528890763389, 0.00715090985678175, -0.00837470096194064,
0.00125528890763389, 0.00209614995787695, 0.00755620428751575,
-0.00251312891113891, -0.00544705562526131, -0.00211260723296885,
-0.00464548251158885, 0.0025302201524134, -0.0033883783783784,
-0.00594220338983058, -0.00214128729752776, -0.00812618966253735,
0.00687061154177439, 0.00896203592814362, -0.00595471810089019,
0.00637658848614078, -2e-05, 0.00294610169491518, 0.00589465990705542,
-0.00631987400251997, 0.00674246830092975, -0.000859630562552429,
0.00670268907563023, -0.0818230050083473, -0.000474545454545374,
NA, -0.00583605977540014, 0.00213466156719733, -0.0164196325552499,
-0.00207543764663409, 0.0121168279229587, 0.00866613061735011,
-0.0273693534100975, 0.0101994353883983, -0.0174885062652124,
-0.00266079456830892, -0.0163753449862005, -0.0143097643097643,
0.0197362178574818, -0.00260537824509166, -0.00923593618807717,
-0.0112994350282486, 0.00838095238095238, 0.0198337740838685,
-0.00788182811631792, -0.0180254951927564, 0.00103562737642588,
0.0017941971322762, 3.73933649287606e-05, -0.00489128524714471,
0.000466235831984084, 0.00951018278750955, -0.000953040362127556,
0.017169535586181, 0.00862028953229383, -0.00792241144539512,
0.00982028841695255, 0.00944908715217203, 0.000444876273653593,
-0.00264708284077468, 0.0070104230488693, 0.0122127252150293,
-0.0108328980322003, 0.00179848177954607, 0.00278808646989799,
-0.00460045904590449, -0.00561629351659281, -0.0111039347094662,
-0.00046977011494247, -0.0174893008279669, -0.00113359550561791,
-0.00666541807274095, 0.00036746532037386, -0.0117070097160646,
-0.00583227736947594, 0.0191912288786483, 0.0106343104747551,
-0.0169733703047815, 0.0012225779842608, -0.00673348484848484,
0.00246878730098194, 0.00312837375178316, 0.00596269624573382,
0.00837752238243323, 0.0117657009345796, 0.000174740439682196,
0.00637236793498341, -0.0168853812976049, -0.0196023522822738,
-0.00418927925354662, -0.0113777607801893, 0.0117988394584139,
-0.0159625310648059, -0.00485672656629432, 0.0081015129331381,
-0.0122966692486445, 0, 0.00872463483972163, 0.00242954324586986,
-0.000969461948618577, -0.0243571081999029, 0.00646508852198124,
-0.00207530388378296, 0.00148544266191308, 0.0053396618214181,
-0.00373758237434829, -0.0015796228650411, 0.0150201592010285,
-0.00293255236239658, 0.0232435417684416, -0.00125128711925919
)), class = "data.frame", row.names = c(NA, -201L))

Hi @Pia, could you please clarify what is the expected output here? Given that your data df has only 201 rows, are you certain you want to set the min_obs = 200 in your roll_lm call?

Hi @valeri. The 200 minimum observations mean that when there is not at least 252 past observations available, start the regression when 200 observations are available. I just narrowed down my sample data frame for the upload, which is why it’s not big enough for this minimum observation expression right now. Just for the purpose of trying with the smaller sample that I uploaded, we could change it to e.g. 50 and change the rolling window to e.g. 100.

Maybe one of these will get you going (2nd one more wordy, but possibly more instructive). I guess someone more experienced with purrr can even simplify it and find an easy way to name the output list as well.

In fact you can name the list with setNames (see the roll object below).

roll <- data %>%
	group_by(LPERMNO) %>% 
	group_map(~roll_lm(.x$mktrf, .x$exc_ret, 100, intercept = TRUE,
					min_obs = 50, complete_obs = TRUE, na_restore = FALSE)$coefficients) %>% 
	setNames(unique(data$LPERMNO))

nested <- data %>% 
	group_by(LPERMNO)

roll2 <- 	purrr::map(.x = unique(nested$LPERMNO), function(x) {
	roll_lm(filter(nested, LPERMNO == x)$mktrf, filter(nested, LPERMNO == x)$exc_ret, 100, intercept = TRUE,
					min_obs = 50, complete_obs = TRUE, na_restore = FALSE)$coefficients})
1 Like

Great! I really appreciate your help! I used the first alternative as I am not familiar with purrr.

I only need the mktrf coefficient, so I specified that in the code using $coefficients[,2] instead. :slight_smile: I used melt() to have the data in the original format and used cbind to also have a date column again. As long as the dates don't get mixed up in the regression, this should work. Do you have a "prettier" idea for also including the date?

roll <- data %>%
group_by(LPERMNO) %>%
group_map(~roll_lm(.x$mktrf, .x$exc_ret, 100, intercept = TRUE,
min_obs = 50, complete_obs = TRUE, na_restore = FALSE)$coefficients[,2]) %>%
setNames(unique(data$LPERMNO))

output<-as.data.table(cbind(melt(roll),test$date))


    output
         value    L1  test$date
  1:        NA 10025 2016-01-05
  2:        NA 10025 2016-01-06
  3:        NA 10025 2016-01-07
  4:        NA 10025 2016-01-08
  5:        NA 10025 2016-01-11
 ---                           
794: 0.8091265 10026 2018-02-08
795: 0.8121928 10026 2018-02-09

Hi @Pia,

here is another more 'tidyvers'-y way of doing it, not sure it is clearer though. In the last left_join, the LPERMNO column needs to be converted to character since it is numeric in the original data df. The setNames function on the other hand implicitly converts the the unique values of LPERMNO to strings, since list names cannot be numeric.

roll <- data %>%
	group_by(LPERMNO) %>%
	group_map(~roll_lm(.x$mktrf, .x$exc_ret, 100, intercept = TRUE,
										 min_obs = 50, complete_obs = TRUE, na_restore = FALSE)$coefficients[,2]) %>% 
	setNames(unique(data$LPERMNO)) %>% 
	map_dfr(.f = ~tibble(coefficient = .x), .id = 'LPERMNO') %>% 
	left_join(data %>%
							mutate(LPERMNO = as.character(LPERMNO)) %>% 
							select(date, LPERMNO))

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.