Thank you @_shortessay !
This is great. It worked perfectly on the example dataset and yes by mode I meant the max.
I had a follow-up question and wanted to get your suggestion. In my actual data, the data is not perfectly structured as in the example. For example ( I tried to recreate in the data frame below "exerciselongV1"), some subjectid enter the dataset only on month"3" for subjectid "f" i.e end of first quarter or month5 :subjectid "g", i.e during the 2nd quarter, or some subjectid leave the dataset early e.g. subjectid "a" on month6.
I created a variable "entry_mnt" to show which month they enter the dataset.
Under this condition, to do similar conversion that you showed, do you suggest?
a. Creating rows 1-9 for each subjectid even if they do not have data (entered late or left early) and just keep the values NA? "e.g. "exerciselongV1"
b. Or is there a better way to do this without having to create the NA's/do the step a? (E.g "exerciselongV2")
The format I have is the second one - "exerciselongV2" so that is preferable.
I have a large sample ~ 8000 sampleid and the quarter goes upto quarter 100 so wanted to check if there is an easier way of doing this?
Thank you again for your time. This is super helpful !
exerciseV1 <- data.frame(
subjectid = c("a","b","c","d","e","f","g","h","i","j","k"),
location = c("NY","NC","WA","WA","OR","CA","AR","KS","AZ","VT","MA"),
entry_mnt1 = c(0,1,1,1,0,0,0,1,1,1,1),
entry_mnt2 = c(2,2,2,2,2,0,0,2,2,2,2),
entry_mnt3 = c(3,3,3,3,3,3,0,3,3,3,3),
entry_mnt4 = c(4,4,4,4,4,4,0,4,4,4,4),
entry_mnt5 = c(5,5,5,5,5,5,5,5,5,5,5),
entry_mnt6 = c(0,6,6,6,6,6,6,6,6,6,6),
entry_mnt7 = c(0,7,7,7,7,7,7,7,7,7,7),
entry_mnt8 = c(0,8,8,8,8,8,8,8,8,8,8),
entry_mnt9 = c(0,9,9,9,9,9,9,9,9,9,9),
age1 = c(NA,18,28,14,NA,NA,NA,19,12,23,34),
age2 = c(28,19,29,15,12,NA,NA,20,13,24,35),
age3 = c(29,20,30,16,13,34,NA,21,14,25,36),
age4 = c(30,21,31,17,14,35,NA,22,15,26,37),
age5 = c(31,22,32,18,15,36,24,23,16,27,38),
age6 = c(NA,23,33,19,16,37,25,24,17,28,39),
age7 = c(NA,24,34,20,17,38,26,25,18,29,40),
age8 = c(NA,25,35,21,18,39,27,26,19,30,41),
age9 = c(NA,26,36,22,19,40,28,27,20,31,42),
gym1 = c(NA,0,0,1,NA,NA,NA,1,0,0,0),
gym2 = c(1,0,0,1,0,NA,NA,1,0,0,0),
gym3 = c(1,1,0,1,0,0,NA,1,1,0,0),
gym4 = c(1,1,0,1,0,0,NA,1,1,1,1),
gym5 = c(0,1,0,1,0,0,0,1,1,1,1),
gym6 = c(NA,1,0,1,0,0,1,1,0,1,0),
gym7 = c(NA,1,0,0,1,0,1,0,0,1,1),
gym8 = c(NA,1,0,0,1,0,1,0,0,1,1),
gym9 = c(NA,1,0,0,1,0,1,0,0,1,1),
ff1 = c(NA,18,16,10,NA,NA,NA,9,0,8,3),
ff2 = c(16,14,17,7,3,NA,NA,4,8,5,12),
ff3 = c(3,20,6,0,9,16,NA,17,19,9,13),
ff4 = c(5,3,15,0,3,16,NA,8,10,7,17),
ff5 = c(8,7,20,0,2,3,17,0,1,16,15),
ff6 = c(NA,8,14,0,1,20,5,0,6,10,9),
ff7 = c(NA,4,7,11,11,10,15,6,20,13,16),
ff8 = c(NA,9,12,3,41,19,15,12,6,20,11),
ff9 = c(NA,1,22,17,18,13,2,5,3,19,7),
run1 = c(NA,16,16,0,NA,NA,NA,89,9,12,15),
run2 = c(3,14,17,7,30,NA,NA,48,45,50,11),
run3 = c(12,22,6,0,17,16,NA,17,19,69,67),
run4 = c(34,32,15,19,9,16,NA,8,10,7,16),
run5 = c(45,17,20,10,21,38,017,0,1,16,34),
run6 = c(NA,2,14,7,17,2,57,0,6,10,15),
run7 = c(NA,14,7,19,11,101,15,6,20,13,17),
run8 = c(NA,9,13,3,4,23,11,12,6,20,12),
run9 = c(NA,12,11,17,19,15,2,5,3,19,72)
)
library(tidyr)
exerciselongV1 <- pivot_longer(data = exerciseV1,cols = entry_mnt1:run9,
names_pattern = "([^\\d]+)(\\d+)",
names_to = c(".value","month_number"))
exerciselongV2 <- subset(exerciselongV1, entry_mnt ! = "0")