Hello! I doing a simple recode of an age categories variable, AGE, in order to get fewer categories. The command works completely fine, until I try to do it on a subset of the data. Something goes wrong after I subset the data with slice_min()
to only include the first row of each YEAR. Does anyone understand why?!?
this part works on my original df, reprex
reprex <- reprex %>%
mutate(age_r = ifelse(test = (reprex$AGE == 2 | reprex$AGE == 3), yes = 2,
ifelse(test = (reprex$AGE == 4 | reprex$AGE == 5), yes = 3,
ifelse(test = (reprex$AGE == 6 | reprex$AGE == 7), yes = 4,
ifelse(test = (reprex$AGE == 8 | reprex$AGE == 9), yes = 5,
no = reprex$AGE)))))
no longer works after using slice()
to only include the first year for each ID.
reprex_2 <- reprex %>% group_by(ID) %>% slice_min(YEAR, n=1)
reprex_2 <- reprex_2 %>%
mutate(age_r = ifelse(test = (reprex_2$AGE == 2 | reprex_2$AGE == 3), yes = 2,
ifelse(test = (reprex_2$AGE == 4 | reprex_2$AGE == 5), yes = 3,
ifelse(test = (reprex_2$AGE == 6 | reprex_2$AGE == 7), yes = 4,
ifelse(test = (reprex_2$AGE == 8 | reprex_2$AGE == 9), yes = 5,
no = reprex_2$AGE)))))
(Note that: I also tried the equivalent command filter(row_number() == 1L)
), instead of slice_min.
code to reproduce reprex:
reprex <- structure(list(ID = c(2079682L, 8114104L, 2079682L, 8114104L,
8313555L, 8325419L, 2079682L, 8114104L, 8337003L, 8325419L, 8313555L,
2079682L, 8114104L, 8337003L, 8384153L, 8313555L, 30146527L,
8325419L, 2079682L, 8325419L, 8313555L, 8114104L, 30356349L,
8384153L, 30146527L, 2079682L, 8384153L, 30146527L, 8325419L,
8114104L, 30356349L, 2079682L, 8313555L, 8325419L, 8384153L,
8114104L, 30356349L, 30146527L, 2079682L, 8384153L, 8114104L,
8325419L, 30356349L, 2079682L, 8114104L, 8325419L, 8926672L,
8313555L, 8384153L, 30356349L, 2079682L, 8325419L, 8926672L,
30356349L, 8114104L, 8384153L, 2079682L, 8114104L, 8384153L,
8325419L, 8926672L, 8313555L, 30356349L, 2079682L, 8313555L,
8926672L, 8114104L, 8384153L, 30356349L, 8325419L, 2079682L,
8114104L, 8384153L, 30356349L, 8313555L, 8325419L, 2079682L,
4596268L, 8313555L, 8325419L, 8384153L, 30356349L, 2079682L,
4596268L, 8313555L, 8325419L, 8384153L, 8926672L, 30356349L,
4596268L, 2079682L, 8384153L, 8325419L, 8313555L, 8926672L, 30356349L,
4596268L, 8325419L, 8384153L, 30356349L), AGE = c(8L, 7L, 8L,
7L, 0L, 4L, 8L, 7L, 3L, 4L, 0L, 8L, 8L, 3L, 8L, 0L, 8L, 4L, 8L,
4L, 0L, 8L, 8L, 8L, 8L, 9L, 8L, 8L, 4L, 8L, 8L, 9L, 0L, 5L, 8L,
8L, 8L, 8L, 9L, 8L, 8L, 5L, 8L, 9L, 8L, 5L, 7L, 0L, 9L, 8L, 9L,
5L, 8L, 8L, 8L, 9L, 9L, 8L, 9L, 5L, 8L, 0L, 8L, 9L, 0L, 8L, 8L,
9L, 8L, 6L, 9L, 8L, 9L, 8L, 0L, 6L, 9L, 8L, 8L, 6L, 9L, 9L, 9L,
8L, 9L, 6L, 9L, 8L, 9L, 8L, 9L, 9L, 6L, 9L, 8L, 9L, 8L, 7L, 9L,
9L), YEAR = c(2004L, 2004L, 2005L, 2005L, 2005L, 2005L, 2006L,
2006L, 2006L, 2006L, 2006L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L,
2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2011L, 2011L, 2011L, 2011L, 2011L,
2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2013L, 2013L,
2013L, 2013L, 2013L, 2013L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L,
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2020L,
2020L, 2020L, 2020L)), row.names = c(NA, -100L), class = "data.frame")