Hi @yalem,
After a bit of editing, I think this code should get you closer to your desired output:
subject_id = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L,
6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 10L, 10L, 10L, 10L)
wave = c("wave1", "wave2", "wave3",
"wave4", "wave1", "wave2", "wave3", "wave1", "wave2", "wave1",
"wave2", "wave3", "wave4", "wave1", "wave2", "wave3", "wave4",
"wave1", "wave2", "wave1", "wave2", "wave3", "wave4", "wave1",
"wave2", "wave3", "wave1", "wave1", "wave2", "wave3", "wave4")
AgeinMonths = c(10L, 38L, 56L, 76L, 38L, 46L, 66L, 23L, 41L,35L, 56L, 76L, 98L, 36L,
54L, 74L, 92L, 38L, 58L, 11L, 36L, 49L, 69L, 17L, 37L, 55L, 36L,
36L, 54L, 72L, 90L)
BITSEA = c("7","Not applicable", "Not applicable", "Not applicable", "Not applicable",
"Not applicable", "Not applicable", "27", "Not applicable", "28",
"Not applicable", "Not applicable", "Not applicable", "Not applicable",
"Not applicable", "Not applicable", "Not applicable", "Not applicable",
"Not applicable", "23", "Not applicable", "Not applicable", "Not applicable",
"26", "Not applicable", "Not applicable", "Not applicable", "Not applicable",
"Not applicable", "Not applicable", "Not applicable")
CBCL = c("Not applicable", "66", "73", "87", "57", "63", "77", "Not applicable", "38",
"Not applicable", "70", "77", "97", "66", "45", "67", "68", "54", "63",
"Not applicable", "45", "51", "65", "Not applicable", "45", "57", "56",
"45", "55", "65", "71")
df <- data.frame(subject_id, wave, AgeinMonths, BITSEA, CBCL, stringsAsFactors=FALSE)
df
library(tidyverse)
df %>%
mutate(wave = as.factor(str_sub(wave, start=5, end=5)),
BITSEA = as.integer(ifelse(BITSEA=="Not applicable", NA, BITSEA)),
CBCL = as.integer(ifelse(CBCL=="Not applicable", NA, CBCL))) -> df2
str(df2)
# Find number of waves recorded per subject
df2 %>%
group_by(subject_id) %>%
summarise(total_wave = n()) -> temp1.df
# Find what type of wave was recorded first
df2 %>%
group_by(subject_id) %>%
filter(wave == 1) %>%
summarise(first_wave = ifelse(is.na(BITSEA), "CBCL", "BITSEA")) -> temp2.df
# Combine the new information
left_join(df2, temp1.df, by="subject_id") %>%
left_join(., temp2.df, by="subject_id") -> full.df
# Filter on certain criteria
full.df %>%
filter(total_wave == 4)
full.df %>%
filter(total_wave == 4) %>%
filter(first_wave=="BITSEA")
full.df %>%
filter(total_wave == 4) %>%
filter(first_wave=="CBCL")
HTH