I am conducting food related study and would like to remove all of a participants data if they identified that they have any food related allergies during the questionnaire part of my study. I am trying to accomplish this using group_by, filter and str_detect.
Unfortunately, the code I have at the moment results in a new table, with only the answers with "gluten". The group_by function also does not function as expected, as it doesn't remove all the participants answers, only the rows that contain "gluten".
Here is the code I have now. I would like all of a participants answers tobe removed if they answered "gluten" anywhere in the question:)
my_data_raw_quest %>%
group_by(user_id) %>%
filter(
str_detect(dv, "(G|g)luten"))
Here is the table created from that code.
structure(list(session_id = c(53877, 53891, 54090, 54469, 54929,
55038, 55061, 55096, 55104, 55108, 55145, 57068, 57074, 57146,
57276, 57435, 57952, 58817), project_id = c(495, 495, 495, 495,
495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495,
495), quest_name = c("Sociodemographic", "Sociodemographic",
"Sociodemographic", "Sociodemographic", "Sociodemographic", "Sociodemographic",
"Sociodemographic", "Sociodemographic", "Sociodemographic", "Sociodemographic",
"Sociodemographic", "Sociodemographic", "Sociodemographic", "Sociodemographic",
"Sociodemographic", "Sociodemographic", "Sociodemographic", "Sociodemographic"
), quest_id = c(2189, 2189, 2189, 2189, 2189, 2189, 2189, 2189,
2189, 2189, 2189, 2189, 2189, 2189, 2189, 2189, 2189, 2189),
user_id = c(47667, 47681, 47877, 48251, 48705, 48816, 48839,
48873, 48881, 48881, 48921, 50663, 50723, 50794, 50924, 51077,
51561, 52161), user_sex = c("male", "female", "female", "female",
"female", "na", "female", "female", "female", "female", "female",
"female", "female", "female", "female", "female", "male",
"female"), user_status = c("test", "test", "guest", "guest",
"registered", "guest", "guest", "guest", "test", "test",
"guest", "registered", "guest", "guest", "guest", "guest",
"guest", "test"), user_age = c(59, 40, 35, 38, 53.7, 28,
21, 65, 24, 24, 25, 20.8, 38, 44, 32, 34, 44, 20), q_name = c("food allergies",
"food allergies", "food allergies", "food allergies", "food allergies",
"food allergies", "food allergies", "food allergies", "food allergies",
"food allergies", "food allergies", "food allergies", "food allergies",
"food allergies", "food allergies", "food allergies", "Other",
"food allergies"), q_id = c(92827397, 92827397, 92827397,
92827397, 92827397, 92827397, 92827397, 92827397, 92827397,
92827397, 92827397, 92827397, 92827397, 92827397, 92827397,
92827397, 92831398, 92827397), order = c(4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4), dv = c("Gluten", "Gluten, cumin, paprika, anchovies",
"Gluten intolerance", "Dairy, gluten some veg, fruit and nuts",
"Gluten", "Gluten", "Gluten intolerant", "Gluten", "No allergies, but intolerant to gluten",
"No allergies, but gluten intolerant", "Lactose & gluten",
"gluten and dairy intolerance", "Sensitive to gluten and soy",
"Gluten", "Gluten", "Gluten", "Locked down with family, sister is gluten free",
"I am conscious of what gluten i eat as it sets my eczema off"
), starttime = structure(c(1607970136, 1607970692, 1607975785,
1607984805, 1608023741, 1608037872, 1608041491, 1608047134,
1608048524, 1608048811, 1608055657, 1609950997, 1609951334,
1609953692, 1609961095, 1609976350, 1610182572, 1610465355
), tzone = "UTC", class = c("POSIXct", "POSIXt")), endtime = structure(c(1607970180,
1607970791, 1607975825, 1607984927, 1608023787, 1608037944,
1608041525, 1608047239, 1608048613, 1608048856, 1608055709,
1609951071, 1609951428, 1609953730, 1609961133, 1609976399,
1610182657, 1610465458), tzone = "UTC", class = c("POSIXct",
"POSIXt")), undergraduate = c(FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE), NoUni = c(FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), Masters = c(FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE),
Postgraduate = c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE), degree = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
)), row.names = c(NA, -18L), groups = structure(list(user_id = c(47667,
47681, 47877, 48251, 48705, 48816, 48839, 48873, 48881, 48921,
50663, 50723, 50794, 50924, 51077, 51561, 52161), .rows = structure(list(
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9:10, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -17L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))