It was a sketchy outline I have you. Let me supplement that with a bit of code from some medical records I'm helping with and see if that helps you follow the logic better. The input file patients had multiple entries of diagnostic codes. As you did, I spread those into new columns, which is how I ended up in your situation. Here's what I did to get them back to a single record where each of the columns has a T/F logical value:
Example of a record with multiple entries to be reduced
> patient150580164 # the twin duplicates
# A tibble: 4 x 24
INC_KEY AGE GENDER ADULT `808.0` `808.1` `808.2` `808.3` `808.4` `808.41` `808.42` `808.43` `808.49`
<int> <int> <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
1 150580164 29 Female TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
2 150580164 29 Female TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
3 150580164 29 Female TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
4 150580164 29 Female TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# ... with 11 more variables: `808.5` <lgl>, `808.51` <lgl>, `808.52` <lgl>, `808.53` <lgl>,
# `808.59` <lgl>, `808.8` <lgl>, `808.9` <lgl>, Expired <lgl>, Intervention <lgl>, SURGERY <lgl>,
# YEAR <dbl>
>
With that tibble as iinput
setasides150580164 <- patient150580164 %>% # save for later
select(INC_KEY, AGE, GENDER, ADULT, YEAR) %>%
distinct()
current_INC_KEY <- setasides150580164$INC_KEY # save INC_KEY separately
patient150580164 <- patient150580164 %>% # strip the saved data
select(-INC_KEY, -AGE,-GENDER,-ADULT, -YEAR)
mat150580164 <- as.matrix(patient150580164) # convert to matrix
list_of_col <- colnames(mat150580164) # save list of column names
# sum the columns, convert from numeric to T/F, convert to matrix, transposed
patient150580164 <- t(as.matrix(as.logical(colSums(mat150580164 > 0))))
colnames(patient150580164) <- list_of_col # use the save column names
patient150580164 <- as.tibble(patient150580164) %>% # add back in saved INC_KEY
mutate(INC_KEY = current_INC_KEY) %>%
select(INC_KEY, everything())
# Add back in all the data set aside and make a new object with the transformed data
patient150580164 <- inner_join(patient150580164, setasides150580164, by = "INC_KEY") %>%
select(INC_KEY, AGE, GENDER, ADULT, Expired, Intervention, list_of_col)
patient150580164 # show the result
# A tibble: 1 x 23
INC_KEY AGE GENDER ADULT Expired Intervention `808.0` `808.1` `808.2` `808.3` `808.4` `808.41`
<int> <int> <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
1 150580164 29 Female TRUE FALSE TRUE TRUE FALSE TRUE FALSE FALSE FALSE
# ... with 11 more variables: `808.42` <lgl>, `808.43` <lgl>, `808.49` <lgl>, `808.5` <lgl>,
# `808.51` <lgl>, `808.52` <lgl>, `808.53` <lgl>, `808.59` <lgl>, `808.8` <lgl>, `808.9` <lgl>,
# SURGERY <lgl>