Creating columns that add up multiple rows for each subject

I am trying to create a set of columns from two variables with multiple rows per subject. I have a data frame similar to the example below:

Subject Drug Dose
1 Roc 10
1 Sux 100
1 Roc 50
2 Sux 120
2 Cis 10
2 Cis 2
2 Cis 2
3 Roc 100
3 Roc 150

etc.

I would like to convert the above data frame into a new data frame that adds up all the drug doses by drug name in a separate column.

Subject Roc Sux Cis
1 60 100 0
2 0 120 14
3 150 0 0

etc

I would use the pivot_wider function from tidyr.

library(tibble)
library(tidyr)
DF <- tribble(
  ~Subject, ~Drug, ~Dose,
  1, 'Roc', 10,
  1, 'Sux', 100,
  1, 'Roc', 50,
  2, 'Sux', 120,
  2, 'Cis', 10,
  2, 'Cis', 2,
  2, 'Cis', 2,
  3, 'Roc', 100,
  3, 'Roc', 150
)

DF_wide <- DF |> 
  pivot_wider(names_from = "Drug", values_from = "Dose", values_fn = sum)
DF_wide
#> # A tibble: 3 × 4
#>   Subject   Roc   Sux   Cis
#>     <dbl> <dbl> <dbl> <dbl>
#> 1       1    60   100    NA
#> 2       2    NA   120    14
#> 3       3   250    NA    NA

Created on 2022-09-20 with reprex v2.0.2

Thank you for your reply. I should point out that I am a novice with respect to R coding. Your reply was helpful but I must be interpreting your code incorrectly. My df is CTICUpostNMBD which I used in your code.

CTICUpostNMBD_new <- tibble(CTICUpostNMBD)
CTICUpostNMBD_wide <- CTICUpostNMBD |> 
  pivot_wider(names_from = "Drug", values_from = "Dose", values_fn = sum)

The wide df creates 3 new columns but does not add the doses and does not combine multiple rows into a single row for each subject.

Please post the output of

dput(head(CTICUpostNMBD, 10))
> dput(head(CTICUpostNMBD, 10))
structure(list(MRN = c("1", "1", "1", "1", 
"1", "2", "3", "3", "3", "4"
), DOS = structure(c(1596758400, 1596758400, 1596758400, 1596758400, 
1596758400, 1598572800, 1600732800, 1600732800, 1600732800, 1601337600
), tzone = "UTC", class = c("POSIXct", "POSIXt")), Drug = c("Succinylcholine", 
"Rocuronium", "Rocuronium", "Rocuronium", "Rocuronium", "Rocuronium", 
"Rocuronium", "Rocuronium", "Rocuronium", "Rocuronium"), Time = structure(c(1596786967.15, 
1596787350.797, 1596790680, 1596794566.093, 1596811978.32, 1598601047.537, 
1600764744.487, 1600780184.167, 1600788483.047, 1601365020), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), Dose = c(100, 20, 20, 20, 20, 100, 100, 50, 60, 50
)), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"
))

Your original post did not mention the DOS and Time columns. Try

CTICUpostNMBD_wide <- CTICUpostNMBD |> 
  select(MRN, Drug, Dose) |>
  pivot_wider(names_from = "Drug", values_from = "Dose", values_fn = sum)

@EconProf has a good solution. Alternatively, you can identify MRN as the id_col in the pivot_wider function.

library(tidyr)
DF <- structure(list(
  MRN = c("1", "1", "1", "1", 
                       "1", "2", "3", "3", "3", "4"), 
  DOS = structure(c(1596758400, 1596758400, 1596758400, 1596758400, 
                     1596758400, 1598572800, 1600732800, 1600732800, 1600732800, 1601337600), tzone = "UTC", class = c("POSIXct", "POSIXt")), 
  Drug = c("Succinylcholine", "Rocuronium", "Rocuronium", "Rocuronium", "Rocuronium", "Rocuronium", 
           "Rocuronium", "Rocuronium", "Rocuronium", "Rocuronium"), 
  Time = structure(c(1596786967.15, 1596787350.797, 1596790680, 1596794566.093, 1596811978.32, 1598601047.537,
                     1600764744.487, 1600780184.167, 1600788483.047, 1601365020), tzone = "UTC", class = c("POSIXct", "POSIXt")), 
  Dose = c(100, 20, 20, 20, 20, 100, 100, 50, 60, 50)), 
  row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"))

DF_wide <- DF |> 
  pivot_wider(id_cols = "MRN",names_from = "Drug", values_from = "Dose", values_fn = sum)
DF_wide
#> # A tibble: 4 × 3
#>   MRN   Succinylcholine Rocuronium
#>   <chr>           <dbl>      <dbl>
#> 1 1                 100         80
#> 2 2                  NA        100
#> 3 3                  NA        210
#> 4 4                  NA         50

Created on 2022-09-20 with reprex v2.0.2

1 Like

Thank you very much for both solutions and prompt responses.