concatenate a variable from 3 rows

I have a dataset df and want to change table from 'narrow to fat'. Key made by compound variables (cert_id:obj_cd) and combine rows of month variable identified by key to one variable, say paid_months, with value 10,11,12 or whatever it was. How to do it? Thanks in advance!

df <- data.frame(
  stringsAsFactors = FALSE,
           cert_id = c(48L,48L,48L,48L,48L,48L,
                       48L,48L,48L,48L,48L,48L,48L,48L,48L,48L,48L,48L,
                       48L,48L),
            sch_id = c("10001","10001","10001",
                       "10001","10001","10001","10001","10001","10001","10001",
                       "10001","10001","10005","10005","10005","10005",
                       "10005","10005","10005","10005"),
        obj_id = c("40610","40610","40610",
                       "40612","40612","40612","40613","40613","40613","40614",
                       "40614","40614","3","3","4","4","40610","40610",
                       "40610","40612"),
            obj_cd = c("S-OP-1b","S-OP-1b","S-OP-1b",
                       "S-OP-1d","S-OP-1d","S-OP-1d","S-OP-1e","S-OP-1e",
                       "S-OP-1e","S-OP-1f","S-OP-1f","S-OP-1f","S-3","S-3",
                       "S-4","S-4","S-OP-1b","S-OP-1b","S-OP-1b","S-OP-1d"),
             month = c(10,11,12,10,11,12,10,11,
                       12,10,11,12,10,11,11,12,10,11,12,10)
)

Not sure I understood your final 'fat' table structure.
But your wish looks relatively straightforward.

try this.

library(data.table)

df <- data.frame(
  stringsAsFactors = FALSE,
  cert_id = c(48L,48L,48L,48L,48L,48L,
              48L,48L,48L,48L,48L,48L,48L,48L,48L,48L,48L,48L,
              48L,48L),
  sch_id = c("10001","10001","10001",
             "10001","10001","10001","10001","10001","10001","10001",
             "10001","10001","10005","10005","10005","10005",
             "10005","10005","10005","10005"),
  obj_id = c("40610","40610","40610",
             "40612","40612","40612","40613","40613","40613","40614",
             "40614","40614","3","3","4","4","40610","40610",
             "40610","40612"),
  obj_cd = c("S-OP-1b","S-OP-1b","S-OP-1b",
             "S-OP-1d","S-OP-1d","S-OP-1d","S-OP-1e","S-OP-1e",
             "S-OP-1e","S-OP-1f","S-OP-1f","S-OP-1f","S-3","S-3",
             "S-4","S-4","S-OP-1b","S-OP-1b","S-OP-1b","S-OP-1d"),
  month = c(10,11,12,10,11,12,10,11,
            12,10,11,12,10,11,11,12,10,11,12,10)
)

setDT(df)
dcast.data.table(df,cert_id + obj_cd ~ month)
#> Using 'month' as value column. Use 'value.var' to override
#> Aggregate function missing, defaulting to 'length'
#>    cert_id  obj_cd 10 11 12
#> 1:      48     S-3  1  1  0
#> 2:      48     S-4  0  1  1
#> 3:      48 S-OP-1b  2  2  2
#> 4:      48 S-OP-1d  2  1  1
#> 5:      48 S-OP-1e  1  1  1
#> 6:      48 S-OP-1f  1  1  1

Created on 2022-05-12 by the reprex package (v2.0.1)

You can can finetune the dcast arguments to get what you want.

library(tidyverse)
df %>% 
group_by_at(1:4) %>%
 summarise(month_texts = 
          paste0(month, collapse=","))
1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.