Subtract columns from two different datasets

I would like to know how I can do a subtraction between the dataset I got (All) with my df1 dataset. I inserted an image to illustrate the output I want. So you'll notice that I want to subtract the value of the coef of All from the columns of DR0.. of the df1.

library(dplyr)
library(tidyverse)
library(lubridate)

df1 <- structure(
  list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28"),
       date2 = c("2021-06-30","2021-06-30","2021-07-01","2021-07-01"),
       Category = c("FDE","ABC","FDE","ABC"),
       Week= c("Wednesday","Wednesday","Friday","Friday"),
       DR1 = c(4,1,6,3),
       DR01 = c(4,1,4,3), DR02= c(4,2,6,2),DR03= c(9,5,4,7),
       DR04 = c(5,4,3,2),DR05 = c(5,4,5,4),
       DR06 = c(2,4,3,2)),
  class = "data.frame", row.names = c(NA, -4L))

> df1
       date1      date2 Category      Week DR1 DR01 DR02 DR03 DR04 DR05 DR06
1 2021-06-28 2021-06-30      FDE Wednesday   4    4    4    9    5    5    2
2 2021-06-28 2021-06-30      ABC Wednesday   1    1    2    5    4    4    4
3 2021-06-28 2021-07-01      FDE    Friday   6    4    6    4    3    5    3
4 2021-06-28 2021-07-01      ABC    Friday   3    3    2    7    2    4    2

return_coef <- function(dmda, CategoryChosse) {
  
  x<-df1 %>% select(starts_with("DR0"))
  
  x<-cbind(df1, setNames(df1$DR1 - x, paste0(names(x), "_PV")))
  PV<-select(x, date2,Week, Category, DR1, ends_with("PV"))
  
  med<-PV %>%
    group_by(Category,Week) %>%
    summarize(across(ends_with("PV"), median))
  
  SPV<-df1%>%
    inner_join(med, by = c('Category', 'Week')) %>%
    mutate(across(matches("^DR0\\d+$"), ~.x + 
                    get(paste0(cur_column(), '_PV')),
                  .names = '{col}_{col}_PV')) %>%
    select(date1:Category, DR01_DR01_PV:last_col())
  
  SPV<-data.frame(SPV)
  
  mat1 <- df1 %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(starts_with("DR0")) %>%
    pivot_longer(cols = everything()) %>%
    arrange(desc(row_number())) %>%
    mutate(cs = cumsum(value)) %>%
    filter(cs == 0) %>%
    pull(name)
  
  (dropnames <- paste0(mat1,"_",mat1, "_PV"))
  
  SPV <- SPV %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(-any_of(dropnames))
  
  datas<-SPV %>%
    filter(date2 == ymd(dmda)) %>%
    group_by(Category) %>%
    summarize(across(starts_with("DR0"), sum)) %>%
    pivot_longer(cols= -Category, names_pattern = "DR0(.+)", values_to = "val") %>%
    mutate(name = readr::parse_number(name))
  colnames(datas)[-1]<-c("Days","Numbers")
  
  datas <- datas %>% 
    group_by(Category) %>% 
    slice((as.Date(dmda) - min(as.Date(df1$date1) [
      df1$Category == first(Category)])):max(Days)+1) %>%
    ungroup
  
  mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 0,b2 = 0),data = datas, algorithm = "port")
  as.numeric(coef(mod)[2])
  
}

All<-cbind(df1 %>% select(date2, Category), coef = mapply(return_coef, df1$date2, df1$Category))
> All
       date2 Category coef
1 2021-06-30      FDE    4
2 2021-06-30      ABC    1
3 2021-07-01      FDE    6
4 2021-07-01      ABC    3

Output I want

The subtraction is straightforward in {base}, as shown below. The output shows the differences represented textually, however. Is your question on constructing the strings?

suppressPackageStartupMessages({
  library(dplyr)
  library(lubridate)
  library(tidyr)
})
df1 <- data.frame(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28"),
       date2 = c("2021-06-30","2021-06-30","2021-07-01","2021-07-01"),
       Category = c("FDE","ABC","FDE","ABC"),
       Week= c("Wednesday","Wednesday","Friday","Friday"),
       DR1 = c(4,1,6,3),
       DR01 = c(4,1,4,3), DR02= c(4,2,6,2),DR03= c(9,5,4,7),
       DR04 = c(5,4,3,2),DR05 = c(5,4,5,4),
       DR06 = c(2,4,3,2))

return_coef <- function(dmda, CategoryChosse) {
  
  x<-df1 %>% select(starts_with("DR0"))
  
  x<-cbind(df1, setNames(df1$DR1 - x, paste0(names(x), "_PV")))
  PV<-select(x, date2,Week, Category, DR1, ends_with("PV"))
  
  med<-PV %>%
    group_by(Category,Week) %>%
    summarize(across(ends_with("PV"), median))
  
  SPV<-df1%>%
    inner_join(med, by = c('Category', 'Week')) %>%
    mutate(across(matches("^DR0\\d+$"), ~.x + 
                    get(paste0(cur_column(), '_PV')),
                  .names = '{col}_{col}_PV')) %>%
    select(date1:Category, DR01_DR01_PV:last_col())
  
  SPV<-data.frame(SPV)
  
  mat1 <- df1 %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(starts_with("DR0")) %>%
    pivot_longer(cols = everything()) %>%
    arrange(desc(row_number())) %>%
    mutate(cs = cumsum(value)) %>%
    filter(cs == 0) %>%
    pull(name)
  
  (dropnames <- paste0(mat1,"_",mat1, "_PV"))
  
  SPV <- SPV %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(-any_of(dropnames))
  
  datas<-SPV %>%
    filter(date2 == ymd(dmda)) %>%
    group_by(Category) %>%
    summarize(across(starts_with("DR0"), sum)) %>%
    pivot_longer(cols= -Category, names_pattern = "DR0(.+)", values_to = "val") %>%
    mutate(name = readr::parse_number(name))
  colnames(datas)[-1]<-c("Days","Numbers")
  
  datas <- datas %>% 
    group_by(Category) %>% 
    slice((as.Date(dmda) - min(as.Date(df1$date1) [
      df1$Category == first(Category)])):max(Days)+1) %>%
    ungroup
  
  mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 0,b2 = 0),data = datas, algorithm = "port")
  as.numeric(coef(mod)[2])
  
}

All<-cbind(df1 %>% select(date2, Category), coef = mapply(return_coef, df1$date2, df1$Category))
#> `summarise()` has grouped output by 'Category'. You can override using the `.groups` argument.
#> `summarise()` has grouped output by 'Category'. You can override using the `.groups` argument.
#> `summarise()` has grouped output by 'Category'. You can override using the `.groups` argument.
#> `summarise()` has grouped output by 'Category'. You can override using the `.groups` argument.

diffs <- df1[,5:11] - All[,3]
df2   <- df1[,1:4]
df2[,5:11] <- diffs
df2
#>        date1      date2 Category      Week           DR1          DR01
#> 1 2021-06-28 2021-06-30      FDE Wednesday -6.360605e-08 -6.360605e-08
#> 2 2021-06-28 2021-06-30      ABC Wednesday -1.300449e-08 -1.300449e-08
#> 3 2021-06-28 2021-07-01      FDE    Friday -1.020952e-07 -2.000000e+00
#> 4 2021-06-28 2021-07-01      ABC    Friday -1.160027e-08 -1.160027e-08
#>            DR02 DR03       DR04       DR05 DR06
#> 1 -6.360605e-08    5  0.9999999  0.9999999   -2
#> 2  1.000000e+00    4  3.0000000  3.0000000    3
#> 3 -1.020952e-07   -2 -3.0000001 -1.0000001   -3
#> 4 -1.000000e+00    4 -1.0000000  1.0000000   -1

Thanks for the answer @technocrat , but the output table I inserted in the question is a little different from yours.

That was my question. The caption asked for subtraction, but output shows character values. Do we differ in the calculation or just in the presentation?

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.