How to tweak code to generate output table

JojoSouza · October 12, 2021, 12:46pm

As you can see I have two df1 databases. For the first one it worked I generate my output table. However if I try for second df1 database it doesn't work. This is probably because the date 01/07 of the ABC category has all its columns equal to 0, and this ends up giving a problem with the datas. However, I would like a way to generate the output table anyway, in this case the table would be generated, but it would not have the row corresponding to that day (01/07 ABC category). Is there any way to make a condition or something?

Executable code below:

library(dplyr)

df1 <- structure(
  list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28"),
       date2 = c("2021-06-30","2021-06-30","2021-07-01","2021-07-01"),
       Category = c("FDE","ABC","FDE","ABC"),
       Week= c("Wednesday","Wednesday","Friday","Friday"),
       DR1 = c(4,1,6,1),
       DR01 = c(4,1,4,3), DR02= c(4,2,6,0),DR03= c(9,5,4,0),
       DR04 = c(5,4,3,3),DR05 = c(5,4,5,0),
       DR06 = c(2,4,3,3),DR07 = c(2,5,4,0),
       DR08 = c(3,4,5,0),DR09 = c(2,3,4,0)),
  class = "data.frame", row.names = c(NA, -4L))

#df1 <- structure(
 # list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28"),
  #     date2 = c("2021-06-30","2021-06-30","2021-07-01","2021-07-01"),
   #    Category = c("FDE","ABC","FDE","ABC"),
    #   Week= c("Wednesday","Wednesday","Friday","Friday"),
     #  DR1 = c(4,1,6,0),
      # DR01 = c(4,1,4,0), DR02= c(4,2,6,0),DR03= c(9,5,4,0),
      # DR04 = c(5,4,3,0),DR05 = c(5,4,5,0),
      # DR06 = c(2,4,3,0),DR07 = c(2,5,4,0),
      # DR08 = c(3,4,5,0),DR09 = c(2,3,4,0)),
  #class = "data.frame", row.names = c(NA, -4L))

return_coef <- function(dmda, CategoryChosse) {
  
  x<-df1 %>% select(starts_with("DR0"))
  
  x<-cbind(df1, setNames(df1$DR1 - x, paste0(names(x), "_PV")))
  PV<-select(x, date2,Week, Category, DR1, ends_with("PV"))
  
  med<-PV %>%
    group_by(Category,Week) %>%
    summarize(across(ends_with("PV"), median))
  
  SPV<-df1%>%
    inner_join(med, by = c('Category', 'Week')) %>%
    mutate(across(matches("^DR0\\d+$"), ~.x + 
                    get(paste0(cur_column(), '_PV')),
                  .names = '{col}_{col}_PV')) %>%
    select(date1:Category, DR01_DR01_PV:last_col())
  
  SPV<-data.frame(SPV)
  
  mat1 <- df1 %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(starts_with("DR0")) %>%
    pivot_longer(cols = everything()) %>%
    arrange(desc(row_number())) %>%
    mutate(cs = cumsum(value)) %>%
    filter(cs == 0) %>%
    pull(name)
  
  (dropnames <- paste0(mat1,"_",mat1, "_PV"))
  
  SPV <- SPV %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(-any_of(dropnames))
  
  datas<-SPV %>%
    filter(date2 == ymd(dmda)) %>%
    group_by(Category) %>%
    summarize(across(starts_with("DR0"), sum)) %>%
    pivot_longer(cols= -Category, names_pattern = "DR0(.+)", values_to = "val") %>%
    mutate(name = readr::parse_number(name))
  colnames(datas)[-1]<-c("Days","Numbers")
  
  datas <- datas %>% 
    group_by(Category) %>% 
    slice((as.Date(dmda) - min(as.Date(df1$date1) [
      df1$Category == first(Category)])):max(Days)+1) %>%
    ungroup
  
  mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 0,b2 = 0),data = datas, algorithm = "port")
  as.numeric(coef(mod)[2])
  
}

cbind(df1[2:3], coef = mapply(return_coef, df1$date2, df1$Category))

> cbind(df1[2:3], coef = mapply(return_coef, df1$date2, df1$Category))

           date2 Category coef
    1 2021-06-30      FDE    4
    2 2021-06-30      ABC    1
    3 2021-07-01      FDE    6
    4 2021-07-01      ABC    1

Till_K · October 12, 2021, 3:00pm

What do you mean by "doesn't work"? Do you get an error, or is the output not what you'd expect? When I run your code, it works for me. I renamed your second df1 to df2 and additionally loaded tidyr and lubridate since you are using functions from these packages in your code. See below.

library(dplyr)
library(lubridate)
library(tidyr)

df1 <- structure(
  list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28"),
       date2 = c("2021-06-30","2021-06-30","2021-07-01","2021-07-01"),
       Category = c("FDE","ABC","FDE","ABC"),
       Week= c("Wednesday","Wednesday","Friday","Friday"),
       DR1 = c(4,1,6,1),
       DR01 = c(4,1,4,3), DR02= c(4,2,6,0),DR03= c(9,5,4,0),
       DR04 = c(5,4,3,3),DR05 = c(5,4,5,0),
       DR06 = c(2,4,3,3),DR07 = c(2,5,4,0),
       DR08 = c(3,4,5,0),DR09 = c(2,3,4,0)),
  class = "data.frame", row.names = c(NA, -4L))

df2 <- structure(
list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28"),
    date2 = c("2021-06-30","2021-06-30","2021-07-01","2021-07-01"),
   Category = c("FDE","ABC","FDE","ABC"),
  Week= c("Wednesday","Wednesday","Friday","Friday"),
 DR1 = c(4,1,6,0),
DR01 = c(4,1,4,0), DR02= c(4,2,6,0),DR03= c(9,5,4,0),
DR04 = c(5,4,3,0),DR05 = c(5,4,5,0),
DR06 = c(2,4,3,0),DR07 = c(2,5,4,0),
DR08 = c(3,4,5,0),DR09 = c(2,3,4,0)),
class = "data.frame", row.names = c(NA, -4L))

return_coef <- function(dmda, CategoryChosse) {
  
  x<-df1 %>% select(starts_with("DR0"))
  
  x<-cbind(df1, setNames(df1$DR1 - x, paste0(names(x), "_PV")))
  PV<-select(x, date2,Week, Category, DR1, ends_with("PV"))
  
  med<-PV %>%
    group_by(Category,Week) %>%
    summarize(across(ends_with("PV"), median), .groups = "drop")
  
  SPV<-df1%>%
    inner_join(med, by = c('Category', 'Week')) %>%
    mutate(across(matches("^DR0\\d+$"), ~.x + 
                    get(paste0(cur_column(), '_PV')),
                  .names = '{col}_{col}_PV')) %>%
    select(date1:Category, DR01_DR01_PV:last_col())
  
  SPV<-data.frame(SPV)
  
  mat1 <- df1 %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(starts_with("DR0")) %>%
    pivot_longer(cols = everything()) %>%
    arrange(desc(row_number())) %>%
    mutate(cs = cumsum(value)) %>%
    filter(cs == 0) %>%
    pull(name)
  
  (dropnames <- paste0(mat1,"_",mat1, "_PV"))
  
  SPV <- SPV %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(-any_of(dropnames))
  
  datas<-SPV %>%
    filter(date2 == ymd(dmda)) %>%
    group_by(Category) %>%
    summarize(across(starts_with("DR0"), sum)) %>%
    pivot_longer(cols= -Category, names_pattern = "DR0(.+)", values_to = "val") %>%
    mutate(name = readr::parse_number(name))
  colnames(datas)[-1]<-c("Days","Numbers")
  
  datas <- datas %>% 
    group_by(Category) %>% 
    slice((as.Date(dmda) - min(as.Date(df1$date1) [
      df1$Category == first(Category)])):max(Days)+1) %>%
    ungroup
  
  mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 0,b2 = 0),data = datas, algorithm = "port")
  as.numeric(coef(mod)[2])
  
}

cbind(df1[2:3], coef = mapply(return_coef, df1$date2, df1$Category))
#>        date2 Category coef
#> 1 2021-06-30      FDE    4
#> 2 2021-06-30      ABC    1
#> 3 2021-07-01      FDE    6
#> 4 2021-07-01      ABC    1
cbind(df2[2:3], coef = mapply(return_coef, df2$date2, df2$Category))
#>        date2 Category coef
#> 1 2021-06-30      FDE    4
#> 2 2021-06-30      ABC    1
#> 3 2021-07-01      FDE    6
#> 4 2021-07-01      ABC    1

JojoSouza · October 12, 2021, 3:42pm

Thanks for reply @Till_K!

When I do this:

df1 <- structure(
 list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28"),
     date2 = c("2021-06-30","2021-06-30","2021-07-01","2021-07-01"),
    Category = c("FDE","ABC","FDE","ABC"),
   Week= c("Wednesday","Wednesday","Friday","Friday"),
  DR1 = c(4,1,6,0),
 DR01 = c(4,1,4,0), DR02= c(4,2,6,0),DR03= c(9,5,4,0),
 DR04 = c(5,4,3,0),DR05 = c(5,4,5,0),
 DR06 = c(2,4,3,0),DR07 = c(2,5,4,0),
 DR08 = c(3,4,5,0),DR09 = c(2,3,4,0)),
class = "data.frame", row.names = c(NA, -4L))

return_coef <- function(dmda, CategoryChosse) {
  
  x<-df1 %>% select(starts_with("DR0"))
  
  x<-cbind(df1, setNames(df1$DR1 - x, paste0(names(x), "_PV")))
  PV<-select(x, date2,Week, Category, DR1, ends_with("PV"))
  
  med<-PV %>%
    group_by(Category,Week) %>%
    summarize(across(ends_with("PV"), median))
  
  SPV<-df1%>%
    inner_join(med, by = c('Category', 'Week')) %>%
    mutate(across(matches("^DR0\\d+$"), ~.x + 
                    get(paste0(cur_column(), '_PV')),
                  .names = '{col}_{col}_PV')) %>%
    select(date1:Category, DR01_DR01_PV:last_col())
  
  SPV<-data.frame(SPV)
  
  mat1 <- df1 %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(starts_with("DR0")) %>%
    pivot_longer(cols = everything()) %>%
    arrange(desc(row_number())) %>%
    mutate(cs = cumsum(value)) %>%
    filter(cs == 0) %>%
    pull(name)
  
  (dropnames <- paste0(mat1,"_",mat1, "_PV"))
  
  SPV <- SPV %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(-any_of(dropnames))
  
  datas<-SPV %>%
    filter(date2 == ymd(dmda)) %>%
    group_by(Category) %>%
    summarize(across(starts_with("DR0"), sum)) %>%
    pivot_longer(cols= -Category, names_pattern = "DR0(.+)", values_to = "val") %>%
    mutate(name = readr::parse_number(name))
  colnames(datas)[-1]<-c("Days","Numbers")
  
  datas <- datas %>% 
    group_by(Category) %>% 
    slice((as.Date(dmda) - min(as.Date(df1$date1) [
      df1$Category == first(Category)])):max(Days)+1) %>%
    ungroup
  
  mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 0,b2 = 0),data = datas, algorithm = "port")
  as.numeric(coef(mod)[2])
  
}

cbind(df1[2:3], coef = mapply(return_coef, df1$date2, df1$Category))

The following error appears:

  Error: `cols` must select at least one column.
Run `rlang::last_error()` to see where the error occurred.

nirgrahamuk · October 12, 2021, 4:06pm

split up the long pipe that makes datas out of SPV and do a check for the edge case where everything is zero and there is nothing to pivot, and recover from that by setting the result to NA

datas_1<-SPV %>%
    filter(date2 == ymd(dmda)) %>%
    group_by(Category) %>%
    summarize(across(starts_with("DR0"), sum)) 
  
  if(ncol(datas_1)==1)
    return(NA)
  
  datas <- datas_1%>%
    pivot_longer(cols= -Category, names_pattern = "DR0(.+)", values_to = "val") %>%
    mutate(name = readr::parse_number(name))
  colnames(datas)[-1]<-c("Days","Numbers")

Till_K · October 12, 2021, 4:12pm

You can add

  if(length(dropnames) == 9) return(NA)

after the line that creates dropnames.

See below for the full code with that modification. I also changed the code of the function, so that you can feed it a data object, instead of hard-coding df1 into it.

library(dplyr)
library(lubridate)
library(tidyr)

df1 <- structure(
  list(
    date1 = c("2021-06-28", "2021-06-28", "2021-06-28", "2021-06-28"),
    date2 = c("2021-06-30", "2021-06-30", "2021-07-01", "2021-07-01"),
    Category = c("FDE", "ABC", "FDE", "ABC"),
    Week = c("Wednesday", "Wednesday", "Friday", "Friday"),
    DR1 = c(4, 1, 6, 1),
    DR01 = c(4, 1, 4, 3),
    DR02 = c(4, 2, 6, 0),
    DR03 = c(9, 5, 4, 0),
    DR04 = c(5, 4, 3, 3),
    DR05 = c(5, 4, 5, 0),
    DR06 = c(2, 4, 3, 3),
    DR07 = c(2, 5, 4, 0),
    DR08 = c(3, 4, 5, 0),
    DR09 = c(2, 3, 4, 0)
  ),
  class = "data.frame",
  row.names = c(NA, -4L)
)

df2 <- structure(
  list(
    date1 = c("2021-06-28", "2021-06-28", "2021-06-28", "2021-06-28"),
    date2 = c("2021-06-30", "2021-06-30", "2021-07-01", "2021-07-01"),
    Category = c("FDE", "ABC", "FDE", "ABC"),
    Week = c("Wednesday", "Wednesday", "Friday", "Friday"),
    DR1 = c(4, 1, 6, 0),
    DR01 = c(4, 1, 4, 0),
    DR02 = c(4, 2, 6, 0),
    DR03 = c(9, 5, 4, 0),
    DR04 = c(5, 4, 3, 0),
    DR05 = c(5, 4, 5, 0),
    DR06 = c(2, 4, 3, 0),
    DR07 = c(2, 5, 4, 0),
    DR08 = c(3, 4, 5, 0),
    DR09 = c(2, 3, 4, 0)
  ),
  class = "data.frame",
  row.names = c(NA, -4L)
)

return_coef <- function(dmda, CategoryChosse, data) {
  x <- data %>% select(starts_with("DR0"))
  
  x <- cbind(data, setNames(data$DR1 - x, paste0(names(x), "_PV")))
  PV <- select(x, date2, Week, Category, DR1, ends_with("PV"))
  
  med <- PV %>%
    group_by(Category, Week) %>%
    summarize(across(ends_with("PV"), median), .groups = "drop")
  
  SPV <- data %>%
    inner_join(med, by = c('Category', 'Week')) %>%
    mutate(across(matches("^DR0\\d+$"), ~ .x +
                    get(paste0(
                      cur_column(), '_PV'
                    )),
                  .names = '{col}_{col}_PV')) %>%
    select(date1:Category, DR01_DR01_PV:last_col())
  
  SPV <- data.frame(SPV)
  
  mat1 <- data %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(starts_with("DR0")) %>%
    pivot_longer(cols = everything()) %>%
    arrange(desc(row_number())) %>%
    mutate(cs = cumsum(value)) %>%
    filter(cs == 0) %>%
    pull(name)
  
  (dropnames <- paste0(mat1, "_", mat1, "_PV"))
  if(length(dropnames) == 9) return(NA)
  
  SPV <- SPV %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(-any_of(dropnames))
  
  
  datas <- SPV %>%
    filter(date2 == ymd(dmda)) %>%
    group_by(Category) %>%
    summarize(across(starts_with("DR0"), sum)) %>%
    pivot_longer(
      cols = -Category,
      names_pattern = "DR0(.+)",
      values_to = "val"
    ) %>%
    mutate(name = readr::parse_number(name))
  colnames(datas)[-1] <- c("Days", "Numbers")
  
  datas <- datas %>%
    group_by(Category) %>%
    slice((as.Date(dmda) - min(as.Date(data$date1) [data$Category == first(Category)])):max(Days) +
            1) %>%
    ungroup()
  
  mod <-
    nls(
      Numbers ~ b1 * Days ^ 2 + b2,
      start = list(b1 = 0, b2 = 0),
      data = datas,
      algorithm = "port"
    )
  as.numeric(coef(mod)[2])
  
}

cbind(df1[2:3], coef = mapply(\(x, y) return_coef(x, y, data = df1), df1$date2, df1$Category))
#>        date2 Category coef
#> 1 2021-06-30      FDE    4
#> 2 2021-06-30      ABC    1
#> 3 2021-07-01      FDE    6
#> 4 2021-07-01      ABC    1

cbind(df2[2:3], coef = mapply(\(x, y) return_coef(x, y, data = df2), df2$date2, df2$Category))
#>        date2 Category coef
#> 1 2021-06-30      FDE    4
#> 2 2021-06-30      ABC    1
#> 3 2021-07-01      FDE    6
#> 4 2021-07-01      ABC   NA

return_coef(
  dmda = df2$date2[4],
  CategoryChosse = df2$Category[4],
  data = df2
)
#> [1] NA

JojoSouza · October 12, 2021, 5:34pm

Thanks for reply @Till_K

In

if(length(dropnames) == 9) return(NA)

You added 9 because I have 9 DR? but wouldn't I be able to leave it automatically, so that the code identifies how many numbers it has? So, if I have a new database, I don't need to keep modifying

Thanks again!

JojoSouza · October 12, 2021, 5:37pm

Thanks for reply @nirgrahamuk ! If possible could insert all code? I don't understand if I need to delete my datas and leave just your datas_1.

Till_K · October 12, 2021, 7:08pm

If the amount of data variables is variable testing for a length of 9 won't work, yes. In that case, you should test for any other condition you know will be true whenever pivot_longer() would fail. @nirgrahamuk's suggestion would work for example.

system · November 2, 2021, 7:08pm

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.