Another solution approach to find if trailing zeros exist for each row id

Is there another solution approach to solve this problem here Delete column depending on the date and code you choose? I wouldn't like to use a long pipeline. I will enter the same code below.

In general, it is to find if trailing zeros exist for each row id.

@nirgrahamuk, you helped me answer this mentioned question. However, I would like something more optimized, without using pivot_longer. Do you have any idea ?

library(dplyr)
library(tidyverse)
library(lubridate)
    
df1 <- structure(
  list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28",
               "2021-06-28","2021-06-28","2021-06-28"),
       date2 = c("2021-06-30","2021-06-30","2021-07-02","2021-07-07","2021-07-07","2021-07-09","2021-07-09","2021-07-09"),
       Code = c("FDE","ABC","ABC","ABC","CDE","FGE","ABC","CDE"),
       Week= c("Wednesday","Wednesday","Friday","Wednesday","Wednesday","Friday","Friday","Friday"),
       DR1 = c(4,1,4,3,3,4,3,5),
       DR01 = c(4,1,4,3,3,4,3,6), DR02= c(4,2,6,7,3,2,7,4),DR03= c(9,5,4,3,3,2,1,5),
       DR04 = c(5,4,3,3,6,2,1,9),DR05 = c(5,4,5,3,6,2,1,9),
       DR06 = c(2,4,3,3,5,6,7,8),DR07 = c(2,5,4,4,9,4,7,8),
       DR08 = c(0,0,0,1,2,0,0,0),DR09 = c(0,0,0,0,0,0,0,0),DR010 = c(0,0,0,0,0,0,0,0),DR011 = c(4,0,0,0,0,0,0,0), 
       DR012 = c(0,0,0,3,0,0,0,5),DR013 = c(0,0,1,0,0,0,2,0),DR014 = c(0,0,0,0,0,2,0,0)),
  class = "data.frame", row.names = c(NA, -8L))

> df1
       date1      date2 Code      Week DR1 DR01 DR02 DR03 DR04 DR05 DR06 DR07 DR08 DR09 DR010 DR011 DR012 DR013 DR014
1 2021-06-28 2021-06-30  FDE Wednesday   4    4    4    9    5    5    2    2    0    0     0     4     0     0     0
2 2021-06-28 2021-06-30  ABC Wednesday   1    1    2    5    4    4    4    5    0    0     0     0     0     0     0
3 2021-06-28 2021-07-02  ABC    Friday   4    4    6    4    3    5    3    4    0    0     0     0     0     1     0
4 2021-06-28 2021-07-07  ABC Wednesday   3    3    7    3    3    3    3    4    1    0     0     0     3     0     0
5 2021-06-28 2021-07-07  CDE Wednesday   3    3    3    3    6    6    5    9    2    0     0     0     0     0     0
6 2021-06-28 2021-07-09  FGE    Friday   4    4    2    2    2    2    6    4    0    0     0     0     0     0     2
7 2021-06-28 2021-07-09  ABC    Friday   3    3    7    1    1    1    7    7    0    0     0     0     0     2     0
8 2021-06-28 2021-07-09  CDE    Friday   5    6    4    5    9    9    8    8    0    0     0     0     5     0     0

dmda<-"2021-07-07"
CodeChosse<-"CDE"

x<-df1 %>% select(starts_with("DR0"))

x<-cbind(df1, setNames(df1$DR1 - x, paste0(names(x), "_PV")))
PV<-select(x, date2,Week, Code, DR1, ends_with("PV"))

med<-PV %>%
  group_by(Code,Week) %>%
  summarize(across(ends_with("PV"), median))

SPV<-df1%>%
  inner_join(med, by = c('Code', 'Week')) %>%
  mutate(across(matches("^DR0\\d+$"), ~.x + 
                  get(paste0(cur_column(), '_PV')),
                .names = '{col}_{col}_PV')) %>%
  select(date1:Code, DR01_DR01_PV:last_col())

mat1 <- df1 %>%
  filter(date2 == dmda, Code == CodeChosse) %>%
  select(starts_with("DR0")) %>%
  pivot_longer(cols = everything()) %>%
  arrange(desc(row_number())) %>%
  mutate(cs = cumsum(value)) %>%
  filter(cs == 0) %>%
  pull(name)

(dropnames <- paste0(mat1,"_",mat1, "_PV"))

 SPV %>%
   filter(date2 == dmda, Code == CodeChosse) %>%
   select(-dropnames)


    date1      date2 Code DR01_DR01_PV DR02_DR02_PV DR03_DR03_PV DR04_DR04_PV DR05_DR05_PV DR06_DR06_PV DR07_DR07_PV
1 2021-06-28 2021-07-07  CDE            3            3            3            3            3            3            3
  DR08_DR08_PV
1            3
  row_of_interest <- df1 %>%
    filter(date2 == dmda,
           Code == CodeChosse) %>%
           select(starts_with("DR0"))
    
    nc <- ncol(row_of_interest)
    
   for(i in rev(seq_len(nc))){
      if(row_of_interest[1,i] !=0)
        break
    }
  
  mat1 <-  names(row_of_interest[1,seq(from=i+1,to=nc)])

This new approach for calculating mat1 might be twice as fast.

1 Like

Thanks for the answer @nirgrahamuk . For that day and category it worked, however if I do it for dmda<-"2021-07-09" and CodeChosse<-"FGE" it gives an error in mat1. If you test that first code you made, you'll want mat1 for this case is character(0). This happens because I have the last column value equal to 2 in this date and code, so in this case no column is selected in mat1. So how to adjust this in your new code too?

 row_of_interest <- df1 %>%
    filter(date2 == dmda, Code == CodeChosse) %>%
    select(starts_with("DR0"))
    nc <- ncol(row_of_interest)
    for(i in rev(seq_len(nc))){
      if(row_of_interest[1,i] !=0)
        break
    }
    if(i==nc) {
        mat1 <-  character(0)} else {
      mat1 <-  names(row_of_interest[1,seq(from=i+1,to=nc)])}
1 Like

Can you please test your code again @nirgrahamuk ? It's giving problem in if(i==nc) Error: no function to return from, jumping to top level, and consequently in names afterwards.

I edited it again now

1 Like

Thank you very much @nirgrahamuk !

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.