Generate graphs for dates that are not in my database

JojoSouza · October 15, 2021, 7:34pm

The function below generates graphs for the days present in date2, that is, 01/07, 02/07 and 04/07. I would like to know whether I can adjust this function, or write a new one, so that for a date that is not in date2 (for example 15/10, or any other day that is not listed there) it falls back to the following branch of my function:

  # Excerpt of the branch taken when `datas` has at most two rows:
  # only a horizontal line at height m is drawn, no fitted curve.
  if (nrow(datas)<=2){
    abline(h=m,lwd=2) 
    # Red marker at x = 0 on the horizontal line.
    points(0, m, col = "red", pch = 19, cex = 2, xpd = TRUE)
    # Text label showing m rounded to one decimal place.
    text(.1,m+ .5, round(m,1), cex=1.1,pos=4,offset =1,col="black")}

So the graph for dates that are not present in date2 will have no points, just a line in m.

Is it possible?

Executable code below:

library(dplyr)
library(tidyverse)
library(lubridate)

# Example input: three rows of category "ABC", one row per date2 value.
# DR1 is the reference column; DR01..DR08 are the series columns used by f1().
df1 <- data.frame(
  date1 = rep("2021-06-28", 3),
  date2 = c("2021-07-01", "2021-07-02", "2021-07-04"),
  Category = rep("ABC", 3),
  Week = rep("Wednesday", 3),
  DR1 = c(4, 1, 0),
  DR01 = c(4, 1, 0),
  DR02 = c(4, 2, 0),
  DR03 = c(9, 5, 0),
  DR04 = c(5, 4, 0),
  DR05 = c(5, 4, 0),
  DR06 = c(5, 4, 0),
  DR07 = c(5, 4, 0),
  DR08 = c(5, 4, 0),
  stringsAsFactors = FALSE
)


#' Plot the adjusted DR series for one date and category.
#'
#' Reads the file-level data frame `df1`. For a date that is present in
#' `df1$date2`, the DR0x columns are adjusted by the per-(Category, Week)
#' median of `DR1 - DR0x`, trailing all-zero columns are dropped, and the
#' remaining values are plotted against their day number. Depending on how
#' many points remain, the plot shows a horizontal line at `m`, a flat line
#' through the constant value, or a fitted curve `b1 * Days^2 + b2`.
#'
#' For a date that is NOT present in `df1$date2` (for the chosen category)
#' no points exist, so the plot contains only the horizontal line at `m`.
#'
#' @param dmda Date to plot, as a "YYYY-MM-DD" string.
#' @param CategoryChosse Category to plot.
#' @return `NULL`, invisibly; called for its plotting side effect.
f1 <- function(dmda, CategoryChosse) {
  # Same comparison the filters below use; isTRUE() guards against NA.
  has_date <- isTRUE(any(df1$date2 == dmda & df1$Category == CategoryChosse))

  if (has_date) {
    # Differences DR1 - DR0x, stored as DR0x_PV.
    x <- df1 %>% select(starts_with("DR0"))
    x <- cbind(df1, setNames(df1$DR1 - x, paste0(names(x), "_PV")))
    PV <- select(x, date2, Week, Category, DR1, ends_with("PV"))

    # Median difference per Category/Week.
    med <- PV %>%
      group_by(Category, Week) %>%
      summarize(across(ends_with("PV"), median), .groups = "drop")

    # Adjusted series: DR0x + median(DR1 - DR0x), named DR0x_DR0x_PV.
    SPV <- df1 %>%
      inner_join(med, by = c("Category", "Week")) %>%
      mutate(across(matches("^DR0\\d+$"),
                    ~ .x + get(paste0(cur_column(), "_PV")),
                    .names = "{col}_{col}_PV")) %>%
      select(date1:Category, DR01_DR01_PV:last_col())
    SPV <- data.frame(SPV)

    # Names of the trailing DR0x columns that are all zero for this row
    # (reverse cumulative sum is still 0).
    mat1 <- df1 %>%
      filter(date2 == dmda, Category == CategoryChosse) %>%
      select(starts_with("DR0")) %>%
      pivot_longer(cols = everything()) %>%
      arrange(desc(row_number())) %>%
      mutate(cs = cumsum(value)) %>%
      filter(cs == 0) %>%
      pull(name)

    dropnames <- paste0(mat1, "_", mat1, "_PV")

    SPV <- SPV %>%
      filter(date2 == dmda, Category == CategoryChosse) %>%
      select(-any_of(dropnames))

    # Every series column was dropped: re-create them as NA so the
    # reshaping below still has columns to work with.
    if (length(grep("DR0", names(SPV))) == 0) {
      SPV[head(mat1, 10)] <- NA_real_
    }

    # Long format: one row per remaining column, Days = number in its name.
    datas <- SPV %>%
      filter(date2 == ymd(dmda)) %>%
      group_by(Category) %>%
      summarize(across(starts_with("DR0"), sum), .groups = "drop") %>%
      pivot_longer(cols = -Category, names_pattern = "DR0(.+)",
                   values_to = "val") %>%
      mutate(name = readr::parse_number(name))
    colnames(datas)[-1] <- c("Days", "Numbers")

    # NOTE(review): `a:b + 1` parses as `(a:b) + 1`, i.e. the whole index
    # range is shifted by one. Kept exactly as in the original.
    datas <- datas %>%
      group_by(Category) %>%
      slice((as.Date(dmda) - min(as.Date(df1$date1)[
        df1$Category == first(Category)])):max(Days) + 1) %>%
      ungroup()

    # Reference level m: mean DR1 for the Week of this date and the category.
    dr1_means <- df1 %>%
      group_by(Category, Week) %>%
      summarize(across(starts_with("DR1"), mean), .groups = "drop")
    week_chosen <- df1$Week[match(ymd(dmda), ymd(df1$date2))]
    m <- subset(dr1_means,
                Week == week_chosen & Category == CategoryChosse)$DR1
  } else {
    # Date not in date2: there are no points to plot. Previously this case
    # left m empty and the plotting calls failed.
    datas <- data.frame(Category = character(0), Days = numeric(0),
                        Numbers = numeric(0))
    # NOTE(review): no Week is known for an absent date, so m falls back to
    # the mean DR1 over the whole category - confirm this is the wanted level.
    m <- mean(df1$DR1[df1$Category == CategoryChosse])
    if (!is.finite(m)) {
      stop("No usable DR1 values for category '", CategoryChosse, "'",
           call. = FALSE)
    }
  }

  # NOTE(review): the upper y limit is derived from min(0, Numbers), not from
  # the maximum, so it never exceeds 35. Kept as in the original.
  maxrange <- range(min(0, datas$Numbers, na.rm = TRUE), na.rm = TRUE)
  maxrange[1] <- maxrange[1] - (maxrange[1] %% 10) + 35
  if (!has_date) {
    # Keep the reference line inside the plotting region.
    maxrange[1] <- max(maxrange[1], m + 5)
  }

  x_upper <- max(0, datas$Days, na.rm = TRUE) + 1

  # plot.default with explicit labels draws the same figure as the formula
  # interface and also accepts an empty series.
  plot(datas$Days, datas$Numbers, xlab = "Days", ylab = "Numbers",
       xlim = c(0, x_upper), ylim = c(0, maxrange[1]),
       xaxs = "i", main = paste0(dmda, "-", CategoryChosse))

  if (nrow(datas) <= 2) {
    # Too few (or no) points: horizontal line at m, marked at x = 0.
    abline(h = m, lwd = 2)
    points(0, m, col = "red", pch = 19, cex = 2, xpd = TRUE)
    text(.1, m + .5, round(m, 1), cex = 1.1, pos = 4, offset = 1,
         col = "black")
  } else if (any(table(datas$Numbers) >= 3) &&
             length(unique(datas$Numbers)) == 1) {
    # All values identical: flat line through that value.
    yz <- unique(datas$Numbers)
    lines(c(0, datas$Days), c(yz, datas$Numbers), lwd = 2)
    points(0, yz, col = "red", pch = 19, cex = 2, xpd = TRUE)
    text(.1, yz + .5, round(yz, 1), cex = 1.1, pos = 4, offset = 1,
         col = "black")
  } else {
    # Fit Numbers = b1 * Days^2 + b2 and draw the curve from Days = 0.
    mod <- nls(Numbers ~ b1 * Days^2 + b2, start = list(b1 = 0, b2 = 0),
               data = datas, algorithm = "port")
    new.data <- data.frame(
      Days = with(datas, seq(min(Days), max(Days), len = 45))
    )
    new.data <- rbind(0, new.data)
    lines(new.data$Days, predict(mod, newdata = new.data), lwd = 2)
    # The fitted value at Days = 0 is b2, so the marker at x = 0 uses b2
    # (the original used b1, which placed the point off the curve).
    intercept <- coef(mod)[["b2"]]
    points(0, intercept, col = "red", pch = 19, cex = 2, xpd = TRUE)
    text(.99, intercept + 1, max(0, round(intercept, 1)), cex = 1.1,
         pos = 4, offset = 1, col = "black")
  }

  invisible(NULL)
}
# Draw one plot per date available in the sample data, all for category "ABC".
for (plot_date in c("2021-07-01", "2021-07-02", "2021-07-04")) {
  f1(plot_date, "ABC")
}

GreyMerchant · October 16, 2021, 10:57am

Hi! I was reading your post and it reminded me of a super handy function in tsibble called fill_gaps. It adds explicit rows for the time points that are missing from your time series. You can specify what values those new rows should be filled with, and then handle the filled-in dates separately if you so wish.

Have a look here: Turn implicit missing values into explicit missing values — fill_gaps • tsibble

system · November 6, 2021, 10:58am

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.