Help with Line chart

Hello,

I am struggling to add values from another data frame to an existing line plot. My data has missing values in some of the variables, so I forecasted those variables to complete the dataset. I want to show the entire dataset in one plot, with the newly forecasted data drawn in a different color (red or similar). Below is some dummy data. In the real data each variable name is unique and does not start with "brand" as it does in the dummy data.

The line plot with the missing values looks like this:

I would like to show the new values from brand_c, brand_e, and brand_b in the same line plot, in a different color.

# Sample data: one row per quarter start (2019-01-01 to 2021-10-01), one
# column per brand. NA marks a value that is missing for that quarter.
df <- data.frame(
  date = c(
    "2019-01-01", "2019-04-01", "2019-07-01", "2019-10-01",
    "2020-01-01", "2020-04-01", "2020-07-01", "2020-10-01",
    "2021-01-01", "2021-04-01", "2021-07-01", "2021-10-01"
  ),
  `brand A` = c(
    21999, 28022, 30464, 26861, 24990, 17015, 30381, 29716,
    rep(NA, 4)
  ),
  `brand B` = c(
    2211.94, 2259.38, 2243.29, 2246.55, 2158.49, 2086.65, 2305.75,
    rep(NA, 5)
  ),
  `brand C` = c(
    191125, 191125, 189738, 238556, 263929, 274390, 282798, 292390, 302517,
    rep(NA, 3)
  ),
  `brand D` = c(
    60.6219, 60.261, 60.7619, 61.9552, 65.9493, 58.9019,
    63.1002, 64.6841, 65.4183, 65.9036, 66.1447, 66.6399
  ),
  `brand E` = c(
    70.575, 73.225, 67.85, 75.175, 60.8,
    rep(NA, 7)
  ),
  `brand F` = c(
    354.381, 348.468, 342.857, 337.546, 303.792, 236.958,
    279.61, 276.814, 273.921, 270.935, 267.864, 264.726
  ),
  # Keep the column names with spaces exactly as written.
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Plot of the data with missing values in some variables (the line chart
# shown above): one facet per variable, each with its own y scale.
df %>%
  mutate(date = ymd(date)) %>%
  # pivot_longer() replaces the superseded gather(); the long-format column
  # names ("key", "value") are kept so the aesthetics below are unchanged.
  # The grouped NORMALISED_VALUE column was dropped because the plot never
  # used it (y is mapped to the raw `value`).
  pivot_longer(cols = -date, names_to = "key", values_to = "value") %>%
  ggplot(aes(x = date, y = value, colour = key)) +
  geom_line() +
  facet_grid(rows = vars(key), scales = "free_y") +
  # Hide the y-axis labels.
  scale_y_continuous(labels = NULL) +
  theme_classic()

# Forecasted values for the variables with gaps, stored as one data frame
# per brand. Each frame has a `date` column plus the brand's own column.
brand_b <- data.frame(
  date = c(
    "2020-10-01", "2021-01-01", "2021-04-01", "2021-07-01", "2021-10-01"
  ),
  `brand B` = c(2211.94, 2259.38, 2243.29, 2246.55, 2158.49),
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# These forecasts are stored as integers (note the L suffix).
brand_c <- data.frame(
  date = c("2021-04-01", "2021-07-01", "2021-10-01"),
  `brand C` = c(263929L, 274390L, 282798L),
  check.names = FALSE,
  stringsAsFactors = FALSE
)

brand_e <- data.frame(
  date = c(
    "2020-04-01", "2020-07-01", "2020-10-01", "2021-01-01",
    "2021-04-01", "2021-07-01", "2021-10-01"
  ),
  `brand E` = c(70.575, 73.225, 67.85, 75.175, 60.8, 70.575, 73.225),
  check.names = FALSE,
  stringsAsFactors = FALSE
)

Thanks for your help!

image

library(tidyverse)
library(lubridate)
# Observed data: one row per quarter start, one column per brand.
# NA marks a value that is missing for that quarter.
df <- data.frame(
  date = c(
    "2019-01-01", "2019-04-01", "2019-07-01", "2019-10-01",
    "2020-01-01", "2020-04-01", "2020-07-01", "2020-10-01",
    "2021-01-01", "2021-04-01", "2021-07-01", "2021-10-01"
  ),
  `brand A` = c(
    21999, 28022, 30464, 26861, 24990, 17015, 30381, 29716,
    rep(NA, 4)
  ),
  `brand B` = c(
    2211.94, 2259.38, 2243.29, 2246.55, 2158.49, 2086.65, 2305.75,
    rep(NA, 5)
  ),
  `brand C` = c(
    191125, 191125, 189738, 238556, 263929, 274390, 282798, 292390, 302517,
    rep(NA, 3)
  ),
  `brand D` = c(
    60.6219, 60.261, 60.7619, 61.9552, 65.9493, 58.9019,
    63.1002, 64.6841, 65.4183, 65.9036, 66.1447, 66.6399
  ),
  `brand E` = c(
    70.575, 73.225, 67.85, 75.175, 60.8,
    rep(NA, 7)
  ),
  `brand F` = c(
    354.381, 348.468, 342.857, 337.546, 303.792, 236.958,
    279.61, 276.814, 273.921, 270.935, 267.864, 264.726
  ),
  # Keep the column names with spaces exactly as written.
  check.names = FALSE,
  stringsAsFactors = FALSE
)


# (The plot of the data with missing values is the same as in the question
# and is omitted here.)

# Forecasted values, one data frame per brand. Each frame has a `date`
# column plus a column named after the brand it forecasts.
brand_b <- data.frame(
  date = c(
    "2020-10-01", "2021-01-01", "2021-04-01", "2021-07-01", "2021-10-01"
  ),
  `brand B` = c(2211.94, 2259.38, 2243.29, 2246.55, 2158.49),
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# These forecasts are stored as integers (note the L suffix).
brand_c <- data.frame(
  date = c("2021-04-01", "2021-07-01", "2021-10-01"),
  `brand C` = c(263929L, 274390L, 282798L),
  check.names = FALSE,
  stringsAsFactors = FALSE
)

brand_e <- data.frame(
  date = c(
    "2020-04-01", "2020-07-01", "2020-10-01", "2021-01-01",
    "2021-04-01", "2021-07-01", "2021-10-01"
  ),
  `brand E` = c(70.575, 73.225, 67.85, 75.175, 60.8, 70.575, 73.225),
  check.names = FALSE,
  stringsAsFactors = FALSE
)


# Stack the observed data and the forecasts into one table.
# `df` is passed twice on purpose: the first copy (source 1) is the
# observed-only series, and the second copy is combined with the forecast
# frames so that the completed series contains the old values as well as the
# new ones.
all_together <- bind_rows(
  df,
  df,
  brand_b,
  brand_c,
  brand_e,
  .id = "source"
) %>%
  # `.id` records each row's input position as a character string, so convert
  # it explicitly instead of relying on a string-vs-number comparison.
  # Everything after the first input is flagged as belonging to the new data.
  mutate(new_data = as.integer(source) > 1L) %>%
  select(-source)



 # Long format: one row per (date, new_data, brand) with a non-missing value.
 # pivot_longer()'s default output columns `name` and `value` are used.
 all_pivoted <- all_together %>%
   pivot_longer(cols = -c(date, new_data)) %>%
   filter(!is.na(value)) %>%
   mutate(date = ymd(date))


# Base layer: the completed series (rows flagged new_data), drawn in the
# default line colour. Second layer: the observed-only rows drawn on top,
# coloured by brand, so the default colour stays visible only where the
# overlay does not cover the completed series.
all_pivoted %>%
  filter(new_data) %>%
  ggplot(aes(x = date, y = value)) +
  geom_line() +
  geom_line(
    data = filter(all_pivoted, !new_data),
    aes(colour = name)
  ) +
  # One facet row per brand, each with its own y scale.
  facet_grid(name ~ ., scales = "free_y") +
  scale_y_continuous(labels = NULL) +
  theme_classic()

Thank you so much @nirgrahamuk!

What does `source > 1` do?

Thank you so much for your help!

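It tests whether the `source` value is greater than 1, returning TRUE or FALSE. `source` is the `.id` column that `bind_rows()` adds to record which input each row came from. The first input is the original `df`, so every row from a later input (the second copy of `df` and the forecast data frames) is flagged as new data.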

Perfect! Thank you @nirgrahamuk !

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.