Hello,
I am struggling to add values from another data frame to an existing line plot. My data has missing values in some of the variables, so I forecasted those variables to complete the dataset. I want to show the entire dataset in one plot, with the newly forecasted data drawn in a different color (red, for example). Below is dummy data; in my real data each variable name is unique and does not start with "brand" as it does in the dummy data.
Line plot with missing values looks like this:
I would like to show the new values from brand_c, brand_e, and brand_b in the same line plot, in a different color.
# Sample data ----
# Twelve quarterly observations (2019-Q1 to 2021-Q4) for six series.
# `date` is kept as a character column; NA marks the quarters that have no
# value yet. `check.names = FALSE` keeps the column names with spaces intact.
df <- data.frame(
  date = as.character(
    seq(as.Date("2019-01-01"), by = "quarter", length.out = 12)
  ),
  `brand A` = c(
    21999, 28022, 30464, 26861, 24990, 17015, 30381, 29716,
    rep(NA, 4)
  ),
  `brand B` = c(
    2211.94, 2259.38, 2243.29, 2246.55, 2158.49, 2086.65, 2305.75,
    rep(NA, 5)
  ),
  `brand C` = c(
    191125, 191125, 189738, 238556, 263929, 274390, 282798, 292390, 302517,
    rep(NA, 3)
  ),
  `brand D` = c(
    60.6219, 60.261, 60.7619, 61.9552, 65.9493, 58.9019,
    63.1002, 64.6841, 65.4183, 65.9036, 66.1447, 66.6399
  ),
  `brand E` = c(
    70.575, 73.225, 67.85, 75.175, 60.8,
    rep(NA, 7)
  ),
  `brand F` = c(
    354.381, 348.468, 342.857, 337.546, 303.792, 236.958,
    279.61, 276.814, 273.921, 270.935, 267.864, 264.726
  ),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# This is the plot with missing values in some variables, as shown in the line chart above:
# Visualizing data ----
# Packages used by this pipeline, attached here so the snippet runs in a
# fresh R session.
library(dplyr)
library(tidyr)
library(lubridate)
library(ggplot2)

# Reshape `df` to long format (one row per date/series pair) and draw every
# series as a line in its own facet with an independent y axis.
df %>%
  # `date` is stored as "YYYY-MM-DD" text; parse it so the x axis is a
  # proper date scale.
  mutate(date = ymd(date)) %>%
  # pivot_longer() is the maintained replacement for the superseded gather().
  pivot_longer(-date, names_to = "key", values_to = "value") %>%
  # The per-series normalisation (value / first(value)) is not computed here:
  # the plot maps y to the raw `value`, so that column was never used.
  ggplot(aes(x = date, y = value, colour = key)) +
  geom_line() +
  # One row of panels per series; free y scales because the series differ
  # by several orders of magnitude.
  facet_grid(rows = vars(key), scales = "free_y") +
  # Hide the y tick labels.
  scale_y_continuous(labels = NULL) +
  theme_classic()
# Forecasted values for the missing observations, stored as one data frame per brand
# Forecast for `brand B`: five quarterly values, 2020-Q4 to 2021-Q4.
# `date` is a character column; `check.names = FALSE` keeps the space in the
# column name.
brand_b <- data.frame(
  date = as.character(
    seq(as.Date("2020-10-01"), by = "quarter", length.out = 5)
  ),
  `brand B` = c(2211.94, 2259.38, 2243.29, 2246.55, 2158.49),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# Forecast for `brand C`: three quarterly values, 2021-Q2 to 2021-Q4.
# The values are integer literals (note the `L` suffix); `date` is a
# character column.
brand_c <- data.frame(
  date = as.character(
    seq(as.Date("2021-04-01"), by = "quarter", length.out = 3)
  ),
  `brand C` = c(263929L, 274390L, 282798L),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# Forecast for `brand E`: seven quarterly values, 2020-Q2 to 2021-Q4.
# `date` is a character column; `check.names = FALSE` keeps the space in the
# column name.
brand_e <- data.frame(
  date = as.character(
    seq(as.Date("2020-04-01"), by = "quarter", length.out = 7)
  ),
  `brand E` = c(70.575, 73.225, 67.85, 75.175, 60.8, 70.575, 73.225),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
Thanks for your help!