Thank you @robjhyndman! This is very close. There are still some problems with the last days of every year (see the new longer example below). Is there a way to chose the n parameter of the lead function automatically?
library(zoo)
library(tidyverse)
library(lubridate)
DF <- structure(list(Year = c(1998, 1999, 2000), Wk01 = c(3.39, 2.421,
3.39), Wk02 = c(2.591, 3.518, 2.591), Wk03 = c(2.211, 4.539,
2.211), Wk04 = c(3.487, 3.251, 3.487), Wk05 = c(3.908, 4.05,
3.908), Wk06 = c(3.714, 4.129, 3.714), Wk07 = c(3.481, 3.967,
3.481), Wk08 = c(3.939, 1.317, 3.939), Wk09 = c(3.77, 3.507,
3.77), Wk10 = c(3.347, 3.235, 3.347), Wk11 = c(4.607, 3.856,
4.607), Wk12 = c(4.871, 4.9, 4.871), Wk13 = c(6.704, 3.534, 6.704
), Wk14 = c(7.236, 3.673, 7.236), Wk15 = c(6.559, 3.631, 6.559
), Wk16 = c(6.037, 4.789, 6.037), Wk17 = c(7.838, 6.597, 7.838
), Wk18 = c(5.466, 4.542, 5.466), Wk19 = c(5.692, 4.4, 5.692),
Wk20 = c(5.403, 5.935, 5.403), Wk21 = c(5.56, 5.937, 5.56
), Wk22 = c(6.933, 11.755, 6.933), Wk23 = c(11.005, 11.755,
11.005), Wk24 = c(13.286, 10.484, 13.286), Wk25 = c(10.646,
12.688, 10.646), Wk26 = c(12.303, 13.811, 12.303), Wk27 = c(11.826,
29.954, 11.826), Wk28 = c(8.961, 28.48, 8.961), Wk29 = c(7.795,
37.812, 7.795), Wk30 = c(7.813, 20.841, 7.813), Wk31 = c(5.609,
13.835, 5.609), Wk32 = c(6.538, 15.241, 6.538), Wk33 = c(6.046,
20.373, 6.046), Wk34 = c(4.521, 11.184, 4.521), Wk35 = c(5.489,
9.519, 5.489), Wk36 = c(7.567, 8.578, 7.567), Wk37 = c(5.377,
7.154, 5.377), Wk38 = c(5.541, 6.025, 5.541), Wk39 = c(5.213,
6.655, 5.213), Wk40 = c(5.072, 5.827, 5.072), Wk41 = c(5.046,
5.841, 5.046), Wk42 = c(4.59, 5.823, 4.59), Wk43 = c(4.423,
5.607, 4.423), Wk44 = c(4.27, 5.394, 4.27), Wk45 = c(3.959,
5.359, 3.959), Wk46 = c(3.621, 5.177, 3.621), Wk47 = c(3.704,
5.976, 3.704), Wk48 = c(3.242, 6.358, 3.242), Wk49 = c(3.143,
5.025, 3.143), Wk50 = c(1.786, 4.339, 1.786), Wk51 = c(1.442,
4.228, 1.442), Wk52 = c(3.225, 4.872, 3.225)), row.names = c(NA,
-3L), class = c("tbl_df", "tbl", "data.frame"))
DF_lg <- DF %>%
gather(key = "WeekNr", value = "Sale", -Year) %>%
separate(WeekNr, into = c("dummy", "Week"),
sep = "k",
remove = FALSE) %>%
mutate(Date = as.Date(paste(Year, Week, 1, sep = "-"), "%Y-%U-%u")) %>%
arrange(Year) %>%
select(Date, Year, Week, Sale)
DF_lg
#> # A tibble: 156 x 4
#> Date Year Week Sale
#> <date> <dbl> <chr> <dbl>
#> 1 1998-01-05 1998 01 3.39
#> 2 1998-01-12 1998 02 2.59
#> 3 1998-01-19 1998 03 2.21
#> 4 1998-01-26 1998 04 3.49
#> 5 1998-02-02 1998 05 3.91
#> 6 1998-02-09 1998 06 3.71
#> 7 1998-02-16 1998 07 3.48
#> 8 1998-02-23 1998 08 3.94
#> 9 1998-03-02 1998 09 3.77
#> 10 1998-03-09 1998 10 3.35
#> # ... with 146 more rows
DF_lg2 <- DF %>%
gather(key="Week", value="Sale", -Year) %>%
mutate(
Week = str_sub(Week, 3, 4),
Date = as.Date(paste(Year, Week, 1, sep="-"), "%Y-%U-%u")
) %>%
select(Date, Week, Sale) %>%
arrange(Date)
# create a daily date
start_date <- min(as.Date(paste(DF$Year, "1", "1", sep = "-")))
end_date <- max(as.Date(paste(DF$Year, "12", "31", sep = "-")))
daily_vec <- tibble(Date = seq(start_date,
end_date,
by = 'day'))
# Can we automate the number of `lead` e.g., `n=xxx`
DF_daily2 <- DF_lg %>% right_join(
daily_vec,
by="Date") %>%
na.locf(na.rm=FALSE) %>%
mutate(Sale = lead(Sale, n=4)) %>%
select(Date, Sale)
DF_daily2
#> # A tibble: 1,096 x 2
#> Date Sale
#> <date> <dbl>
#> 1 1998-01-01 3.39
#> 2 1998-01-02 3.39
#> 3 1998-01-03 3.39
#> 4 1998-01-04 3.39
#> 5 1998-01-05 3.39
#> 6 1998-01-06 3.39
#> 7 1998-01-07 3.39
#> 8 1998-01-08 2.59
#> 9 1998-01-09 2.59
#> 10 1998-01-10 2.59
#> # ... with 1,086 more rows
# Problem with the last days of the year
DF_daily2 %>%
filter(Date >= daily_vec$Date[363] & Date <= daily_vec$Date[367])
#> # A tibble: 5 x 2
#> Date Sale
#> <date> <dbl>
#> 1 1998-12-29 3.22
#> 2 1998-12-30 3.22
#> 3 1998-12-31 2.42
#> 4 1999-01-01 2.42
#> 5 1999-01-02 2.42
DF_daily2 %>%
filter(Date >= daily_vec$Date[727] & Date <= daily_vec$Date[731])
#> # A tibble: 5 x 2
#> Date Sale
#> <date> <dbl>
#> 1 1999-12-28 4.87
#> 2 1999-12-29 4.87
#> 3 1999-12-30 3.39
#> 4 1999-12-31 3.39
#> 5 2000-01-01 3.39
DF_daily2 %>%
tail()
#> # A tibble: 6 x 2
#> Date Sale
#> <date> <dbl>
#> 1 2000-12-26 3.22
#> 2 2000-12-27 3.22
#> 3 2000-12-28 NA
#> 4 2000-12-29 NA
#> 5 2000-12-30 NA
#> 6 2000-12-31 NA