how to append missing dates using tidyr::complete for each combination excluding result column

library(tidyverse)
sample_data <- tribble(~A, ~B, ~C, ~ Date, ~ Result,
        "AL",123,"12", as.Date("2014-02-01"), 12345,
        "AL",123,"12", as.Date("2014-04-01"), 12349,
        "AL",123,"12", as.Date("2014-06-01"), 12977,
        "AZ",123,"12", as.Date("2014-01-01"),23435,
        "AZ",123,"12", as.Date("2014-04-01"),453454,
        "AZ",123,"12", as.Date("2014-07-01"),123976)

sample_data %<>% complete(Date = seq.Date(min(Date), max(Date), by="month")) 
# Output
> sample_data
# A tibble: 8 x 5
  Date       A         B C     Result
  <date>     <chr> <dbl> <chr>  <dbl>
1 2014-01-01 AZ      123 12     23435
2 2014-02-01 AL      123 12     12345
3 2014-03-01 NA       NA NA        NA
4 2014-04-01 AL      123 12     12349
5 2014-04-01 AZ      123 12    453454
6 2014-05-01 NA       NA NA        NA
7 2014-06-01 AL      123 12     12977
8 2014-07-01 AZ      123 12    123976

# Expected output
expected_output <-tribble(~A, ~B, ~C, ~ Date, ~ Result,
                            "AL",123,"12", as.Date("2014-01-01"), NA,
                            "AL",123,"12", as.Date("2014-02-01"), 12345,
                            "AL",123,"12", as.Date("2014-03-01"), NA,
                            "AL",123,"12", as.Date("2014-04-01"), 12349,
                            "AL",123,"12", as.Date("2014-05-01"), NA,
                            "AL",123,"12", as.Date("2014-06-01"), 12977,
                            "AL",123,"12", as.Date("2014-07-01"), NA,
                            "AZ",123,"12", as.Date("2014-01-01"),23435,
                            "AZ",123,"12", as.Date("2014-02-01"),NA,
                            "AZ",123,"12", as.Date("2014-03-01"),NA,
                            "AZ",123,"12", as.Date("2014-04-01"),453454,
                            "AZ",123,"12", as.Date("2014-05-01"),NA,
                            "AZ",123,"12", as.Date("2014-06-01"),NA,
                            "AZ",123,"12", as.Date("2014-07-01"),123976)
# Tried but 
sample_data %>% 
  group_by(A,B,C) %>% 
  mutate(tidyr::complete(Date = seq.Date(min(Date), max(Date), by="month")))

Thanks for your quick response. It seems if we use tribble, answer is different

sample_data <- tribble(~A, ~B, ~C, ~ Date, ~ Result,
        "AL",123,"12", as.Date("2014-02-01"), 12345,
        "AL",123,"12", as.Date("2014-04-01"), 12349,
        "AL",123,"12", as.Date("2014-06-01"), 12977,
        "AZ",123,"12", as.Date("2014-01-01"),23435,
        "AZ",123,"12", as.Date("2014-04-01"),453454,
        "AZ",123,"12", as.Date("2014-07-01"),123976)

sample_data %<>% complete(Date = seq.Date(min(Date), max(Date), by="month")) 

tidyr::complete(data = sample_data,
                tidyr::nesting(A, B, C), Date = seq.Date(from = min(Date),
                                                         to = max(Date),
                                                         by = "month"))

# A tibble: 21 x 5
   A         B C     Date       Result
   <chr> <dbl> <chr> <date>      <dbl>
 1 AL      123 12    2014-01-01     NA
 2 AL      123 12    2014-02-01  12345
 3 AL      123 12    2014-03-01     NA
 4 AL      123 12    2014-04-01  12349
 5 AL      123 12    2014-05-01     NA
 6 AL      123 12    2014-06-01  12977
 7 AL      123 12    2014-07-01     NA
 8 AZ      123 12    2014-01-01  23435
 9 AZ      123 12    2014-02-01     NA
10 AZ      123 12    2014-03-01     NA
# … with 11 more rows
# Leads to unnecessary rows !

Please check once again. It doesn't happen to me, as you can see below. I've added my session information, too.

library(tibble)
library(tidyr)

sample_data_tibble <- tibble(A = c("AL", "AL", "AL", "AZ", "AZ", "AZ"),
                             B = c(123, 123, 123, 123, 123, 123),
                             C = c("12", "12", "12", "12", "12", "12"),
                             Date = as.Date(x = c("2014-02-01", "2014-04-01", "2014-06-01", "2014-01-01", "2014-04-01", "2014-07-01")),
                             Result = c(12345, 12349, 12977, 23435, 453454, 123976))

result_tibble <- sample_data_tibble %>%
  complete(nesting(A, B, C), Date = seq.Date(from = min(Date),
                                             to = max(Date),
                                             by = "month"))

sample_data_tribble <- tribble(~A, ~B, ~C, ~Date, ~Result,
                               "AL", 123, "12", as.Date(x = "2014-02-01"), 12345,
                               "AL", 123, "12", as.Date(x = "2014-04-01"), 12349,
                               "AL", 123, "12", as.Date(x = "2014-06-01"), 12977,
                               "AZ", 123, "12", as.Date(x = "2014-01-01"), 23435,
                               "AZ", 123, "12", as.Date(x = "2014-04-01"), 453454,
                               "AZ", 123, "12", as.Date(x = "2014-07-01"), 123976)

result_tribble <- sample_data_tribble %>%
  complete(nesting(A, B, C), Date = seq.Date(from = min(Date),
                                             to = max(Date),
                                             by = "month"))

all.equal(target = result_tibble,
          current = result_tribble)
#> [1] TRUE

sessionInfo()
#> R version 3.6.1 (2019-07-05)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 10 x64 (build 18362)
#> 
#> Matrix products: default
#> 
#> locale:
#> [1] LC_COLLATE=English_India.1252  LC_CTYPE=English_India.1252   
#> [3] LC_MONETARY=English_India.1252 LC_NUMERIC=C                  
#> [5] LC_TIME=English_India.1252    
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] tidyr_0.8.3  tibble_2.1.3
#> 
#> loaded via a namespace (and not attached):
#>  [1] Rcpp_1.0.1       assertthat_0.2.1 dplyr_0.8.3      digest_0.6.20   
#>  [5] crayon_1.3.4     R6_2.4.0         magrittr_1.5     evaluate_0.14   
#>  [9] highr_0.8        pillar_1.4.2     rlang_0.4.0      stringi_1.4.3   
#> [13] rmarkdown_1.13   tools_3.6.1      stringr_1.4.0    glue_1.3.1      
#> [17] purrr_0.3.2      xfun_0.8         yaml_2.2.0       compiler_3.6.1  
#> [21] pkgconfig_2.0.2  htmltools_0.3.6  tidyselect_0.2.5 knitr_1.23

Why do you suddenly use assignment pipe (%<>%) here? Most probably, this is the reason for your separate result. It changes the sample_data.

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.