How to filter dates using ifelse/between/if ?

Please consider reprex and 2 attempts to solve this.

How can we filter the data by the following condition:
when
date1 is in between start_date and current_date
or
date2 is in between stop_date and current_date
or
when both datae1 and date2 are NA's

library(tidyverse)
data <- data.frame(date1 = c(as.Date('2012-01-01'),NA,NA,
                             seq(as.Date('2012-04-01'), 
                               as.Date('2013-01-10'), by = 'month'),
                             NA,NA,NA,
                             seq(as.Date('2013-04-01'), 
                                 as.Date('2014-06-10'), by = 'month'),
                             NA,NA,NA,NA,NA
                             ),
                   date2 = c(seq(as.Date('2013-01-01'), 
                               as.Date('2013-11-10'), by = 'month'),
                             NA,NA,NA,NA,NA,
                             seq(as.Date('2015-04-01'), 
                                 as.Date('2016-10-10'), by = 'month'),
                             NA),
                   other_col1 = c(1:36),
                   other_col2 = LETTERS[1:36]
                   )
data
        date1      date2 col1 col2
1  2012-01-01 2014-01-01    1    A
2        <NA> 2014-02-01    2    B
3        <NA> 2014-03-01    3    C
4  2012-04-01 2014-04-01    4    D
5  2012-05-01 2014-05-01    5    E
6  2012-06-01 2014-06-01    6    F
7  2012-07-01 2014-07-01    7    G
8  2012-08-01 2014-08-01    8    H
9  2012-09-01 2014-09-01    9    I
10 2012-10-01 2014-10-01   10    J
11 2012-11-01 2014-11-01   11    K
12 2012-12-01       <NA>   12    L
13 2013-01-01       <NA>   13    M
14       <NA>       <NA>   14    N
15       <NA>       <NA>   15    O
16       <NA>       <NA>   16    P
17 2013-04-01 2015-04-01   17    Q
18 2013-05-01 2015-05-01   18    R
19 2013-06-01 2015-06-01   19    S
20 2013-07-01 2015-07-01   20    T
21 2013-08-01 2015-08-01   21    U
22 2013-09-01 2015-09-01   22    V
23 2013-10-01 2015-10-01   23    W
24 2013-11-01 2015-11-01   24    X
25 2013-12-01 2015-12-01   25    Y
26 2014-01-01 2016-01-01   26    Z
27 2014-02-01 2016-02-01   27 <NA>
28 2014-03-01 2016-03-01   28 <NA>
29 2014-04-01 2016-04-01   29 <NA>
30 2014-05-01 2016-05-01   30 <NA>
31 2014-06-01 2016-06-01   31 <NA>
32       <NA> 2016-07-01   32 <NA>
33       <NA> 2016-08-01   33 <NA>
34       <NA> 2016-09-01   34 <NA>
35       <NA> 2016-10-01   35 <NA>
36       <NA>       <NA>   36 <NA>
current_date = as.Date("2014-01-01")
start_date = as.Date("2012-04-01")
end_date = as.Date("2016-06-01")

expected_data =
  date1      date2 col1 col2
1  2012-01-01 2014-01-01    1    A
2        <NA> 2014-02-01    2    B
3        <NA> 2014-03-01    3    C
4  2012-04-01 2014-04-01    4    D
5  2012-05-01 2014-05-01    5    E
6  2012-06-01 2014-06-01    6    F
7  2012-07-01 2014-07-01    7    G
8  2012-08-01 2014-08-01    8    H
9  2012-09-01 2014-09-01    9    I
10 2012-10-01 2014-10-01   10    J
11 2012-11-01 2014-11-01   11    K
12 2012-12-01       <NA>   12    L
13 2013-01-01       <NA>   13    M
14       <NA>       <NA>   14    N
15       <NA>       <NA>   15    O
16       <NA>       <NA>   16    P
17 2013-04-01 2015-04-01   17    Q
18 2013-05-01 2015-05-01   18    R
19 2013-06-01 2015-06-01   19    S
20 2013-07-01 2015-07-01   20    T
21 2013-08-01 2015-08-01   21    U
22 2013-09-01 2015-09-01   22    V
23 2013-10-01 2015-10-01   23    W
24 2013-11-01 2015-11-01   24    X
25 2013-12-01 2015-12-01   25    Y
26 2014-01-01 2016-01-01   26    Z
27 2014-02-01 2016-02-01   27 <NA>
  28 2014-03-01 2016-03-01   28 <NA>
  29 2014-04-01 2016-04-01   29 <NA>
  30 2014-05-01 2016-05-01   30 <NA>
  31 2014-06-01 2016-06-01   31 <NA>
36       <NA>       <NA>   36 <NA>

Attempt 1:

data %>% 
  filter(
    ifelse(is.na(date1) & is.na(date2)) {
      # If both are NA, keep as it is 
      .
    }
    ifelse (!is.na(date1)) {
      # date1 is not NA, check if it lies between current and start
      date1 %between% c(current_date, start_date)
    } ifelse (!is.na(date2)) {
      # date2 is not NA, check if it lies between current and stop
      date2 %between% c(stop_date,current_date)
    }, 
    # Else if both has value, keep if either  matches the condition
    date2 %between% c(stop_date,current_date) |
      date1 %between% c(current_date, start_date)
)

Attempt 2:

data %>% 
  filter(
    if(is.na(date1) & is.na(date2)) {
      # If both are NA, keep as it is 
      .
    }
    else if(!is.na(date1) & is.na(date2)) {
      # date1 is not NA, check if it lies between current and start
      date1 %between% c(current_date, start_date)
    } else if(!is.na(date2) & is.na(date1)) {
      # date2 is not NA, check if it lies between current and stop
      date2 %between% c(stop_date,current_date)
    } else {
    # Else if both has value, keep if either  matches the condition
    date2 %between% c(stop_date,current_date) |
      date1 %between% c(current_date, start_date)
    }
)
Warning in if (is.na(date1) & is.na(date2)) { :
  the condition has length > 1 and only the first element will be used
Warning in if (!is.na(date1) & is.na(date2)) { :
  the condition has length > 1 and only the first element will be used
Warning in if (!is.na(date2) & is.na(date1)) { :
  the condition has length > 1 and only the first element will be used
Error: Evaluation error: object 'stop_date' not found.

Hi @AbhishekHP. You may use %within% to check whether the date is within the interval.

library(tidyverse)
library(lubridate)

data <- data.frame(date1 = c(as.Date('2012-01-01'),NA,NA,
                             seq(as.Date('2012-04-01'), 
                                 as.Date('2013-01-10'), by = 'month'),
                             NA,NA,NA,
                             seq(as.Date('2013-04-01'), 
                                 as.Date('2014-06-10'), by = 'month'),
                             NA,NA,NA,NA,NA
),
date2 = c(seq(as.Date('2014-01-01'), 
              as.Date('2014-11-10'), by = 'month'),
          NA,NA,NA,NA,NA,
          seq(as.Date('2015-04-01'), 
              as.Date('2016-10-10'), by = 'month'),
          NA),
other_col1 = c(1:36),
other_col2 = LETTERS[1:36]
)

current_date = as.Date("2014-01-01")
start_date = as.Date("2012-04-01")
end_date = as.Date("2016-06-01")


data %>%
  filter(as.Date(date1) %within% interval(start_date, current_date) |
           as.Date(date2) %within% interval(current_date, end_date) |
           (is.na(date1) & is.na(date2)))
#>         date1      date2 other_col1 other_col2
#> 1  2012-01-01 2014-01-01          1          A
#> 2        <NA> 2014-02-01          2          B
#> 3        <NA> 2014-03-01          3          C
#> 4  2012-04-01 2014-04-01          4          D
#> 5  2012-05-01 2014-05-01          5          E
#> 6  2012-06-01 2014-06-01          6          F
#> 7  2012-07-01 2014-07-01          7          G
#> 8  2012-08-01 2014-08-01          8          H
#> 9  2012-09-01 2014-09-01          9          I
#> 10 2012-10-01 2014-10-01         10          J
#> 11 2012-11-01 2014-11-01         11          K
#> 12 2012-12-01       <NA>         12          L
#> 13 2013-01-01       <NA>         13          M
#> 14       <NA>       <NA>         14          N
#> 15       <NA>       <NA>         15          O
#> 16       <NA>       <NA>         16          P
#> 17 2013-04-01 2015-04-01         17          Q
#> 18 2013-05-01 2015-05-01         18          R
#> 19 2013-06-01 2015-06-01         19          S
#> 20 2013-07-01 2015-07-01         20          T
#> 21 2013-08-01 2015-08-01         21          U
#> 22 2013-09-01 2015-09-01         22          V
#> 23 2013-10-01 2015-10-01         23          W
#> 24 2013-11-01 2015-11-01         24          X
#> 25 2013-12-01 2015-12-01         25          Y
#> 26 2014-01-01 2016-01-01         26          Z
#> 27 2014-02-01 2016-02-01         27       <NA>
#> 28 2014-03-01 2016-03-01         28       <NA>
#> 29 2014-04-01 2016-04-01         29       <NA>
#> 30 2014-05-01 2016-05-01         30       <NA>
#> 31 2014-06-01 2016-06-01         31       <NA>
#> 32       <NA>       <NA>         36       <NA>

Created on 2019-09-26 by the reprex package (v0.3.0)

5 Likes

Alternatively, which returns the same result

data %>%
	filter(between (as.Date(date1), start_date, current_date) |
				 	between(as.Date(date2), current_date, end_date) |
				 	(is.na(date1) & is.na(date2)))
1 Like

thanks for the code. Not sure why it is not resulting in expected result

thanks for the code. Not sure why it is not resulting in expected result

@raytong and @valeri

library(tidyverse)
library(lubridate)
data <- data.frame(date1 = c(as.Date('2012-01-01'),NA,NA,
                             seq(as.Date('2012-04-01'), 
                               as.Date('2013-01-10'), by = 'month'),
                             NA,NA,NA,
                             seq(as.Date('2013-04-01'), 
                                 as.Date('2014-06-10'), by = 'month'),
                             NA,NA,NA,NA,NA
                             ),
                   date2 = c(seq(as.Date('2013-01-01'), 
                               as.Date('2013-11-10'), by = 'month'),
                             NA,NA,NA,NA,NA,
                             seq(as.Date('2015-04-01'), 
                                 as.Date('2016-10-10'), by = 'month'),
                             NA),
                   other_col1 = c(1:36),
                   other_col2 = LETTERS[1:36]
                   )
data

current_date = as.Date("2014-01-01")
start_date = as.Date("2012-04-01")
end_date = as.Date("2016-06-01")

raytong_output = data %>%
  filter(as.Date(date1) %within% interval(start_date, current_date) |
           as.Date(date2) %within% interval(current_date, end_date) |
           (is.na(date1) & is.na(date2)))

raytong_output

raytong_output
        date1      date2 other_col1 other_col2
1  2012-04-01 2013-04-01          4          D
2  2012-05-01 2013-05-01          5          E
3  2012-06-01 2013-06-01          6          F
4  2012-07-01 2013-07-01          7          G
5  2012-08-01 2013-08-01          8          H
6  2012-09-01 2013-09-01          9          I
7  2012-10-01 2013-10-01         10          J
8  2012-11-01 2013-11-01         11          K
9  2012-12-01       <NA>         12          L
10 2013-01-01       <NA>         13          M
11       <NA>       <NA>         14          N
12       <NA>       <NA>         15          O
13       <NA>       <NA>         16          P
14 2013-04-01 2015-04-01         17          Q
15 2013-05-01 2015-05-01         18          R
16 2013-06-01 2015-06-01         19          S
17 2013-07-01 2015-07-01         20          T
18 2013-08-01 2015-08-01         21          U
19 2013-09-01 2015-09-01         22          V
20 2013-10-01 2015-10-01         23          W
21 2013-11-01 2015-11-01         24          X
22 2013-12-01 2015-12-01         25          Y
23 2014-01-01 2016-01-01         26          Z
24 2014-02-01 2016-02-01         27       <NA>
25 2014-03-01 2016-03-01         28       <NA>
26 2014-04-01 2016-04-01         29       <NA>
27 2014-05-01 2016-05-01         30       <NA>
28 2014-06-01 2016-06-01         31       <NA>
29       <NA>       <NA>         36       <NA>
output_valeri = data %>%
  filter(between (as.Date(date1), start_date, current_date) |
           between(as.Date(date2), current_date, end_date) |
           (is.na(date1) & is.na(date2)))

output_valeri
> output_valeri
        date1      date2 other_col1 other_col2
1  2012-04-01 2013-04-01          4          D
2  2012-05-01 2013-05-01          5          E
3  2012-06-01 2013-06-01          6          F
4  2012-07-01 2013-07-01          7          G
5  2012-08-01 2013-08-01          8          H
6  2012-09-01 2013-09-01          9          I
7  2012-10-01 2013-10-01         10          J
8  2012-11-01 2013-11-01         11          K
9  2012-12-01       <NA>         12          L
10 2013-01-01       <NA>         13          M
11       <NA>       <NA>         14          N
12       <NA>       <NA>         15          O
13       <NA>       <NA>         16          P
14 2013-04-01 2015-04-01         17          Q
15 2013-05-01 2015-05-01         18          R
16 2013-06-01 2015-06-01         19          S
17 2013-07-01 2015-07-01         20          T
18 2013-08-01 2015-08-01         21          U
19 2013-09-01 2015-09-01         22          V
20 2013-10-01 2015-10-01         23          W
21 2013-11-01 2015-11-01         24          X
22 2013-12-01 2015-12-01         25          Y
23 2014-01-01 2016-01-01         26          Z
24 2014-02-01 2016-02-01         27       <NA>
25 2014-03-01 2016-03-01         28       <NA>
26 2014-04-01 2016-04-01         29       <NA>
27 2014-05-01 2016-05-01         30       <NA>
28 2014-06-01 2016-06-01         31       <NA>
29       <NA>       <NA>         36       <NA>

Hi @AbhishekHP - running your code above, I get the exact same result as you (both outputs are the same) - what result did you expect?

In date2, it shouldnt include 2013 as per our filtering. isnt it ?

We see this because we have an OR condition. For the rows you mention, the condition on date1 is met and since we have an OR then the row is kept in the filtering - e.g. in the first row date1 is 2012-04-01 which satisfies between (as.Date(date1), start_date, current_date)

Just to add a bit: between uses weak inequalities:

R Documentation - between {dplyr}

Do values in a numeric vector fall in specified range?

Description

This is a shortcut for x >= left & x <= right , implemented efficiently in C++ for local values, and translated to the appropriate SQL for remote tables.

2 Likes

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.