Please consider reprex and 2 attempts to solve this.
How can we filter the data by the following condition:
when
date1 is in between start_date and current_date
or
date2 is in between stop_date and current_date
or
when both datae1 and date2 are NA's
library(tidyverse)
data <- data.frame(date1 = c(as.Date('2012-01-01'),NA,NA,
seq(as.Date('2012-04-01'),
as.Date('2013-01-10'), by = 'month'),
NA,NA,NA,
seq(as.Date('2013-04-01'),
as.Date('2014-06-10'), by = 'month'),
NA,NA,NA,NA,NA
),
date2 = c(seq(as.Date('2013-01-01'),
as.Date('2013-11-10'), by = 'month'),
NA,NA,NA,NA,NA,
seq(as.Date('2015-04-01'),
as.Date('2016-10-10'), by = 'month'),
NA),
other_col1 = c(1:36),
other_col2 = LETTERS[1:36]
)
data
date1 date2 col1 col2
1 2012-01-01 2014-01-01 1 A
2 <NA> 2014-02-01 2 B
3 <NA> 2014-03-01 3 C
4 2012-04-01 2014-04-01 4 D
5 2012-05-01 2014-05-01 5 E
6 2012-06-01 2014-06-01 6 F
7 2012-07-01 2014-07-01 7 G
8 2012-08-01 2014-08-01 8 H
9 2012-09-01 2014-09-01 9 I
10 2012-10-01 2014-10-01 10 J
11 2012-11-01 2014-11-01 11 K
12 2012-12-01 <NA> 12 L
13 2013-01-01 <NA> 13 M
14 <NA> <NA> 14 N
15 <NA> <NA> 15 O
16 <NA> <NA> 16 P
17 2013-04-01 2015-04-01 17 Q
18 2013-05-01 2015-05-01 18 R
19 2013-06-01 2015-06-01 19 S
20 2013-07-01 2015-07-01 20 T
21 2013-08-01 2015-08-01 21 U
22 2013-09-01 2015-09-01 22 V
23 2013-10-01 2015-10-01 23 W
24 2013-11-01 2015-11-01 24 X
25 2013-12-01 2015-12-01 25 Y
26 2014-01-01 2016-01-01 26 Z
27 2014-02-01 2016-02-01 27 <NA>
28 2014-03-01 2016-03-01 28 <NA>
29 2014-04-01 2016-04-01 29 <NA>
30 2014-05-01 2016-05-01 30 <NA>
31 2014-06-01 2016-06-01 31 <NA>
32 <NA> 2016-07-01 32 <NA>
33 <NA> 2016-08-01 33 <NA>
34 <NA> 2016-09-01 34 <NA>
35 <NA> 2016-10-01 35 <NA>
36 <NA> <NA> 36 <NA>
current_date = as.Date("2014-01-01")
start_date = as.Date("2012-04-01")
end_date = as.Date("2016-06-01")
expected_data =
date1 date2 col1 col2
1 2012-01-01 2014-01-01 1 A
2 <NA> 2014-02-01 2 B
3 <NA> 2014-03-01 3 C
4 2012-04-01 2014-04-01 4 D
5 2012-05-01 2014-05-01 5 E
6 2012-06-01 2014-06-01 6 F
7 2012-07-01 2014-07-01 7 G
8 2012-08-01 2014-08-01 8 H
9 2012-09-01 2014-09-01 9 I
10 2012-10-01 2014-10-01 10 J
11 2012-11-01 2014-11-01 11 K
12 2012-12-01 <NA> 12 L
13 2013-01-01 <NA> 13 M
14 <NA> <NA> 14 N
15 <NA> <NA> 15 O
16 <NA> <NA> 16 P
17 2013-04-01 2015-04-01 17 Q
18 2013-05-01 2015-05-01 18 R
19 2013-06-01 2015-06-01 19 S
20 2013-07-01 2015-07-01 20 T
21 2013-08-01 2015-08-01 21 U
22 2013-09-01 2015-09-01 22 V
23 2013-10-01 2015-10-01 23 W
24 2013-11-01 2015-11-01 24 X
25 2013-12-01 2015-12-01 25 Y
26 2014-01-01 2016-01-01 26 Z
27 2014-02-01 2016-02-01 27 <NA>
28 2014-03-01 2016-03-01 28 <NA>
29 2014-04-01 2016-04-01 29 <NA>
30 2014-05-01 2016-05-01 30 <NA>
31 2014-06-01 2016-06-01 31 <NA>
36 <NA> <NA> 36 <NA>
Attempt 1:
data %>%
filter(
ifelse(is.na(date1) & is.na(date2)) {
# If both are NA, keep as it is
.
}
ifelse (!is.na(date1)) {
# date1 is not NA, check if it lies between current and start
date1 %between% c(current_date, start_date)
} ifelse (!is.na(date2)) {
# date2 is not NA, check if it lies between current and stop
date2 %between% c(stop_date,current_date)
},
# Else if both has value, keep if either matches the condition
date2 %between% c(stop_date,current_date) |
date1 %between% c(current_date, start_date)
)
Attempt 2:
data %>%
filter(
if(is.na(date1) & is.na(date2)) {
# If both are NA, keep as it is
.
}
else if(!is.na(date1) & is.na(date2)) {
# date1 is not NA, check if it lies between current and start
date1 %between% c(current_date, start_date)
} else if(!is.na(date2) & is.na(date1)) {
# date2 is not NA, check if it lies between current and stop
date2 %between% c(stop_date,current_date)
} else {
# Else if both has value, keep if either matches the condition
date2 %between% c(stop_date,current_date) |
date1 %between% c(current_date, start_date)
}
)
Warning in if (is.na(date1) & is.na(date2)) { :
the condition has length > 1 and only the first element will be used
Warning in if (!is.na(date1) & is.na(date2)) { :
the condition has length > 1 and only the first element will be used
Warning in if (!is.na(date2) & is.na(date1)) { :
the condition has length > 1 and only the first element will be used
Error: Evaluation error: object 'stop_date' not found.