Generate random date from two variable fields

I am creating simulation data and have a data set that has date variables. I have two date fields and I would like to generate, for each row, a random date between those two variables. So, I'm using the variable field names as the min/max, or trying to dynamically calculate the difference and use that as x. I've tried runif, rdunif, and sample.int with no luck. Here is a reprex using sample.int.

Any assistance is greatly appreciated.

# Reprex from the question. It is kept exactly as posted because it reproduces
# the "invalid first argument" error reported for it.
#
# Two vectors of 100 random dates each: integer day counts are drawn between
# the two bounds and converted back to Date via `origin`.
# NOTE(review): rdunif() is presumably purrr::rdunif(); the reprex does not
# show its library() calls -- confirm.
test_date1 <- as.Date(rdunif(100, as.integer(as.Date("2015-01-01")), as.integer(as.Date("2016-01-01"))), origin = "1970-01-01")
test_date2 <- as.Date(rdunif(100, as.integer(as.Date("2017-01-01")), as.integer(as.Date("2018-01-01"))), origin = "1970-01-01")
# cbind() builds a numeric matrix here, so both columns of test_df hold plain
# day counts rather than Date values.
test_df <- as.data.frame(cbind(test_date1, test_date2))
# sample.int() receives a whole vector (one difference per row) as its first
# argument instead of a single count; this is the call named in the error.
test_df <- test_df %>% 
mutate(random_date = as.Date(test_date1 + sample.int(as.integer(test_date2 - test_date1), size = nrow(test_df), replace = TRUE)))

Error in sample.int(as.integer(test_date2 - test_date1), size = nrow(test_df), :
invalid first argument

I've tried several iterations with no luck
sample.int(as.integer(test_date2) - as.integer(test_date1), ...
sample.int(as.integer(as.Date(test_date2)) - as.integer(as.Date(test_date2)), ...
creating another date_diff variable and using that: sample.int(date_diff, ...

Do you need to do this using purrr or mutate?

Otherwise, these two posts are relevant.

I do need to use mutate. This is an existing data set with 100+ variables and I'm generating additional data within these tables. I had read through these posts, but since the start and stop dates change per row (those two variables I mentioned), I couldn't just designate them as they were in these links.

Right now, I'm providing a base R solution (which is most probably not what you want, and may be inappropriate for this category), as I'm much more comfortable there. I'll update my post if I come up with a mutate solution.

base R solution
# Base R: add one random date per row, drawn uniformly from the inclusive
# range [test_date1, test_date2] of that row.

n_rows <- 100L

# Simulated bounds. Sampling straight from a Date sequence keeps the Date
# class, so no integer round trip through `origin` is needed.
test_date1 <- sample(x = seq(from = as.Date("2015-01-01"),
                             to = as.Date("2016-01-01"),
                             by = "day"),
                     size = n_rows)

test_date2 <- sample(x = seq(from = as.Date("2017-01-01"),
                             to = as.Date("2018-01-01"),
                             by = "day"),
                     size = n_rows)

test_df <- data.frame(test_date1, test_date2)

# Index the Date columns row by row instead of calling apply() on the data
# frame: apply() converts the data frame to a character matrix first, so every
# date would arrive as text and have to be parsed again.
random_days <- vapply(
  X = seq_len(nrow(test_df)),
  FUN = function(i) {
    candidates <- seq(from = test_df$test_date1[i],
                      to = test_df$test_date2[i],
                      by = "day")
    # Return the day count; vapply() collects plain numbers, not Dates.
    as.numeric(candidates[sample.int(length(candidates), size = 1L)])
  },
  FUN.VALUE = numeric(1)
)

# The new column keeps the name `test_df` (the same as the data frame itself)
# so that it matches the header of the printed output.
test_df$test_df <- as.Date(x = random_days, origin = "1970-01-01")

test_df
#>     test_date1 test_date2    test_df
#> 1   2015-08-28 2017-04-23 2016-11-13
#> 2   2015-07-05 2017-05-13 2016-01-19
#> 3   2015-10-01 2017-09-03 2016-02-10
#> 4   2015-05-06 2017-06-03 2016-08-08
#> 5   2015-09-24 2017-01-16 2016-05-18
#> 6   2015-02-21 2017-11-09 2016-12-20
#> 7   2015-04-15 2017-01-20 2016-09-24
#> 8   2015-12-31 2017-10-04 2016-10-27
#> 9   2015-11-30 2017-10-15 2017-09-01
#> 10  2015-08-07 2017-11-11 2017-04-16
#> 11  2015-02-16 2017-11-14 2017-10-05
#> 12  2015-01-18 2017-10-22 2016-10-31
#> 13  2015-08-21 2017-05-17 2016-02-05
#> 14  2015-09-06 2017-08-14 2016-02-15
#> 15  2015-01-01 2017-07-14 2015-08-03
#> 16  2015-10-18 2017-01-12 2016-09-14
#> 17  2015-11-06 2017-11-21 2016-04-30
#> 18  2015-07-20 2017-05-20 2016-09-09
#> 19  2015-07-06 2017-05-21 2015-07-29
#> 20  2015-11-03 2017-04-30 2017-03-09
#> 21  2015-06-17 2017-07-01 2016-11-24
#> 22  2015-02-02 2017-05-05 2015-10-20
#> 23  2015-04-07 2017-11-16 2017-08-12
#> 24  2015-01-02 2017-11-13 2016-03-05
#> 25  2015-05-02 2017-09-13 2017-06-28
#> 26  2015-10-22 2017-04-15 2016-02-17
#> 27  2015-08-05 2017-07-29 2015-10-06
#> 28  2015-01-27 2017-03-12 2016-09-04
#> 29  2015-05-26 2017-12-16 2017-08-30
#> 30  2015-02-19 2017-10-12 2015-08-21
#> 31  2015-03-25 2017-10-14 2015-11-22
#> 32  2015-10-25 2017-05-31 2016-01-28
#> 33  2015-04-26 2017-01-11 2016-09-18
#> 34  2015-06-26 2017-04-03 2015-10-09
#> 35  2015-03-30 2017-02-11 2016-05-26
#> 36  2015-08-18 2017-08-23 2016-01-21
#> 37  2015-08-04 2017-04-10 2015-10-10
#> 38  2015-09-08 2017-07-15 2016-06-30
#> 39  2015-12-02 2017-04-22 2016-07-26
#> 40  2015-03-23 2017-04-18 2015-07-12
#> 41  2015-03-05 2017-07-23 2015-06-25
#> 42  2015-12-20 2017-06-16 2016-03-07
#> 43  2015-05-16 2017-11-29 2016-07-27
#> 44  2015-07-24 2017-06-01 2016-02-03
#> 45  2015-11-07 2017-10-24 2016-06-19
#> 46  2015-05-22 2017-09-27 2017-09-21
#> 47  2015-09-04 2017-02-15 2016-07-25
#> 48  2015-07-30 2017-04-06 2016-09-02
#> 49  2015-11-26 2017-12-14 2017-07-07
#> 50  2015-06-27 2017-03-26 2016-01-16
#> 51  2015-05-14 2017-01-24 2016-06-18
#> 52  2015-03-08 2017-01-05 2016-07-12
#> 53  2015-01-17 2017-08-24 2016-05-05
#> 54  2015-08-26 2017-08-16 2016-04-18
#> 55  2015-09-02 2017-06-14 2017-03-22
#> 56  2015-12-21 2017-04-11 2016-02-24
#> 57  2015-08-01 2017-11-05 2015-10-18
#> 58  2015-06-01 2017-07-04 2016-05-25
#> 59  2015-05-15 2017-03-01 2016-07-09
#> 60  2015-11-15 2017-02-06 2016-08-23
#> 61  2015-10-19 2018-01-01 2017-01-17
#> 62  2015-01-12 2017-07-10 2016-02-26
#> 63  2015-01-07 2017-08-17 2017-03-26
#> 64  2015-07-29 2017-06-23 2015-11-20
#> 65  2015-10-12 2017-11-03 2017-10-06
#> 66  2015-07-10 2017-09-02 2015-09-14
#> 67  2015-11-25 2017-07-11 2016-06-09
#> 68  2015-08-31 2017-10-01 2016-12-08
#> 69  2015-11-09 2017-04-05 2016-09-13
#> 70  2015-11-01 2017-10-16 2017-08-23
#> 71  2015-04-18 2017-08-28 2016-10-15
#> 72  2015-06-13 2017-12-17 2016-05-31
#> 73  2015-09-30 2017-08-06 2016-04-21
#> 74  2015-01-19 2017-09-15 2017-09-10
#> 75  2015-02-24 2017-12-31 2017-07-22
#> 76  2015-07-17 2017-03-19 2015-09-12
#> 77  2015-12-12 2017-12-27 2016-12-11
#> 78  2015-03-09 2017-03-18 2016-07-04
#> 79  2015-05-29 2017-03-16 2015-06-30
#> 80  2015-09-29 2017-05-14 2016-04-01
#> 81  2015-04-30 2017-03-22 2015-05-20
#> 82  2015-03-06 2017-10-05 2016-07-21
#> 83  2015-05-23 2017-06-13 2015-07-06
#> 84  2015-07-03 2017-07-03 2015-10-04
#> 85  2016-01-01 2017-02-12 2016-04-10
#> 86  2015-12-16 2017-10-30 2017-01-05
#> 87  2015-06-12 2017-04-27 2016-08-10
#> 88  2015-04-12 2017-02-23 2015-07-04
#> 89  2015-04-09 2017-01-30 2016-09-16
#> 90  2015-10-29 2017-12-04 2017-08-13
#> 91  2015-05-21 2017-01-23 2015-09-20
#> 92  2015-07-08 2017-07-05 2016-11-03
#> 93  2015-11-08 2017-11-23 2016-07-18
#> 94  2015-06-21 2017-02-24 2015-06-25
#> 95  2015-03-26 2017-10-27 2017-06-03
#> 96  2015-10-13 2017-01-13 2016-10-16
#> 97  2015-10-03 2017-02-25 2016-11-11
#> 98  2015-11-17 2017-11-20 2015-12-14
#> 99  2015-04-17 2017-12-11 2017-05-08
#> 100 2015-04-27 2017-09-30 2016-01-20

Created on 2019-03-14 by the reprex package (v0.2.1)

Update: And, here's the mutate version.

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(purrr)

# Simulated bounds, one start/end pair per row. Base sample() over a Date
# sequence replaces purrr::rdunif(), which is deprecated as of purrr 1.0.0;
# drawing with replacement gives the same discrete uniform distribution.
n_rows <- 100L
test_date1 <- sample(x = seq(from = as.Date("2015-01-01"),
                             to = as.Date("2016-01-01"),
                             by = "day"),
                     size = n_rows,
                     replace = TRUE)
test_date2 <- sample(x = seq(from = as.Date("2017-01-01"),
                             to = as.Date("2018-01-01"),
                             by = "day"),
                     size = n_rows,
                     replace = TRUE)
test_df <- data.frame(start = test_date1,
                      end = test_date2)

# rowwise() makes mutate() evaluate its expression once per row, so `start`
# and `end` are single dates and seq() can list that row's candidate days.
# ungroup() then drops the row-wise grouping so that later verbs operate on
# whole columns again.
(test_df <- test_df %>%
    rowwise() %>%
    mutate(random_date = sample(x = seq(from = start,
                                        to = end,
                                        by = "day"),
                                size = 1)) %>%
    ungroup())
#> # A tibble: 100 x 3
#>    start      end        random_date
#>    <date>     <date>     <date>     
#>  1 2015-07-02 2017-09-21 2017-06-12 
#>  2 2015-08-12 2017-11-02 2016-07-11 
#>  3 2016-01-01 2017-03-06 2016-06-13 
#>  4 2015-04-28 2017-10-20 2016-05-05 
#>  5 2015-03-25 2017-02-23 2016-02-02 
#>  6 2015-05-03 2017-02-16 2015-09-10 
#>  7 2015-01-11 2017-08-14 2017-07-01 
#>  8 2015-03-01 2017-04-09 2015-09-22 
#>  9 2015-06-12 2017-07-27 2016-09-08 
#> 10 2015-06-26 2017-04-02 2015-10-08 
#> # … with 90 more rows

Created on 2019-03-14 by the reprex package (v0.2.1)

Hope this helps.

1 Like

Thanks, it sure does!

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.