Rather than trying to shoehorn this into an existing framework I'd do the following:
library(tidyverse)
data <- iris %>%
slice(1:100) %>%
as_tibble()
data
#> # A tibble: 100 x 5
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> <dbl> <dbl> <dbl> <dbl> <fct>
#> 1 5.1 3.5 1.4 0.2 setosa
#> 2 4.9 3 1.4 0.2 setosa
#> 3 4.7 3.2 1.3 0.2 setosa
#> 4 4.6 3.1 1.5 0.2 setosa
#> 5 5 3.6 1.4 0.2 setosa
#> 6 5.4 3.9 1.7 0.4 setosa
#> 7 4.6 3.4 1.4 0.3 setosa
#> 8 5 3.4 1.5 0.2 setosa
#> 9 4.4 2.9 1.4 0.2 setosa
#> 10 4.9 3.1 1.5 0.1 setosa
#> # ... with 90 more rows
First we solve the simplest problem: sampling two consecutive data points (note that this won't wrap around, i.e. you'll never sample the 100th and 1st data points together).
sample_cluster <- function(data) {
idx <- sample(1:99, 1)
slice(data, idx:(idx + 1))
}
sample_cluster(data)
#> # A tibble: 2 x 5
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> <dbl> <dbl> <dbl> <dbl> <fct>
#> 1 4.6 3.2 1.4 0.2 setosa
#> 2 5.3 3.7 1.5 0.2 setosa
Next we figure out how to get n clusters this way:
sample_n_clusters <- function(data, n_clusters) {
map_dfr(1:n_clusters, ~sample_cluster(data), .id = "cluster")
}
sample_n_clusters(data, 10)
#> # A tibble: 20 x 6
#> cluster Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> <chr> <dbl> <dbl> <dbl> <dbl> <fct>
#> 1 1 5 3.5 1.6 0.6 setosa
#> 2 1 5.1 3.8 1.9 0.4 setosa
#> 3 2 5.7 2.8 4.5 1.3 versicolor
#> 4 2 6.3 3.3 4.7 1.6 versicolor
#> 5 3 5 3.5 1.3 0.3 setosa
#> 6 3 4.5 2.3 1.3 0.3 setosa
#> 7 4 4.8 3.4 1.9 0.2 setosa
#> 8 4 5 3 1.6 0.2 setosa
#> 9 5 5.1 3.5 1.4 0.3 setosa
#> 10 5 5.7 3.8 1.7 0.3 setosa
#> 11 6 5 3.5 1.3 0.3 setosa
#> 12 6 4.5 2.3 1.3 0.3 setosa
#> 13 7 5.8 4 1.2 0.2 setosa
#> 14 7 5.7 4.4 1.5 0.4 setosa
#> 15 8 4.3 3 1.1 0.1 setosa
#> 16 8 5.8 4 1.2 0.2 setosa
#> 17 9 6.9 3.1 4.9 1.5 versicolor
#> 18 9 5.5 2.3 4 1.3 versicolor
#> 19 10 5.7 2.6 3.5 1 versicolor
#> 20 10 5.5 2.4 3.8 1.1 versicolor
And now we need to repeat 10,000 times (although I'll only do 20 here to save my laptop).
create_resamples <- function(data, n_resamples = 20, n_clusters = 10) {
map_dfr(
1:n_resamples,
~sample_n_clusters(data, n_clusters),
.id = "resample"
)
}
flat <- create_resamples(data)
flat
#> # A tibble: 400 x 7
#> resample cluster Sepal.Length Sepal.Width Petal.Length Petal.Width
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 1 1 5 3.5 1.6 0.6
#> 2 1 1 5.1 3.8 1.9 0.4
#> 3 1 2 4.4 2.9 1.4 0.2
#> 4 1 2 4.9 3.1 1.5 0.1
#> 5 1 3 5 3 1.6 0.2
#> 6 1 3 5 3.4 1.6 0.4
#> 7 1 4 6 2.9 4.5 1.5
#> 8 1 4 5.7 2.6 3.5 1
#> 9 1 5 5.7 2.6 3.5 1
#> 10 1 5 5.5 2.4 3.8 1.1
#> # ... with 390 more rows, and 1 more variable: Species <fct>
This is an embarrassingly parallel problem, so I'd it it with the furrr package for a free speedup:
library(furrr)
plan(multiprocess)
create_resamples2 <- function(data, n_resamples = 20, n_clusters = 10) {
future_map_dfr(
1:n_resamples,
~sample_n_clusters(data, n_clusters),
.id = "resample"
)
}
flat2 <- create_resamples2(data)
flat2
#> # A tibble: 400 x 7
#> resample cluster Sepal.Length Sepal.Width Petal.Length Petal.Width
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 1 1 4.4 3.2 1.3 0.2
#> 2 1 1 5 3.5 1.6 0.6
#> 3 1 2 5.1 3.4 1.5 0.2
#> 4 1 2 5 3.5 1.3 0.3
#> 5 1 3 4.7 3.2 1.6 0.2
#> 6 1 3 4.8 3.1 1.6 0.2
#> 7 1 4 5 3.4 1.5 0.2
#> 8 1 4 4.4 2.9 1.4 0.2
#> 9 1 5 5 3.2 1.2 0.2
#> 10 1 5 5.5 3.5 1.3 0.2
#> # ... with 390 more rows, and 1 more variable: Species <fct>
If you prefer to work with nested tables, as in rsample, you can nest() and get a very similar list-column:
nested <- flat2 %>%
nest(-resample)
nested
#> # A tibble: 20 x 2
#> resample data
#> <chr> <list>
#> 1 1 <tibble [20 x 6]>
#> 2 2 <tibble [20 x 6]>
#> 3 3 <tibble [20 x 6]>
#> 4 4 <tibble [20 x 6]>
#> 5 5 <tibble [20 x 6]>
#> 6 6 <tibble [20 x 6]>
#> 7 7 <tibble [20 x 6]>
#> 8 8 <tibble [20 x 6]>
#> 9 9 <tibble [20 x 6]>
#> 10 10 <tibble [20 x 6]>
#> 11 11 <tibble [20 x 6]>
#> 12 12 <tibble [20 x 6]>
#> 13 13 <tibble [20 x 6]>
#> 14 14 <tibble [20 x 6]>
#> 15 15 <tibble [20 x 6]>
#> 16 16 <tibble [20 x 6]>
#> 17 17 <tibble [20 x 6]>
#> 18 18 <tibble [20 x 6]>
#> 19 19 <tibble [20 x 6]>
#> 20 20 <tibble [20 x 6]>
Created on 2018-12-06 by the reprex package (v0.2.1)