@brshallo I'm conflicted because, TBH, I'm not at all sold on this method. I definitely think that you need thousands of resamples to do this. I have some results below that show that the bands seem to be more narrow than the ones that we can compute analytically.
I do have some code for regression models but it's still not ready to be public. I'll try to put it in a gist soon.
Example:
library(tidyverse)
library(tidymodels)
set.seed(123)
iris <- as_tibble(iris)
split <- initial_split(iris)
train <- training(split)
test <- testing(split)
model_variance <- rsample::bootstraps(train, 5000) %>%
mutate(mod = map(splits, ~ lm(Sepal.Length ~ Sepal.Width, data = analysis(.x))),
test_preds = map(mod, ~ tibble(
pred = predict(.x, test),
index = seq_len(nrow(test))
))) %>%
unnest(test_preds) %>%
group_by(index) %>%
mutate(m = pred - mean(pred)) %>%
select(index, m, pred)
sampling_variance <- rsample::vfold_cv(train, 10) %>%
mutate(
mod = map(splits, ~ lm(Sepal.Length ~ Sepal.Width, data = analysis(.x))),
resids = map2(
mod,
splits,
~ predict(.x, assessment(.y)) - assessment(.y)$Sepal.Length
)
) %>%
select(resids) %>%
unnest(resids)
lm_mod <- lm(Sepal.Length ~ Sepal.Width, data = iris)
analytical <-
predict(lm_mod, newdata = test, interval = "prediction") %>%
tibble::as_tibble() %>%
bind_cols(test)
tidyr::crossing(model_variance, sampling_variance) %>%
mutate(c = m + resids + pred) %>%
group_by(index) %>%
summarise(q_05 = quantile(c, 0.05),
q_95 = quantile(c, 0.95)) %>%
bind_cols(test, lm(Sepal.Length ~ Sepal.Width, data = train) %>% predict(test) %>% tibble(.pred = .)) %>%
ggplot(aes(x = Sepal.Width))+
geom_point(aes(y = Sepal.Length))+
geom_line(aes(y = q_05), colour = "red")+
geom_line(aes(y = q_95), colour = "red")+
geom_line(aes(y = .pred), colour = "blue") +
geom_line(data = analytical, aes(y = lwr), col = "red", lty = 2) +
geom_line(data = analytical, aes(y = upr), col = "red", lty = 2) +
ylim(c(3, 9))

Created on 2021-02-12 by the reprex package (v1.0.0.9000)