I posted the same question on the ggplot2 list because I am not sure if these two lists are linked.
Please consider the small dataset below. I am trying to bootstrap only one column (pd) 1000 times and estimate 95% intervals with quantiles (lower=0.025 and upper=0.975)
I was able to bootstrap the whole dataset but I get really high variances because I am resampling all the columns, therefore I only want to bootstrap pd.
require(rms)
require(dplyr)
require(plyr)
fish <- structure(list(wk = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5,
5, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8,
8, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10), pd = c(317.308439683869,
0, 126.719553152898, NA, NA, NA, NA, 2671.6, 3540.6976744186,
1270.35740604274, 1067.69362430466, 688.099646524154, 317.444499806234,
420.941879550524, 280.475476696762, 250.681324772507, 159.048160622895,
258.125109208457, 450.868907331836, 0, 120.83949704142, 244.794377928162,
0, 226.610029158717, 0, NA, NA, NA, 0, 0, 776.419523429887, 0,
0, 5572.7956254272, NA, 0, 235.711495898971, 0, 0, 0, 0, 0, 0,
158.796322731685, 0, 0, 0, 278.669954021457, 0, 0, 0, 0, 0, 0,
0, NA, 623.451776649746, 0, 440.704258124564, 0, 69.0758191406588,
0, 0, 51.2873010185801, 26.8224496254879, 104.366153205662, 0,
71.1744651415584, 0, 0)), .Names = c("wk", "pd"), row.names = c(NA,
70L), class = "data.frame")
fish
# Resampling the entire dataset works but the intervals are too wide when compared with other method (see rms script on the bottom)
x <- data.frame(boot=1:1000) %>%
group_by(boot) %>%
do(sample_n(fish, nrow(fish), replace=TRUE)) %>%
group_by(boot,wk)
plyr::ddply(x,'wk',summarise,Mean=mean(pd),lower=quantile(pd,prob=0.025),upper=quantile(pd,prob=0.975))
# These intervals are tighter than the ones above
boots <- fish %>%
group_by(wk) %>%
do(data.frame(rbind(smean.cl.boot(.$pd))))