I am working with R. I wrote the following program, which creates a table through a series of data manipulation steps:
#load library
library(dplyr)
# Fix the seed so the simulated data and the random thresholds are reproducible
set.seed(123)

# Simulated data: two normally distributed variables and one integer variable
# sampled with replacement from 1..1000
a1 <- rnorm(1000, mean = 100, sd = 10)
b1 <- rnorm(1000, mean = 100, sd = 5)
c1 <- sample.int(1000, 1000, replace = TRUE)
train_data <- data.frame(a1, b1, c1)

# Random thresholds: random_2 is drawn between random_1 and 120,
# and random_4 is drawn between random_3 and 120
random_1 <- runif(1, 80, 120)
random_2 <- runif(1, random_1, 120)
random_3 <- runif(1, 85, 120)
random_4 <- runif(1, random_3, 120)

# Bin each row: "a" if below the first pair of thresholds, otherwise "b" if
# below the second pair, otherwise "c"
train_data <- train_data %>%
  mutate(
    cat = ifelse(
      a1 <= random_1 & b1 <= random_3, "a",
      ifelse(a1 <= random_2 & b1 <= random_4, "b", "c")
    )
  )

# Attach the 60th percentile of c1 within each bin ("quant") to every row.
# The argument is spelled `probs` in full rather than relying on partial
# matching of `prob`.
final_table <- train_data %>%
  group_by(cat) %>%
  mutate(quant = quantile(c1, probs = 0.6)) %>%
  ungroup() %>%
  as.data.frame()

# "diff" is 1 when the bin's quantile is bigger than the row's c1, else 0
final_table$diff <- ifelse(final_table$quant > final_table$c1, 1, 0)

# One row per bin with the average of "diff", plus a "total" row holding the
# average over all rows
final_table_2 <- final_table %>%
  group_by(cat) %>%
  summarize(mean = mean(diff)) %>%
  add_row(cat = "total", mean = mean(final_table$diff)) %>%
  as.data.frame()

# Record the random thresholds used for this run next to the results
final_table_2$random_1 <- random_1
final_table_2$random_2 <- random_2
final_table_2$random_3 <- random_3
final_table_2$random_4 <- random_4

# Optional: view table
head(final_table_2)
cat mean random_1 random_2 random_3 random_4
1 a 0.5897436 95.67371 111.8133 94.00313 102.0569
2 b 0.5992366 95.67371 111.8133 94.00313 102.0569
3 c 0.5995423 95.67371 111.8133 94.00313 102.0569
4 total 0.5990000 95.67371 111.8133 94.00313 102.0569
Now, I am trying to create a loop that repeats this process 10 times. For each iteration, it should "stack" the new results on top of the older results (i.e. keep everything).
I can do this manually:
# Manual repeat of the whole procedure with a fresh set of random thresholds.
# This reuses train_data and overwrites its "cat" column and final_table.

# Random thresholds: random_2 is drawn between random_1 and 120,
# and random_4 is drawn between random_3 and 120
random_1 <- runif(1, 80, 120)
random_2 <- runif(1, random_1, 120)
random_3 <- runif(1, 85, 120)
random_4 <- runif(1, random_3, 120)

# Re-bin each row: "a" if below the first pair of thresholds, otherwise "b" if
# below the second pair, otherwise "c"
train_data <- train_data %>%
  mutate(
    cat = ifelse(
      a1 <= random_1 & b1 <= random_3, "a",
      ifelse(a1 <= random_2 & b1 <= random_4, "b", "c")
    )
  )

# Attach the 60th percentile of c1 within each bin ("quant") to every row.
# The argument is spelled `probs` in full rather than relying on partial
# matching of `prob`.
final_table <- train_data %>%
  group_by(cat) %>%
  mutate(quant = quantile(c1, probs = 0.6)) %>%
  ungroup() %>%
  as.data.frame()

# "diff" is 1 when the bin's quantile is bigger than the row's c1, else 0
final_table$diff <- ifelse(final_table$quant > final_table$c1, 1, 0)

# One row per bin with the average of "diff", plus a "total" row holding the
# average over all rows
final_table_3 <- final_table %>%
  group_by(cat) %>%
  summarize(mean = mean(diff)) %>%
  add_row(cat = "total", mean = mean(final_table$diff)) %>%
  as.data.frame()

# Record the random thresholds used for this run next to the results
final_table_3$random_1 <- random_1
final_table_3$random_2 <- random_2
final_table_3$random_3 <- random_3
final_table_3$random_4 <- random_4
From here, I can manually combine the results:
# Stack the two result tables row-wise, first run on top, and display them
final_table_4 <- do.call(rbind, list(final_table_2, final_table_3))
final_table_4
cat mean random_1 random_2 random_3 random_4
1 a 0.5897436 95.67371 111.8133 94.00313 102.0569
2 b 0.5992366 95.67371 111.8133 94.00313 102.0569
3 c 0.5995423 95.67371 111.8133 94.00313 102.0569
4 total 0.5990000 95.67371 111.8133 94.00313 102.0569
5 a 0.6002215 113.61826 118.0106 112.12262 114.5476
6 b 0.5961538 113.61826 118.0106 112.12262 114.5476
7 c 0.6000000 113.61826 118.0106 112.12262 114.5476
8 total 0.6000000 113.61826 118.0106 112.12262 114.5476
But this is not a very efficient way to repeat this process multiple times. Does anyone know of a quicker way to run this process 10 times and keep all the results in a single table? Can this be done with a loop?
Thanks