 # Using “Expand.Grid” and “mapply” to evaluate functions

I am working with the R programming language. I am trying to evaluate a function I defined, at points contained in two grids I defined.

First, I created some sample data for this problem:

``````#load library
library(dplyr)
library(data.table)

# create some data for this example
set.seed(123)
a1 = rnorm(1000,100,10)
b1 = rnorm(1000,100,5)
c1 = sample.int(1000, 1000, replace = TRUE)
train_data = data.frame(a1,b1,c1)
``````

Then, I defined the two grids:

``````#grid_1 - does not work for some reason
random_1 <- seq(80,100,5)
random_2 <- seq(random_1,120,5)
random_3 <- seq(85,120,5)
random_4 <- seq(random_3,120,5)
split_1 =  seq(0,1,0.5)
split_2 =  seq(0,1,0.5)
split_3 =  seq(0,1,0.5)
DF_2 <- expand.grid(random_1 , random_2, random_3, random_4, split_1, split_2, split_3)

#grid_2
random_1 <- seq(80,100,5)
random_2 <- seq(85,120,5)
random_3 <- seq(85,120,5)
random_4 <- seq(90,120,5)
split_1 =  seq(0,1,0.5)
split_2 =  seq(0,1,0.5)
split_3 =  seq(0,1,0.5)
DF_1 <- expand.grid(random_1 , random_2, random_3, random_4, split_1, split_2, split_3)
``````

Next, I defined the function ("grid_function") that I want to evaluate at the points contained in these two grids:

``````#### define function
results_table <- data.frame()

grid_function <- function(random_1, random_2, random_3, random_4, split_1, split_2, split_3) {

#bin data according to random criteria
train_data <- train_data %>% mutate(cat = ifelse(a1 <= random_1 & b1 <= random_3, "a", ifelse(a1 <= random_2 & b1 <= random_4, "b", "c")))

train_data\$cat = as.factor(train_data\$cat)

#new splits
a_table = train_data %>%
filter(cat == "a") %>%
select(a1, b1, c1, cat)

b_table = train_data %>%
filter(cat == "b") %>%
select(a1, b1, c1, cat)

c_table = train_data %>%
filter(cat == "c") %>%
select(a1, b1, c1, cat)

#calculate random quantile ("quant") for each bin

table_a = data.frame(a_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_1)))

table_b = data.frame(b_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_2)))

table_c = data.frame(c_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_3)))

#create a new variable ("diff") that measures if the quantile is bigger tha the value of "c1"
table_a\$diff = ifelse(table_a\$quant > table_a\$c1,1,0)
table_b\$diff = ifelse(table_b\$quant > table_b\$c1,1,0)
table_c\$diff = ifelse(table_c\$quant > table_c\$c1,1,0)

#group all tables

final_table = rbind(table_a, table_b, table_c)

#create a table: for each bin, calculate the average of "diff"
final_table_2 = data.frame(final_table %>%
group_by(cat) %>%
summarize(
mean = mean(diff)
))

#add "total mean" to this table
final_table_2 = data.frame(final_table_2 %>% add_row(cat = "total", mean = mean(final_table\$diff)))

#format this table: add the random criteria to this table for reference
final_table_2\$random_1 = random_1

final_table_2\$random_2 = random_2

final_table_2\$random_3 = random_3

final_table_2\$random_4 = random_4

final_table_2\$split_1 = split_1

final_table_2\$split_2 = split_2

final_table_2\$split_3 = split_3

final_table_2\$iteration_number = i

results_table <- rbind(results_table, final_table_2)

final_results = dcast(setDT(results_table), iteration_number + random_1 + random_2 + random_3 + random_4 + split_1 + split_2 + split_3 ~ cat, value.var = 'mean')

}
``````

Question: Now, I am trying to evaluate "grid_function" at every point contained within "DF_1" and "DF_2". I tried two different ways to do this, but nothing seemed to work.

Method 1 :

``````group_by(DF_2, random_1, random_2, random_3, random_4, split_1, split_2, split_3) %>% dplyr::mutate(z = grid_function(random_1, random_2, random_3, random_4, split_1, split_2, split_3))

Error: Must group by variables found in `.data`.
Run `rlang::last_error()` to see where the error occurred.
``````

Method 2:

`````` do.call(mapply, c(function(random_1, random_2, random_3, random_4, split_1, split_2, split_3)) , unname(DF_2)))

Error: unexpected ')' in "do.call(mapply, c(function(random_1, random_2, random_3, random_4, split_1, split_2, split_3))"
``````

Can someone please show me how to fix this?

Thanks

I'm not sure what you want, because the final calculation of your function is going to return a `data.table` instead of a scalar. May be that's what your requirement is, but I think that's not a "minimal" example for your question in the title.

Are you looking for something like this?

``````# define different possible values for parameters
x <- 1:2
y <- seq(from = 0.25, to = 1, by = 0.25)
z <- letters[3:5]

# define the entire space of the parameters
space <- expand.grid(x, y, z)

# define function with parameters being arguments
fn <- function(arg1, arg2, arg3) {
arg_vals <- c(arg1, arg2, arg3)
args <- c("arg1", "arg2", "arg3")
paste(args, arg_vals, sep = ":", collapse = ",")
}

# apply function
space["do_call_apply"] <- apply(
X = space,
MARGIN = 1,
FUN = \(x) do.call(
what = fn,
args = unname(
obj = as.list(x = x),
force = TRUE
)
)
)

# results
space
#>    Var1 Var2 Var3           do_call_apply
#> 1     1 0.25    c arg1:1,arg2:0.25,arg3:c
#> 2     2 0.25    c arg1:2,arg2:0.25,arg3:c
#> 3     1 0.50    c arg1:1,arg2:0.50,arg3:c
#> 4     2 0.50    c arg1:2,arg2:0.50,arg3:c
#> 5     1 0.75    c arg1:1,arg2:0.75,arg3:c
#> 6     2 0.75    c arg1:2,arg2:0.75,arg3:c
#> 7     1 1.00    c arg1:1,arg2:1.00,arg3:c
#> 8     2 1.00    c arg1:2,arg2:1.00,arg3:c
#> 9     1 0.25    d arg1:1,arg2:0.25,arg3:d
#> 10    2 0.25    d arg1:2,arg2:0.25,arg3:d
#> 11    1 0.50    d arg1:1,arg2:0.50,arg3:d
#> 12    2 0.50    d arg1:2,arg2:0.50,arg3:d
#> 13    1 0.75    d arg1:1,arg2:0.75,arg3:d
#> 14    2 0.75    d arg1:2,arg2:0.75,arg3:d
#> 15    1 1.00    d arg1:1,arg2:1.00,arg3:d
#> 16    2 1.00    d arg1:2,arg2:1.00,arg3:d
#> 17    1 0.25    e arg1:1,arg2:0.25,arg3:e
#> 18    2 0.25    e arg1:2,arg2:0.25,arg3:e
#> 19    1 0.50    e arg1:1,arg2:0.50,arg3:e
#> 20    2 0.50    e arg1:2,arg2:0.50,arg3:e
#> 21    1 0.75    e arg1:1,arg2:0.75,arg3:e
#> 22    2 0.75    e arg1:2,arg2:0.75,arg3:e
#> 23    1 1.00    e arg1:1,arg2:1.00,arg3:e
#> 24    2 1.00    e arg1:2,arg2:1.00,arg3:e
``````
2 Likes

Hello @Yarnabrina ,

excellent example!
And I also like your remark about posting a (most) minimal example of the problem for this OP.

1 Like

Thank you everyone for your replies! Just to clarify, here is what I am trying to do:

I wrote this loop that evaluates the following "function: (it's actually a "loop") 100 times, at randomly selected inputs for "random_1, random_2, random_3, random_4, split_1, split_2, split_3":

``````
library(dplyr)

library(data.table)

set.seed(123)

# create some data for this example
a1 = rnorm(1000,100,10)
b1 = rnorm(1000,100,5)
c1 = sample.int(1000, 1000, replace = TRUE)
train_data = data.frame(a1,b1,c1)

####
results_table <- data.frame()

for (i in 1:100 ) {

#generate random numbers
random_1 =  runif(1, 80, 120)
random_2 =  runif(1, random_1, 120)
random_3 =  runif(1, 85, 120)
random_4 =  runif(1, random_3, 120)

#bin data according to random criteria
train_data <- train_data %>% mutate(cat = ifelse(a1 <= random_1 & b1 <= random_3, "a", ifelse(a1 <= random_2 & b1 <= random_4, "b", "c")))

train_data\$cat = as.factor(train_data\$cat)

#new splits
a_table = train_data %>%
filter(cat == "a") %>%
select(a1, b1, c1, cat)

b_table = train_data %>%
filter(cat == "b") %>%
select(a1, b1, c1, cat)

c_table = train_data %>%
filter(cat == "c") %>%
select(a1, b1, c1, cat)

split_1 =  runif(1,0, 1)
split_2 =  runif(1, 0, 1)
split_3 =  runif(1, 0, 1)

#calculate 60th quantile ("quant") for each bin

table_a = data.frame(a_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_1)))

table_b = data.frame(b_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_2)))

table_c = data.frame(c_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_3)))

#create a new variable ("diff") that measures if the quantile is bigger tha the value of "c1"
table_a\$diff = ifelse(table_a\$quant > table_a\$c1,1,0)
table_b\$diff = ifelse(table_b\$quant > table_b\$c1,1,0)
table_c\$diff = ifelse(table_c\$quant > table_c\$c1,1,0)

#group all tables

final_table = rbind(table_a, table_b, table_c)

#create a table: for each bin, calculate the average of "diff"
final_table_2 = data.frame(final_table %>%
group_by(cat) %>%
summarize(
mean = mean(diff)
))

#add "total mean" to this table
final_table_2 = data.frame(final_table_2 %>% add_row(cat = "total", mean = mean(final_table\$diff)))

#format this table: add the random criteria to this table for reference
final_table_2\$random_1 = random_1

final_table_2\$random_2 = random_2

final_table_2\$random_3 = random_3

final_table_2\$random_4 = random_4

final_table_2\$split_1 = split_1

final_table_2\$split_2 = split_2

final_table_2\$split_3 = split_3

final_table_2\$iteration_number = i

results_table <- rbind(results_table, final_table_2)

final_results = dcast(setDT(results_table), iteration_number + random_1 + random_2 + random_3 + random_4 + split_1 + split_2 + split_3 ~ cat, value.var = 'mean')

}
``````

Here is how the results of the above code look like:

``````head(final_results)
iteration_number random_1 random_2  random_3 random_4    split_1   split_2   split_3          a         b         c total
1:                1 95.67371 111.8133  94.00313 102.0569 0.84045638 0.6882731 0.7749321 0.82051282 0.6870229 0.7734554 0.730
2:                2 92.31360 110.0762 106.46871 109.5343 0.24615922 0.8777580 0.7847697 0.24731183 0.8777429 0.7840909 0.744
3:                3 81.02645 110.4645 116.42006 119.6172 0.11943576 0.9762721 0.9100522 0.14285714 0.9758162 0.9103448 0.943
4:                4 90.35986 116.7089 114.15588 116.7231 0.07675141 0.8661540 0.3236617 0.08139535 0.8658065 0.3207547 0.702
5:                5 89.28374 114.7103 119.70448 119.7725 0.08881443 0.6351936 0.8565509 0.09027778 0.6349614 0.8461538 0.573
6:                6 87.35767 103.8575  97.44462 116.0414 0.48372890 0.2319129 0.2701634 0.47368421 0.2326333 0.2711370 0.255
``````

Here is my question: Instead of evaluating the above function at "randomly selected points", I want to evaluate this function at points defined within the a grid.

First, I defined the gird:

``````#grid_2
random_1 <- seq(80,100,5)
random_2 <- seq(85,120,5)
random_3 <- seq(85,120,5)
random_4 <- seq(90,120,5)
split_1 =  seq(0.4,1,0.2)
split_2 =  seq(0.4,1,0.2)
split_3 =  seq(0.4,1,0.2)
DF_1 <- expand.grid(random_1 , random_2, random_3, random_4, split_1, split_2, split_3)

Var1 Var2 Var3 Var4 Var5 Var6 Var7
1   80   85   85   90  0.4  0.4  0.4
2   85   85   85   90  0.4  0.4  0.4
3   90   85   85   90  0.4  0.4  0.4
4   95   85   85   90  0.4  0.4  0.4
5  100   85   85   90  0.4  0.4  0.4
6   80   90   85   90  0.4  0.4  0.4
``````

Next, I converted that "loop" into a "function"

``````
results_table <- data.frame()

grid_function <- function(random_1, random_2, random_3, random_4, split_1, split_2, split_3) {

#bin data according to random criteria
train_data <- train_data %>% mutate(cat = ifelse(a1 <= random_1 & b1 <= random_3, "a", ifelse(a1 <= random_2 & b1 <= random_4, "b", "c")))

train_data\$cat = as.factor(train_data\$cat)

#new splits
a_table = train_data %>%
filter(cat == "a") %>%
select(a1, b1, c1, cat)

b_table = train_data %>%
filter(cat == "b") %>%
select(a1, b1, c1, cat)

c_table = train_data %>%
filter(cat == "c") %>%
select(a1, b1, c1, cat)

#calculate random quantile ("quant") for each bin

table_a = data.frame(a_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_1)))

table_b = data.frame(b_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_2)))

table_c = data.frame(c_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_3)))

#create a new variable ("diff") that measures if the quantile is bigger tha the value of "c1"
table_a\$diff = ifelse(table_a\$quant > table_a\$c1,1,0)
table_b\$diff = ifelse(table_b\$quant > table_b\$c1,1,0)
table_c\$diff = ifelse(table_c\$quant > table_c\$c1,1,0)

#group all tables

final_table = rbind(table_a, table_b, table_c)

#create a table: for each bin, calculate the average of "diff"
final_table_2 = data.frame(final_table %>%
group_by(cat) %>%
summarize(
mean = mean(diff)
))

#add "total mean" to this table
final_table_2 = data.frame(final_table_2 %>% add_row(cat = "total", mean = mean(final_table\$diff)))

#format this table: add the random criteria to this table for reference
final_table_2\$random_1 = random_1

final_table_2\$random_2 = random_2

final_table_2\$random_3 = random_3

final_table_2\$random_4 = random_4

final_table_2\$split_1 = split_1

final_table_2\$split_2 = split_2

final_table_2\$split_3 = split_3

final_table_2\$iteration_number = i

results_table <- rbind(results_table, final_table_2)

final_results = dcast(setDT(results_table), iteration_number + random_1 + random_2 + random_3 + random_4 + split_1 + split_2 + split_3 ~ cat, value.var = 'mean')

}
``````

Problem: Instead of evaluating the function at randomly selected points, such as `" 95.67371 111.8133 94.00313 102.0569 0.84045638 0.6882731 0.7749321 0.82051282 0.6870229 0.7734554 0.730"` , I want to evaluate the function at points defined within the grid, e.g. "` 80 85 85 90 0.4 0.4 0.4`"

Can you please show me how to do this?

Thank you so much for your help!

Thank you for restating your problem (as a non-minimal example).
See again to the answer of @Yarnabrina for the solution

1 Like

Thank you for your reply! I am trying to apply the answer kindly provided by @Yarnabrina , but I am having some difficulties. Can you please suggest something?

I will keep trying in the meantime.

Just want to thank everyone for their help! It is very kind!

@Yarnabrina : thank you for your reply! I was able to figure out how to solve this problem! I will post the code in the morning

``````library(dplyr)
library(data.table)

results_table <- data.frame()

grid_function <- function(train_data, random_1, random_2, random_3, random_4, split_1, split_2, split_3) {

#bin data according to random criteria
train_data <- train_data %>% mutate(cat = ifelse(a1 <= random_1 & b1 <= random_3, "a", ifelse(a1 <= random_2 & b1 <= random_4, "b", "c")))

train_data\$cat = as.factor(train_data\$cat)

#new splits
a_table = train_data %>%
filter(cat == "a") %>%
select(a1, b1, c1, cat)

b_table = train_data %>%
filter(cat == "b") %>%
select(a1, b1, c1, cat)

c_table = train_data %>%
filter(cat == "c") %>%
select(a1, b1, c1, cat)

#calculate random quantile ("quant") for each bin

table_a = data.frame(a_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_1)))

table_b = data.frame(b_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_2)))

table_c = data.frame(c_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_3)))

#create a new variable ("diff") that measures if the quantile is bigger tha the value of "c1"
table_a\$diff = ifelse(table_a\$quant > table_a\$c1,1,0)
table_b\$diff = ifelse(table_b\$quant > table_b\$c1,1,0)
table_c\$diff = ifelse(table_c\$quant > table_c\$c1,1,0)

#group all tables

final_table = rbind(table_a, table_b, table_c)

#create a table: for each bin, calculate the average of "diff"
final_table_2 = data.frame(final_table %>%
group_by(cat) %>%
summarize(
mean = mean(diff)
))

#add "total mean" to this table
final_table_2 = data.frame(final_table_2 %>% add_row(cat = "total", mean = mean(final_table\$diff)))

#format this table: add the random criteria to this table for reference
final_table_2\$random_1 = random_1

final_table_2\$random_2 = random_2

final_table_2\$random_3 = random_3

final_table_2\$random_4 = random_4

final_table_2\$split_1 = split_1

final_table_2\$split_2 = split_2

final_table_2\$split_3 = split_3

results_table <- rbind(results_table, final_table_2)

final_results = dcast(setDT(results_table), random_1 + random_2 + random_3 + random_4 + split_1 + split_2 + split_3 ~ cat, value.var = 'mean')

}

# create some data for this example
a1 = rnorm(1000,100,10)
b1 = rnorm(1000,100,5)
c1 = sample.int(1000, 1000, replace = TRUE)
train_data = data.frame(a1,b1,c1)

#grid

#grid_2
random_1 <- seq(80,100,5)
random_2 <- seq(85,120,5)
random_3 <- seq(85,120,5)
random_4 <- seq(90,120,5)
split_1 =  seq(0,1,0.1)
split_2 =  seq(0,1,0.1)
split_3 =  seq(0,1,0.1)
DF_1 <- expand.grid(random_1 , random_2, random_3, random_4, split_1, split_2, split_3)

colnames(DF_1) <- c("random_1" , "random_2", "random_3",                     "random_4", "split_1", "split_2", "split_3")

train_data_new <- copy(train_data)

resultdf1 <- apply(DF_1,1, # 1 means rows
FUN=function(x){
do.call(
# Call Function grid_function2 with the arguments in
# a list
grid_function,
# force list type for the arguments
c(list(train_data_new), as.list(
# make the row to a named vector
unlist(x)
)
))
}
)

l = resultdf1
final_output = rbindlist(l, fill = TRUE)
``````

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.