I am trying to use the mlrMBO library in R to perform optimization on the following function:
library(dplyr)
library(mlrMBO)
# Synthetic training data: two normally distributed features (a1, b1) and an
# integer column (c1) sampled with replacement from 1..1000.
a1 <- rnorm(n = 1000, mean = 100, sd = 10)
b1 <- rnorm(n = 1000, mean = 100, sd = 9)
c1 <- sample.int(n = 1000, size = 1000, replace = TRUE)

# Combine the three vectors column-wise; column names match the vector names.
train_data <- data.frame(a1 = a1, b1 = b1, c1 = c1)
# Objective function for mlrMBO.
#
# Assigns every row of the global `train_data` to one of three bins using the
# four cut-points in `x`, flags the rows whose `c1` exceeds the bin-specific
# threshold (150 / 300 / 400), and returns the overall share of flagged rows.
#
# x: four numeric cut-points, in the order declared in `par.set`:
#      x[1], x[3] - upper limits on a1 and b1 for bin "a"
#      x[2], x[4] - upper limits on a1 and b1 for bin "b"
#    Rows that satisfy neither pair of limits go to bin "c".
# Returns: a single numeric value, the mean of the 0/1 flags over all rows.
#          `minimize = TRUE` asks the optimizer for the smallest such value.
obj.fun <- makeSingleObjectiveFunction(
  name = "Some function",
  fn = function(x) {
    # Work on plain numbers whether `x` arrives as a vector or as a list.
    x <- unlist(x, use.names = FALSE)

    # c1 threshold that applies inside each bin
    cutoff <- c(a = 150, b = 300, c = 400)

    # Every row lands in exactly one bin, so a single vectorised pass replaces
    # the previous split-into-three-tables-and-rbind approach.
    binned <- train_data %>%
      mutate(
        cat = case_when(
          a1 <= x[1] & b1 <= x[3] ~ "a",
          a1 <= x[2] & b1 <= x[4] ~ "b",
          TRUE ~ "c"
        ),
        quant = as.numeric(c1 > cutoff[cat])
      )

    # Overall mean of the flags: this is the quantity being optimized.
    mean(binned$quant)
  },
  par.set = makeParamSet(
    makeNumericParam("x[1]", lower = 80, upper = 90),
    makeNumericParam("x[2]", lower = 95, upper = 110),
    makeNumericParam("x[3]", lower = 80, upper = 90),
    makeNumericParam("x[4]", lower = 95, upper = 110)
  ),
  minimize = TRUE
)
# MBO control object: stop after 20 sequential iterations and propose new
# points with the expected-improvement infill criterion.
ctrl <- makeMBOControl() %>%
  setMBOControlTermination(iters = 20L) %>%
  setMBOControlInfill(crit = makeMBOInfillCritEI())

# Surrogate learner derived from the control object and the objective.
lrn <- makeMBOLearner(control = ctrl, fun = obj.fun)

# Run the optimization. No initial design is supplied (design = NULL), so
# mbo() is left to generate one itself.
res <- mbo(
  fun = obj.fun,
  design = NULL,
  learner = lrn,
  control = ctrl,
  show.info = TRUE
)
When I look at the final results of the optimization, I see the following:
Solution Fitness Value: 1.784275e+02
Parameters at the Solution (parameter, gradient):
X[ 1] : 1.991896e+01 G[ 1] : 7.069061e-01
X[ 2] : 9.580277e+00 G[ 2] : -3.680317e-01
X[ 3] : 1.998836e+01 G[ 3] : 6.662898e-01
X[ 4] : 1.612573e+01 G[ 4] : -2.797171e-01
X[ 5] : 8.869973e-05 G[ 5] : -5.665712e-03
Solution Found Generation 5
Number of Generations Run 5
These do not seem to match the ranges that I specified:
par.set = makeParamSet(
makeNumericParam("x[1]", lower = 80, upper = 90),
makeNumericParam("x[2]", lower = 95, upper = 110),
makeNumericParam("x[3]", lower = 80, upper = 90),
makeNumericParam("x[4]", lower = 95, upper = 110)
Does anyone know why this is happening?
Thanks