Adding a limit to the number of predictors in the olsrr package

Hi,

I am using olsrr::ols_step_all_possible() to look at all possible combinations of predictors in a regression with many independent variables.

I know that there are at least 3 control variables that I need to include. Otherwise, I can include as many independent variables as I want, as long as I avoid collinearity.

Is there a way to give ols_step_all_possible() some additional constraints? For example: consider only the combinations of predictors that include control1, control2, and control3, and additionally exclude any combination that contains var1 together with var2, var3, or var4, or that contains both var9 and var10.

I thought I'd just save the results to a table and then filter the table to get what I need, but since every additional candidate variable increases the number of possible variable sets exponentially, this doesn't seem feasible given how long the computation takes in R...

Thanks for any ideas!

# Example data set (dput-style literal): a data.frame with 40 rows.
# Columns:
#   dv        numeric response; every value is positive (the models fitted
#             later in this script use log(dv)).
#   iv1-iv6   numeric candidate predictors. Each one is 0 in rows 1-12 and
#             30-40 and takes a single constant non-zero value in rows 13-29,
#             so all six are scalar multiples of the same indicator.
#   control1  character; "mostly urban" in rows 1-29, "mostly rural" in
#             rows 30-40.
#   control2  numeric control variable.
#   control3  character labels (industry/cluster names), many distinct values.
data <- structure(list(dv = c(1.620525, 8.273554, 2.9471476, 0.38516948, 
22.444677, 0.5627783, 0.32347086, 23.465286, 35.01438, 4.1794767, 
6.0946875, 0.88717645, 6.5352798, 0.63300514, 1.0292054, 0.21336585, 
0.36019334, 17.203411, 1.2726696, 1.0189979, 0.16701117, 2.689854, 
0.35410365, 1.1903453, 0.60897493, 0.18345964, 3.1890416, 1.7016418, 
2.7290537, 6.8672185, 5.720347, 8.724248, 0.6427155, 0.18411201, 
20.445997, 33.49371, 4.640914, 9.786484, 4.72894, 7.459285), 
    iv1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0), iv2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), iv3 = c(0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 21652, 21652, 21652, 21652, 21652, 21652, 
    21652, 21652, 21652, 21652, 21652, 21652, 21652, 21652, 21652, 
    21652, 21652, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), iv4 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4781, 4781, 4781, 4781, 
    4781, 4781, 4781, 4781, 4781, 4781, 4781, 4781, 4781, 4781, 
    4781, 4781, 4781, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), iv5 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.17650692789692, 0.17650692789692, 
    0.17650692789692, 0.17650692789692, 0.17650692789692, 0.17650692789692, 
    0.17650692789692, 0.17650692789692, 0.17650692789692, 0.17650692789692, 
    0.17650692789692, 0.17650692789692, 0.17650692789692, 0.17650692789692, 
    0.17650692789692, 0.17650692789692, 0.17650692789692, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0), iv6 = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0.08825346394846, 0.08825346394846, 0.08825346394846, 
    0.08825346394846, 0.08825346394846, 0.08825346394846, 0.08825346394846, 
    0.08825346394846, 0.08825346394846, 0.08825346394846, 0.08825346394846, 
    0.08825346394846, 0.08825346394846, 0.08825346394846, 0.08825346394846, 
    0.08825346394846, 0.08825346394846, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0), control1 = c("mostly urban", "mostly urban", 
    "mostly urban", "mostly urban", "mostly urban", "mostly urban", 
    "mostly urban", "mostly urban", "mostly urban", "mostly urban", 
    "mostly urban", "mostly urban", "mostly urban", "mostly urban", 
    "mostly urban", "mostly urban", "mostly urban", "mostly urban", 
    "mostly urban", "mostly urban", "mostly urban", "mostly urban", 
    "mostly urban", "mostly urban", "mostly urban", "mostly urban", 
    "mostly urban", "mostly urban", "mostly urban", "mostly rural", 
    "mostly rural", "mostly rural", "mostly rural", "mostly rural", 
    "mostly rural", "mostly rural", "mostly rural", "mostly rural", 
    "mostly rural", "mostly rural"), control2 = c(585, 515, 10164, 
    1601, 791, 700, 2798, 320, 1910, 3585, 865, 1109, 2200, 285, 
    5843, 865, 523, 613, 1699, 158, 1215, 3915, 768, 875, 4301, 
    2531, 4100, 7386, 3766, 870, 375, 1321, 2165, 1240, 460, 
    4420, 6525, 750, 513, 7070), control3 = c("Agricultural Inputs and Services", 
    "Apparel", "Automotive", "Downstream Metal Products", "Forestry", 
    "Lighting and Electrical Equipment", "Livestock Processing", 
    "Nonmetal Mining", "Paper and Packaging", "Plastics", "Textile Manufacturing", 
    "Wood Products", "Aerospace Vehicles and Defense", "Agricultural Inputs and Services", 
    "Construction Products and Services", "Electric Power Generation and Transmission", 
    "Environmental Services", "Fishing and Fishing Products", 
    "Forestry", "Leather and Related Products", "Oil and Gas Production and Transportation", 
    "Paper and Packaging", "Performing Arts", "Textile Manufacturing", 
    "Transportation and Logistics", "Upstream Chemical Products", 
    "Upstream Metal Manufacturing", "Water Transportation", "Wood Products", 
    "Aerospace Vehicles and Defense", "Agricultural Inputs and Services", 
    "Downstream Metal Products", "Electric Power Generation and Transmission", 
    "Food Processing and Manufacturing", "Forestry", "Livestock Processing", 
    "Transportation and Logistics", "Wood Products", "Agricultural Inputs and Services", 
    "Construction Products and Services")), row.names = c(NA, 
40L), class = "data.frame")


library(olsrr)
library(tictoc)
# Fit two linear models of log(dv): a shorter one with three candidate
# predictors (iv1-iv3) and a longer one with six (iv1-iv6). Both include the
# three control variables.
mod.short <- lm(
  log(dv) ~ iv1 + iv2 + iv3 + control1 + control2 + control3,
  data = data
)
mod.longer <- lm(
  log(dv) ~ iv1 + iv2 + iv3 + iv4 + iv5 + iv6 + control1 + control2 + control3,
  data = data
)

# Time the all-possible-subsets search on the shorter model.
tic()
results <- ols_step_all_possible(mod.short)
toc() # 1.43 sec elapsed

# Time the same search on the longer model. `results` is overwritten, so only
# the longer model's output is inspected afterwards.
tic()
results <- ols_step_all_possible(mod.longer)
toc() # 10.57 sec elapsed

# R is case-sensitive: the base data viewer is utils::View(). Lowercase view()
# only exists when tibble/tidyverse is attached, which this script does not do,
# so the original call failed with "could not find function".
View(results)

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.