Error using tidymodels and stacks: Location 3 doesn't exist

While using tidymodels and stacks, I got the exact same error as FatBertLee when trying to predict:

Error: Can't subset columns that don't exist.
x Location 3 doesn't exist.
i There are only 2 columns.
Run `rlang::last_error()` to see where the error occurred.

Sample from train data:

structure(list(id = c(24, 269, 316, 382, 424, 505, 551, 572, 
716, 794, 848, 971, 1016, 1076, 1133, 1189, 1234, 1312, 1365, 
1532), ticker = c("ABEV3", "ALPA4", "ALSC3", "ALUP11", "AMAR3", 
"ANIM3", "ARTR3", "ARZZ3", "BBRK3", "BEEF3", "BEMA3", "BPHA3", 
"BRAP4", "BRFS3", "BRKM5", "BRML3", "BRPR3", "BTOW3", "BVMF3", 
"CCRO3"), data = structure(c(16525, 16525, 16525, 16525, 16525, 
16525, 16525, 16525, 16525, 16525, 16525, 16525, 16525, 16525, 
16525, 16525, 16525, 16525, 16525, 16525), class = "Date"), quarter = c(2015.1, 
2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 
2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 
2015.1, 2015.1, 2015.1), ret3m = c(0.1648611076, 0.4624908206, 
0.0662460568, 0.1028571429, -0.0282563749, -0.5321228611, -0.3796526055, 
-0.0484496124, 0.1234567901, -0.2246835443, 0.0394890001, -0.7213114754, 
-0.2236070381, 0.0180122226, -0.3681792074, 0.0992861778, 0.2915019763, 
-0.0927694407, 0.1747368421, 0.1073369565), lret = c(0.0245911872, 
-0.1918554545, -0.2690459849, -0.1322073384, -0.293577729, -0.5385714286, 
-0.0052356021, -0.1671799162, -0.2081447964, 0.1805309735, 0.2878354861, 
-0.1428571429, -0.206195547, 0.1062529384, 0.2842835131, -0.280110117, 
0.0209923664, -0.2422233554, -0.0283757382, -0.1956378057), alvo = structure(c(2L, 
3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 1L, 3L, 3L, 1L, 1L, 3L, 2L, 
3L, 2L, 3L), .Label = c("buy", "keep", "sell"), class = "factor"), 
    p_l = c(22.7777943671, 17.013232239, 14.3857944343, 5.3290052757, 
    81.6607210863, 7.0093163352, 6.2590675835, 19.1883018103, 
    -20.310374125, -1.2175821514, 9.4653309443, -0.4779179664, 
    -4.6062594326, 22.7418227837, 16.3991693792, 18.9767524219, 
    16.2272067076, -32.6503400222, 20.0746301096, 24.3704146153
    ), vpa = c(2.9345399772, 4.4559425807, 11.9759963667, 19.1280966065, 
    6.3576040791, 7.9663703125, 6.3450343162, 6.6939288694, 2.9627168318, 
    -0.1624250847, 8.7869794656, 3.0582252548, 26.7151370268, 
    17.5274286146, 4.3957933828, 21.0059526254, 20.4579281623, 
    11.8652171379, 10.8462018391, 2.3057105308), lpa = c(0.8099994101, 
    0.5754344538, 1.1747700189, 3.6216890398, 0.1726656318, 2.1713957927, 
    1.1982615461, 1.2794253625, -0.1344140676, -6.0365536664, 
    1.0269054571, -1.4228383275, -2.2990454956, 2.7834180488, 
    0.67076568, 0.8926711812, 0.8054374505, -0.6110196704, 0.5559255607, 
    0.6688437705), roe = c(27.6022619, 12.9138659965, 9.8093718716, 
    18.9338704957, 2.7158915475, 27.2570280759, 18.8850286135, 
    19.1132201654, -4.5368516535, 3716.51563365, 11.6866718663, 
    -46.5249682066, -8.6057784141, 15.880355927, 15.2592631543, 
    4.249610561, 3.937043107, -5.1496712056, 5.1255321353, 29.0081413769
    ), payout = c(1.0195292727, 0.931375658, 0.270250729, 0.3893211108, 
    0.633884108, 0.2684079991, 0.4913832489, 0.4968062245, -5.7744104024, 
    0, 0.6504980525, 0, -0.7461901921, 0.340716886, 0.9037266218, 
    0.5255796718, 7.4942170294, 0, 0.7291198294, 1.1855942496
    ), dy12m = c(0.0447696477, 0.0547441923, 0.018785944, 0.0730569948, 
    0.0077624113, 0.0382930355, 0.0785074202, 0.0258910991, 0.2843084212, 
    0, 0.0687242798, 0, 0.1619948253, 0.0149819515, 0.0551080729, 
    0.0276959756, 0.4618303793, 0, 0.0363204615, 0.0486489158
    ), p_vpa = c(6.2871864562, 2.197066013, 1.4111560727, 1.0089869576, 
    2.2178166216, 1.9105313214, 1.1820267041, 3.667502371, 0.9214515443, 
    -45.2516310079, 1.1061821685, 0.2223511819, 0.39640448, 3.6114824023, 
    2.5023924107, 0.8064380751, 0.6388721231, 1.6813851587, 1.0289316173, 
    7.0694043257), ativo_circulante = c(19241017000, 2349169000, 
    458771000, 2256679000, 1935689000, 372126000, 1329203000, 
    668561000, 172137000, 4902444000, 280288000, 874442000, 944722000, 
    17774588000, 15339781000, 1190542000, 898780000, 3609719000, 
    3118127000, 3384242000), liq_corr = c(0.8839175747, 1.8308613158, 
    1.2004998037, 1.6443639188, 2.2566264077, 2.1538559489, 0.7579286675, 
    3.0155975841, 3.430185521, 2.038108597, 2.4949751204, 0.6588875853, 
    0.7662929788, 1.8775702843, 0.9387753091, 1.3318633571, 1.9499653954, 
    1.4177171092, 1.4075117803, 0.6784915709), divida_bruta = c(2691043000, 
    573308000, 1376110000, 4124207000, 1175307000, 121178000, 
    5855332000, 98138000, 0, 6395461000, 76266000, 814095000, 
    1199600000, 12721903000, 23126794000, 5363491000, 4338561000, 
    2302677000, 1982951000, 11538724000), quant_on = c(15713667000, 
    241609000, 159060920, 461243596, 185532000, 82865593, 344444000, 
    88682000, 184936000, 178002062, 50923870, 363051086, 122171000, 
    851501628, 451668652, 462653000, 298228000, 255484410, 1801392256, 
    1765587200), ibov3m = c(0.0543782982, 0.0543782982, 0.0543782982, 
    0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 
    0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 
    0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 
    0.0543782982, 0.0543782982), volume3m = c(194265323.885, 
    7227171.01639, 5639329.77049, 2949280.65574, 1908696.86885, 
    14730050.7541, 8171198.16393, 2894382.19672, 2480344.04918, 
    6642555.04918, 1400969.86885, 1363838.03279, 23186487.6721, 
    145235386.705, 31063969.0984, 39373887.7049, 31119377.2951, 
    9243986.29508, 134097842.066, 89693737.7049), volat3m = c(0.204997689, 
    0.3147291039, 0.3302444855, 0.3489084169, 0.3506794611, 0.9888233707, 
    0.702416568, 0.4027960265, 0.4375357642, 0.4431475177, 0.3459458746, 
    0.5925460944, 0.5092286168, 0.2317206403, 0.6327772099, 0.4529299407, 
    0.3838324421, 0.5354411807, 0.4211022767, 0.4499589031)), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame"))

Code used:

pacman::p_load(tidyverse, tidymodels, xgboost, nnet, caret, stacks)

# Shared recipe ----
# Model `alvo` on all remaining columns, then move the identifier, date and
# return-related columns to the "ID" role so they stay in the data but are
# not treated as predictors.
# Note: `recipe()` has to be closed before piping into `update_role()`;
# leaving the pipe inside the `data =` argument does not parse.
rec <-
     recipe(alvo ~ ., data = train) %>%
     update_role(id, ticker, data, quarter, ret3m, lret, ibov3m,
                 volat3m, new_role = "ID")

# Base workflow that carries only the shared recipe; a model is attached
# to it further down.
wflow <- add_recipe(workflow(), rec)

# Control object passed to the tune_grid() calls whose results are later
# added to the stack as candidates.
ctrl_grid <- control_stack_grid()

# xgboost model ----
# Boosted-tree classifier: tree count fixed at 500, `mtry` and `min_n`
# left as tuning placeholders.
xb_spec <-
     boost_tree(mode = "classification",
                mtry = tune(),
                trees = 500,
                min_n = tune()) %>%
     set_engine("xgboost")

# Attach the xgboost spec to the shared recipe workflow.
xb_wflow <- add_model(wflow, xb_spec)

# Tune xgboost ----
# Select the RNG generator and seed before tuning so the run is repeatable.
RNGkind(kind = "L'Ecuyer-CMRG")
set.seed(seed = 1090943296)

# Grid search with 10 candidate parameter sets over the `cv_folds` resamples.
xb_res <-
     xb_wflow %>%
     tune_grid(resamples = cv_folds,
               grid = 10,
               control = ctrl_grid)

# nnet model ----
# Single-layer neural network classifier; hidden units, penalty and epochs
# are all left as tuning placeholders.
nn_spec <-
     mlp(mode = "classification",
         hidden_units = tune(),
         penalty = tune(),
         epochs = tune()) %>%
     set_engine("nnet")

# Same recipe as the shared one, plus normalization of every predictor.
nnet_rec <- step_normalize(rec, all_predictors())

# Dedicated workflow for the network, built from scratch because it uses
# its own recipe.
nn_wflow <-
     workflow() %>%
     add_recipe(nnet_rec) %>%
     add_model(nn_spec)

# Tune nnet ----
# Reset the RNG generator and seed to the same values used for the xgboost
# tuning run.
RNGkind(kind = "L'Ecuyer-CMRG")
set.seed(seed = 1090943296)

# Grid search with 10 candidate parameter sets over the `cv_folds` resamples.
nn_res <-
     nn_wflow %>%
     tune_grid(resamples = cv_folds,
               grid = 10,
               control = ctrl_grid)

# stack model ----
# Read inside-out: start an empty data stack, add the xgboost and nnet
# tuning results as candidates, blend their predictions, then fit the
# member models.
stack_model <-
     fit_members(
          blend_predictions(
               add_candidates(
                    add_candidates(stacks(), xb_res),
                    nn_res
               )
          )
     )

# predict ----
# Class-probability predictions from the fitted stack on the `test` data.
# This is the step at which the post reports the
# "Location 3 doesn't exist" error.
stack_pred <-
     stack_model %>%
     predict(new_data = test, type = "prob")

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.