How Do You Add New Data to `add_predictions`?


#1

I am trying to add new (future) data (not in the nested “data” column) to the add_predictions function in modelr. For example:

# Goal: fit one linear trend per stock index, then predict at a future date.
EuStockMarkets %>% 
  as.tibble() %>%
  # Turn the series' decimal-year time index into date-times.
  mutate(bus_day = date_decimal(as.numeric(time(EuStockMarkets)))) %>%
  # Long format: one row per (bus_day, stock) pair.
  gather(key = "stk_name", value = "price", -bus_day) %>% 
  group_by(stk_name) %>% 
  # One row per stock; its observations go into the list-column `data`.
  nest() %>%
  mutate(model = map(.x = data, .f = ~ lm(formula = price ~ bus_day, data = .x)), 
         tidy_model = map(model,tidy),
         # from == to, so this is a one-element list holding a single date-time.
         bus_day = as.list(seq.POSIXt(from = as.POSIXct(x = "2018-01-21 05:00:00", tz = "MST"), 
                                      to = as.POSIXct(x = "2018-01-21 05:00:00", tz = "MST"), 
                                      by = "day")), # future date
         # NOTE: .y is a bare date-time here, not a data frame with a bus_day
         # column; this is the call that raises the error reported below.
         yhat2018 = map2(.x = model, .y = bus_day, 
                         .f = ~ add_predictions(model = .x, data = .y)))
#> Error in EuStockMarkets %>% as.tibble() %>% mutate(bus_day = date_decimal(as.numeric(time(EuStockMarkets)))) %>% : could not find function "%>%"

I get

“Error in mutate_impl(.data, dots) :
Evaluation error: object ‘bus_day’ not found.”

Does anyone have some suggestions on how to get this to work?


#2

The data argument to add_predictions() should be a data.frame containing all the columns (named correctly) necessary to make the prediction. That means it needs to be a data.frame with a column named bus_day. What you are doing in the map2() call is just passing the bus_day vector of dates to add_predictions(), not a data.frame.

The quick and dirty fix is (look at the last line):

library(tidyverse)
library(modelr)
library(lubridate)
library(broom)

# Same pipeline as in the question; only the final add_predictions() call changes.
EuStockMarkets %>% 
  as.tibble() %>%
  # Turn the series' decimal-year time index into date-times.
  mutate(bus_day = date_decimal(as.numeric(time(EuStockMarkets)))) %>%
  # Long format: one row per (bus_day, stock) pair.
  gather(key = "stk_name", value = "price", -bus_day) %>% 
  group_by(stk_name) %>% 
  # One row per stock; its observations go into the list-column `data`.
  nest() %>%
  mutate(model = map(.x = data, .f = ~ lm(formula = price ~ bus_day, data = .x)), 
         tidy_model = map(model, tidy),
         # from == to, so this is a one-element list holding a single date-time.
         bus_day = as.list(seq.POSIXt(from = as.POSIXct(x = "2018-01-21 05:00:00", tz = "MST"), 
                                      to = as.POSIXct(x = "2018-01-21 05:00:00", tz = "MST"), 
                                      by = "day")),
         # The fix: wrap the date in a data.frame whose column is named bus_day,
         # matching the predictor name used in the model formula.
         yhat2018 = map2(.x = model, .y = bus_day, 
                         .f = ~ add_predictions(model = .x, data = data.frame(bus_day = .y))))

Created on 2018-01-18 by the reprex package (v0.1.1.9000).


#3

Hey @davis (or anyone else if Davis is busy),

I have another (dumb) followup question:
If I add future data for prediction, it works for a single day:

# Single future date: works, because a length-1 bus_day is recycled to every row.
EuStockMarkets %>% 
  as.tibble() %>%
  # Turn the series' decimal-year time index into date-times.
  mutate(bus_day = lubridate::date_decimal(as.numeric(time(EuStockMarkets)))) %>%
  # Long format: one row per (bus_day, stock) pair.
  gather(key = "stk_name", value = "price", -bus_day) %>% 
  group_by(stk_name) %>% 
  # One row per stock; its observations go into the list-column `data`.
  nest() %>%
  mutate(model = map(.x = data, .f = ~ lm(formula = price ~ bus_day, data = .x)), 
         tidy_model = map(model, tidy),
         # from == to, so the sequence contains exactly one date-time.
         bus_day = seq.POSIXt(from = as.POSIXct(x = "2018-01-01 05:00:00", tz = "MST"), 
                                      to = as.POSIXct(x = "2018-01-01 05:00:00", tz = "MST"), 
                                      by = "day"),
         # Each model is paired with that one date, wrapped in a one-row data.frame.
         yhat2018 = map2(.x = model, .y = bus_day, .f = ~ add_predictions(model = .x, data = data.frame(bus_day = .y)))) %>% 
  # Keep only the predictions and expand them into regular rows.
  select(yhat2018) %>% 
  unnest()

But it does not work for multiple days:

# Multiple future dates: fails, because bus_day no longer has length 1.
EuStockMarkets %>% 
  as.tibble() %>%
  # Turn the series' decimal-year time index into date-times.
  mutate(bus_day = lubridate::date_decimal(as.numeric(time(EuStockMarkets)))) %>%
  # Long format: one row per (bus_day, stock) pair.
  gather(key = "stk_name", value = "price", -bus_day) %>% 
  group_by(stk_name) %>% 
  # One row per stock; its observations go into the list-column `data`.
  nest() %>%
  mutate(model = map(.x = data, .f = ~ lm(formula = price ~ bus_day, data = .x)), 
         tidy_model = map(model, tidy),
         # Jan 1 through Jan 5 is five date-times; mutate() cannot fit a
         # length-5 vector into a column of the nested (one-row-per-stock) table.
         bus_day = seq.POSIXt(from = as.POSIXct(x = "2018-01-01 05:00:00", tz = "MST"), 
                                      to = as.POSIXct(x = "2018-01-05 05:00:00", tz = "MST"), 
                                      by = "day"),
         yhat2018 = map2(.x = model, .y = bus_day, .f = ~ add_predictions(model = .x, data = data.frame(bus_day = .y)))) %>% 
  select(yhat2018) %>% 
  unnest()

Any suggestions?


#4

The problem here is with the bus_day column. It used to be 1 date that mutate() knew how to repeat 4 times to stick in that column. Now it’s a vector of 5 dates, and mutate() doesn’t know how to fit 5 values into 4 rows. If it were me, I’d break this up a bit.

library(tidyverse)
library(broom)
library(modelr)

# Step 1: build one row per stock index, with its price history nested in `data`.
nested_stock <- EuStockMarkets %>% 
  as_tibble() %>%  # as.tibble() is deprecated; as_tibble() is the same conversion
  # Turn the series' decimal-year time index into date-times.
  mutate(bus_day = lubridate::date_decimal(as.numeric(time(EuStockMarkets)))) %>%
  # Long format: one row per (bus_day, stock) pair.
  gather(key = "stk_name", value = "price", -bus_day) %>% 
  group_by(stk_name) %>% 
  nest() %>%
  # Newer tidyr keeps the grouping after nest(); drop it so later steps see a
  # plain tibble regardless of the installed version.
  ungroup()

# Step 2: the future dates to predict for, as a one-column tibble whose column
# name matches the predictor used in the model formula.
forecast_dates <- tibble(
  bus_day = seq.POSIXt(
    from = as.POSIXct(x = "2018-01-01 05:00:00", tz = "MST"), 
    to   = as.POSIXct(x = "2018-01-05 05:00:00", tz = "MST"), 
    by   = "day")
) 

# Step 3: fit a model per stock and predict on the shared forecast dates.
nested_stock_models <- nested_stock %>%
  mutate(
    # list(...) has length 1, so the same tibble is recycled into every row.
    forecast_dates = list(forecast_dates),
    model          = map(.x = data, .f = ~ lm(formula = price ~ bus_day, data = .x)), 
    tidy_model     = map(model, tidy),
    # .y is already a data frame with a bus_day column, so it can be passed as-is.
    yhat2018       = map2(.x = model, .y = forecast_dates, .f = ~ add_predictions(model = .x, data = .y))
  ) 

nested_stock_models
#> # A tibble: 4 x 6
#>   stk_name data                 forecast_dates   model tidy_model yhat2018
#>   <chr>    <list>               <list>           <lis> <list>     <list>  
#> 1 DAX      <tibble [1,860 × 2]> <tibble [5 × 1]> <S3:… <data.fra… <tibble…
#> 2 SMI      <tibble [1,860 × 2]> <tibble [5 × 1]> <S3:… <data.fra… <tibble…
#> 3 CAC      <tibble [1,860 × 2]> <tibble [5 × 1]> <S3:… <data.fra… <tibble…
#> 4 FTSE     <tibble [1,860 × 2]> <tibble [5 × 1]> <S3:… <data.fra… <tibble…

# Expand the per-stock prediction tables into one long tibble.
# The list-column is named explicitly: calling unnest() with no columns is
# deprecated in tidyr >= 1.0, while unnest(yhat2018) works in old and new versions.
nested_stock_models %>%
  select(stk_name, yhat2018) %>% 
  unnest(yhat2018)
#> # A tibble: 20 x 3
#>    stk_name bus_day              pred
#>    <chr>    <dttm>              <dbl>
#>  1 DAX      2018-01-01 05:00:00 12841
#>  2 DAX      2018-01-02 05:00:00 12842
#>  3 DAX      2018-01-03 05:00:00 12843
#>  4 DAX      2018-01-04 05:00:00 12844
#>  5 DAX      2018-01-05 05:00:00 12846
#>  6 SMI      2018-01-01 05:00:00 19829
#>  7 SMI      2018-01-02 05:00:00 19831
#>  8 SMI      2018-01-03 05:00:00 19833
#>  9 SMI      2018-01-04 05:00:00 19835
#> 10 SMI      2018-01-05 05:00:00 19837
#> 11 CAC      2018-01-01 05:00:00  6919
#> 12 CAC      2018-01-02 05:00:00  6919
#> 13 CAC      2018-01-03 05:00:00  6920
#> 14 CAC      2018-01-04 05:00:00  6920
#> 15 CAC      2018-01-05 05:00:00  6921
#> 16 FTSE     2018-01-01 05:00:00 13550
#> 17 FTSE     2018-01-02 05:00:00 13552
#> 18 FTSE     2018-01-03 05:00:00 13553
#> 19 FTSE     2018-01-04 05:00:00 13554
#> 20 FTSE     2018-01-05 05:00:00 13555

Created on 2018-01-19 by the reprex package (v0.1.1.9000).