debugging keras

brechbuehler · February 24, 2019, 12:51am

OK, I'm pasting it below. The FAQ you quote talks about "the error". So I could postulate the error is that pdb doesn't start up. Or that $history is all NaN.

#' ---
#' output:
#'   md_document:
#'     pandoc_args:
#'       - '--from=markdown-implicit_figures'
#'       - '--to=commonmark'
#'       - '--no-wrap'
#' ---



#+ reprex-setup, include = FALSE
options(tidyverse.quiet = TRUE)
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", error = TRUE)
knitr::opts_knit$set(upload.fun = knitr::imgur_upload)

#+ reprex-body
library(keras)
tensorflow:::use_session_with_seed(1)
tensorflow:::use_session_with_seed(1)

generator.demo.x.y <- function(X, Y,
                               min_index=1,
                               max_index=nrow(X),
                               shuffle = TRUE, 
                               batch_size = 32,
                               uneven.mode=c("undersize.final.batch",
                                 "bump.min.index", "wrap.around", "stop"))
{   
   uneven.mode <- match.arg(uneven.mode)
   uneven.count <- (max_index-min_index+1) %% batch_size
   if(uneven.count!=0) {
      switch(uneven.mode,
             "stop"=stop("data size is not an even multiple of batch_size ",
                         "but uneven.mode is set to stop?"),
             "bump.min.index"={
                txt <- paste("data size is not an even multiple of batch_size;",
                             "changing min_index from ", min_index, " to ",
                             min_index+uneven.count, ".\n", sep="")
                cat("Notice, data generator setup: ", txt, "\n", sep="")
                #warning(txt)
                min_index <- min_index + uneven.count
             },
             "undersize.final.batch" = "continue on, handled.later",
             "wrap.around"="continue on, handled.later",
             stop("unexpected uneven.mode"))
   }
   
   i.gen.x.y <- min_index
   function() {
      if(i.gen.x.y==min_index) {
         #Do this first time through as well as possibly right after each
         #wraparound when numrows mod batch_size is zero:
         if (shuffle) {
            rows.gen.x.y <<- sample(c(min_index:max_index),
                                    size = max_index-min_index+1)
         } else {
            rows.gen.x.y <<- min_index:max_index
         }
      }
      
      #At this point, rows.gen.x.y is an index (in desired order, shuffled or
      #not) into the data we will step through over multiple calls to the
      #generator function.  Also, i.gen.x.y points to somewhere in the body of
      #rows.gen.x.y (possibly close to or at, but not past, its end).  So select
      #some data, hopefully a full batch, possibly a partial or singleton batch
      #if near end
      pick.from.rows.gen.x.y <- i.gen.x.y:min(i.gen.x.y+batch_size-1, max_index)
      this.batch <- rows.gen.x.y[pick.from.rows.gen.x.y-min_index+1]

      #now bump i.gen.x.y up for next time.
      i.gen.x.y <<- i.gen.x.y + length(this.batch)

      #wrap around when you run out of data.  By using > not >=, ensure that
      #final value of training data gets used (even if it happens to be the only
      #member in the next batch!)
      if (i.gen.x.y> max_index) {
         i.gen.x.y <<- min_index
      }

      count.extra.rows.needed <- batch_size-length(this.batch)
      
      if(count.extra.rows.needed>0) {
         #undersize.final.batch requires no action, but wrap.around
         #needs to do the work normally handled at the top of each iteration:
         if(uneven.mode=="wrap.around") {
            #copy/paste of earlier code setting up data from the top
            if (shuffle) {
               rows.gen.x.y <<- sample(c(min_index:max_index),
                                       size = max_index-min_index+1)
            } else {
               rows.gen.x.y <<- min_index:max_index
            }
            
            #copy/paste of earlier code, but modified to extract
            #count.extra.rows.needed, not batch_size, and glom on to the current
            #undersized batch
            extra.pick.from.rows.gen.x.y <-
               i.gen.x.y:(i.gen.x.y+count.extra.rows.needed-1)
            extra.this.batch <-
               rows.gen.x.y[extra.pick.from.rows.gen.x.y-min_index+1]
            
            #now bump i.gen.x.y up for next time.
            i.gen.x.y <<- i.gen.x.y + length(extra.this.batch)
            
            this.batch <- c(this.batch, extra.this.batch)
         }
      }   

      #X shape
      # = (samples, timesteps, features)
      samples <- X[this.batch, , , drop=F]

      #Y is a vector of length matching dim1() of X.
      targets <- Y[this.batch]
      # ##Keras also would allow Y to be a 1D array if you prefer:
      # ##targets <- array(Y[this.batch], dim = c(length(this.batch)))

      #Do NOT use names on this list() iof length two; their presence currently
      #breaks the python interface downstream unless you add a name stripping 
      #wrapper into keras:::fit_generator
      list(samples, targets)
   }
}

fake <- function(dim, bias) array((seq(prod(dim))*(sqrt(5)-1)+bias)%%2-1, dim)

X <- fake(c(200, 21, 1), .1)
Y <- fake(200, .2)
validation.X <- fake(c(118, 21, 1), .3)
validation.Y <- fake(118, .5)

gen1 <- generator.demo.x.y(X, Y, shuffle=FALSE, batch_size=32,
                           uneven.mode="undersize.final.batch")

use_virtualenv("~/.virtualenvs/r-tensorflow")
early.stopping.callback.list <- 
   list(keras:::callback_early_stopping(monitor = "val_loss",
                                patience = 75,
                                verbose=1))
reduce.lr.callback.list <- 
   list(keras:::callback_reduce_lr_on_plateau(monitor = "val_loss",
                                      factor = 0.5,
                                      patience = 5,
                                      verbose=1))

callbacks <- list()
callbacks.prelim.run <- callbacks
callbacks.fit.run <- callbacks
need.prelim.phase <- FALSE
lr.sched.callback.list <- NULL
callbacks.prelim.run <- c(callbacks.prelim.run, early.stopping.callback.list)
need.prelim.phase <- TRUE

callbacks.prelim.run <- c(callbacks.prelim.run, reduce.lr.callback.list)
add.lr.sched <- TRUE

validation.shuffle <- FALSE
num.epochs.fit.run <- 200


full.train.length <- dim(X)[1]
partial.train.length <- round(0.9 * full.train.length)
partial.train.indices <- 1:partial.train.length
partial_train_X <- X[partial.train.indices, , , drop=F]
partial_train_Y <- Y[partial.train.indices]
         
partial_val_X <- X[-partial.train.indices, , , drop=F]
partial_val_Y <- Y[-partial.train.indices]

callbacks.fit.run <- c(callbacks.fit.run, lr.sched.callback.list)


data.shape <- dim(X)[-1]

batch_size <- 32
steps_per_epoch <- round(dim(X)[1]/batch_size)
train.gen.full <- generator.demo.x.y(X, Y, shuffle=FALSE, batch_size=32,
                                     uneven.mode="undersize.final.batch")

train_generator <- train.gen.full
input_shape <- data.shape
num_epochs <- num.epochs.fit.run
callbacks <- callbacks.fit.run

validation.gen.full <- generator.demo.x.y(validation.X ,validation.Y,
                                          shuffle=FALSE,  batch_size=32,
                                          uneven.mode="undersize.final.batch")


#Build and compile a Keras model
n.units <- 2
dropout_frac <- 0.5
recurrent_dropout_frac <- 0.5
model0 <- keras_model_sequential() %>%
     layer_lstm(units=n.units, dropout = dropout_frac,
                recurrent_dropout = recurrent_dropout_frac,
                input_shape=input_shape) %>%
   layer_dense(units = 1)

my.opt <- optimizer_rmsprop()
model <- model0 %>% compile(optimizer = my.opt,
                            loss = "mse",
                            metrics = c("mae")
                            )

   #Save full history so I can pull out the computed metrics


history <- model %>% fit_generator(
     train_generator,
     steps_per_epoch=steps_per_epoch,
     validation_data = validation.gen.full,
     validation_steps = 1,
     epochs = num_epochs, verbose = TRUE,
     callbacks = callbacks
   )

fitted.model <- list(model=model, history=history)
fitted.model$model <- serialize_model(fitted.model$model,include_optimizer=TRUE)
names(fitted.model)[which(names(fitted.model)=="model")] <- "serialized.model"
fitted.model

#' ^{Created on r Sys.Date() by the reprex package (vr utils::packageVersion("reprex"))}