Cluster doesn't work in RStudio Cloud: "Error in unserialize(socklist[[n]]): error reading from connection"

#1

I had high hopes for RStudio Cloud, but I am getting quite desperate because my code doesn't run in cluster mode there - which would be a major advantage over using a local machine.
I would be very happy to receive any concrete advice on how to solve this. Reducing the number of cores makes no difference - any number leads to the same error.

The following caret::train code runs without problems on my MacBook Pro (macOS Mojave):

  system.time(
    models.list <- algorithm_list %>%
      map(function(algorithm_label) {
        train(formula1
              , method = algorithm_label
              , data = if (is.null(try_first)) training.set else head(training.set, try_first)
              , preProcess = c("center", "scale")
              , trControl = training_configuration
        )
      }) %>%
      setNames(algorithm_list)
  ) %>% print

The code is run on a cluster initiated by library(doParallel):

  require(doParallel) # loads parallel library for makeCluster
  cluster.new <- makeCluster(spec = if (!is.null(no_cores)) no_cores else { detectCores() - 1 },
                             type = "FORK",
                             outfile = "" # verbose
  )
  registerDoParallel(cluster.new)

The error - which occurs only in RStudio Cloud - is:

Error in unserialize(socklist[[n]]) : error reading from connection 
40.
unserialize(socklist[[n]]) 
39.
recvOneData.SOCKcluster(cl) 
38.
recvOneData(cl) 
37.
recvOneResult(cl) 
36.
dynamicClusterApply(cl, fun, length(x), argfun) 
35.
clusterApplyLB(cl, argsList, evalWrapper) 
34.
e$fun(obj, substitute(ex), parent.frame(), e$data) 
33.
foreach(iter = seq(along = resampleIndex), .combine = "c", .verbose = FALSE, 
    .export = export, .packages = "caret") %:% foreach(parm = 1L:nrow(info$loop), 
    .combine = "c", .verbose = FALSE, .export = export, .packages = "caret") %op% 
    { ... 
32.
nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, 
    method = models, ppOpts = preProcess, ctrl = trControl, lev = classLevels, 
    ...) 
31.
train.default(x, y, weights = w, ...) 
30.
train(x, y, weights = w, ...) 
29.
train.formula(formula1, method = algorithm_label, data = if (is.null(try_first)) training.set else head(training.set, 
    try_first), preProcess = c("center", "scale"), trControl = training_configuration) 
28.
train(formula1, method = algorithm_label, data = if (is.null(try_first)) training.set else head(training.set, 
    try_first), preProcess = c("center", "scale"), trControl = training_configuration) 
27.
.f(.x[[i]], ...) 
26.
map(., function(algorithm_label) {
    train(formula1, method = algorithm_label, data = if (is.null(try_first)) 
        training.set
    else head(training.set, try_first), preProcess = c("center",  ... 
25.
function_list[[i]](value) 
24.
freduce(value, `_function_list`) 
23.
`_fseq`(`_lhs`) 
22.
eval(quote(`_fseq`(`_lhs`)), env, env) 
21.
eval(quote(`_fseq`(`_lhs`)), env, env) 
20.
withVisible(eval(quote(`_fseq`(`_lhs`)), env, env)) 
19.
algorithm_list %>% map(function(algorithm_label) {
    train(formula1, method = algorithm_label, data = if (is.null(try_first)) 
        training.set
    else head(training.set, try_first), preProcess = c("center",  ... 
18.
system.time(models.list <- algorithm_list %>% map(function(algorithm_label) {
    train(formula1, method = algorithm_label, data = if (is.null(try_first)) 
        training.set
    else head(training.set, try_first), preProcess = c("center",  ... 
17.
eval(lhs, parent, parent) 
16.
eval(lhs, parent, parent) 
15.
system.time(models.list <- algorithm_list %>% map(function(algorithm_label) {
    train(formula1, method = algorithm_label, data = if (is.null(try_first)) 
        training.set
    else head(training.set, try_first), preProcess = c("center",  ... 
14.
.f(target_label = .l[[1L]][[i]], features_set = .l[[2L]][[i]], 
    ...) 
13.
pmap(., train_model_permutations, data_set = dataset, algorithm_list = algorithm.list, 
    training_configuration = trainControl(method = "repeatedcv", 
        number = 10, repeats = CV.REPEATS), cv_repeats = CV.REPEATS, 
    try_first = TRY.FIRST) 
12.
function_list[[k]](value) 
11.
withVisible(function_list[[k]](value)) 
10.
freduce(value, `_function_list`) 
9.
`_fseq`(`_lhs`) 
8.
eval(quote(`_fseq`(`_lhs`)), env, env) 
7.
eval(quote(`_fseq`(`_lhs`)), env, env) 
6.
withVisible(eval(quote(`_fseq`(`_lhs`)), env, env)) 
5.
model.permutations.list %>% pmap(train_model_permutations, data_set = dataset, 
    algorithm_list = algorithm.list, training_configuration = trainControl(method = "repeatedcv", 
        number = 10, repeats = CV.REPEATS), cv_repeats = CV.REPEATS, 
    try_first = TRY.FIRST) 
4.
system.time(result.permutations <- model.permutations.list %>% 
    pmap(train_model_permutations, data_set = dataset, algorithm_list = algorithm.list, 
        training_configuration = trainControl(method = "repeatedcv", 
            number = 10, repeats = CV.REPEATS), cv_repeats = CV.REPEATS,  ... 
3.
eval(lhs, parent, parent) 
2.
eval(lhs, parent, parent) 
1.
system.time(result.permutations <- model.permutations.list %>% 
    pmap(train_model_permutations, data_set = dataset, algorithm_list = algorithm.list, 
        training_configuration = trainControl(method = "repeatedcv", 
            number = 10, repeats = CV.REPEATS), cv_repeats = CV.REPEATS,  ... 

0 Likes

#2

detectCores() will return the number of cores of the host machine, not the number of cores available to your project.

For now, just as projects are limited to 1 GB of RAM, they are limited to 1 core. We are looking to offer the option for higher limits in the future.

Have you tried explicitly running with 1 core, and does it still crash?

0 Likes

#3

Ohhh, that of course explains it.
Incredible - I had tried every option except a single core.
Thanks a lot for the answer.
Given this limitation, though, I can't really offer it to my students, as a single core is way too slow for machine learning even with the smallest datasets.

0 Likes

closed #4

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.

0 Likes