The R Markdown code is attached below:
### get the data
# Loads the raw suicide-rate CSV, cleans two numeric columns, and derives the
# four-level target `classVar` (values 0-3) from the suicides-per-100k rate.
#
# Original download step, kept for reference (disabled):
#library(readr)
#temp = tempfile()
#download.file(url="https://www.kaggle.com/russellyates88/suicide-rates-overview-1985-to-2016/downloads/master.csv/1", destfile=temp )

# NOTE(review): clearing the workspace and changing the working directory are
# side effects on the caller's session; kept because the relative path below
# (and possibly later code) depends on them.
rm(list = ls())
setwd("/cloud/project")

# as.is = FALSE turns every character column into a factor.
suicideData <- read.csv(
  file = "./raw-suicide-data.csv",
  header = TRUE,
  as.is = FALSE
)

# GDP is stored with thousands separators ("1,234,567"); strip the commas
# before converting to numeric.
suicideData$gdp_for_year.... <- as.numeric(
  gsub(",", "", suicideData$gdp_for_year...., fixed = TRUE)
)

# Impute missing HDI values with the mean of the observed ones.
hdiMissing <- is.na(suicideData$HDI.for.year)
suicideData$HDI.for.year[hdiMissing] <- mean(
  suicideData$HDI.for.year,
  na.rm = TRUE
)
rm(hdiMissing)

# NOTE(review): assumes this is the Kaggle file referenced above, whose
# composite `country-year` key is read in as `country.year` - confirm.
# Because of as.is = FALSE it becomes a factor with one level per
# country/year pair. It carries no information beyond the separate country
# and year columns, and a factor with thousands of levels is extremely
# expensive for tree learners; presumably this is what makes the later
# ctree() fit blow up. Assigning NULL is a no-op if the column is absent.
suicideData$country.year <- NULL

# Bin the rate into four classes labelled 0..3.
suicideData$classVar <- cut(
  suicideData$suicides.100k.pop,
  breaks = c(0, 0.92, 5.99, 16.62, 225),
  labels = c(0, 1, 2, 3),
  include.lowest = TRUE
)

# as.numeric() on a factor returns its internal codes (1..4), not the labels.
# Index the numeric levels by the factor to recover the intended 0..3 values
# (the conversion recommended in ?factor).
suicideData$classVar <- as.numeric(
  levels(suicideData$classVar)
)[suicideData$classVar]

# The raw rate must not remain as a predictor of the class derived from it.
suicideData$suicides.100k.pop <- NULL
# Hold-out split: draw the training rows at random, then draw the test rows
# from whatever is left, so the two sets never share a record.
DTtrainingCount <- 500
DTtestCount <- 500

# Fixed seed so both draws are reproducible.
set.seed(123)

# Row positions of the training sample within suicideData.
DTtraining_indices <- sample.int(
  n = nrow(suicideData),
  size = DTtrainingCount
)
RemainingSet <- suicideData[-DTtraining_indices, ]
DTtrainSet <- suicideData[DTtraining_indices, ]

# Row positions of the test sample within RemainingSet (not suicideData).
DTtest_indices <- sample.int(
  n = nrow(RemainingSet),
  size = DTtestCount
)
DTtestSet <- RemainingSet[DTtest_indices, ]
# Attach party, which supplies ctree().
library(party)
# Messages printed while the package is being attached:
#> Loading required package: grid
#> Loading required package: mvtnorm
#> Loading required package: modeltools
#> Loading required package: stats4
#> Loading required package: strucchange
#> Loading required package: zoo
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
#> Loading required package: sandwich

# Fit the tree on the training set, using every other column of DTtrainSet
# as a predictor of classVar.
# Known issue: this step crashes every time the training set holds more than
# 1,000 records.
treeModel <- ctree(formula = classVar ~ ., data = DTtrainSet)