Hi everyone.
I have very little experience with R, but I need to do a phylogenetic analysis using the Random Forest method. Unfortunately, after 4 days of fumbling around, I still can't get any analysis to run.
I managed to obtain a matrix of data (called "data") containing only factors, but I can't get any further. Below is a preview of my code, along with some of the errors reported by R that I can't solve.
I admit that you are more or less my last hope; I'm hoping to find a solution here.
> library(ggplot2)
> library(cowplot)
> library(randomForest)
> library(caTools)
> library(readr)
> data <- read_delim("data.csv", ";", escape_double = FALSE,
+ col_types = cols(`Angle of divergence of card. Proc.` = col_factor(levels = c("0",
+ "1")), `Apertural section` = col_factor(levels = c("0",
+ "1", "2", "3")), `Apical angle` = col_factor(levels = c("0",
+ "1")), `Cardinal shield` = col_factor(levels = c("0",
+ "1")), `Clavicules shape` = col_factor(levels = c("0",
+ "1")), `Cross-section shape` = col_factor(levels = c("0",
+ "1", "2", "3", "4", "5", "6", "7",
+ "8", "9", "4.6", "0.6", "1.7")),
+ `Curvature of the conch` = col_factor(levels = c("0",
+ "1", "2")), `Divergence angle` = col_factor(levels = c("0",
+ "1")), `Dorsal median ridge` = col_factor(levels = c("0",
+ "1")), `Dorsum ornamentation` = col_factor(levels = c("0",
+ "1")), `Form of the conch` = col_factor(levels = c("0",
+ "1", "2")), `Form of the dorsum flanks` = col_factor(levels = c("0",
+ "1")), `Lateral edges` = col_factor(levels = c("0",
+ "1")), `Lateral sinuses` = col_factor(levels = c("0",
+ "1")), Ligula = col_factor(levels = c("0",
+ "1")), `Number of clavicules` = col_factor(levels = c("0",
+ "1", "2", "3")), Order = col_factor(levels = c("0",
+ "1")), `Orientation of dorsum ornamentation` = col_factor(levels = c("0",
+ "1", "2")), `Orientation of venter ornamentation` = col_factor(levels = c("0",
+ "1", "2")), `Outline of the operculum` = col_factor(levels = c("0",
+ "1", "2", "3", "4", "5", "2.3")),
+ `Presence of cardinal processes` = col_factor(levels = c("0",
+ "1")), `Presence of clavicles` = col_factor(levels = c("0",
+ "1")), `Presence of operculum` = col_factor(levels = c("0",
+ "1")), `Presence of ornamentation` = col_factor(levels = c("0",
+ "1")), `Regularity of ornamentation` = col_factor(levels = c("0",
+ "1")), `Shape of the dorsum` = col_factor(levels = c("0",
+ "1", "2")), `Shape of the venter` = col_factor(levels = c("0",
+ "1", "2")), `Size of cardinal processes` = col_factor(levels = c("0",
+ "1")), `Space between ornamentation` = col_factor(levels = c("0",
+ "1")), `Type of dorsum ornamentation` = col_factor(levels = c("0",
+ "1", "2", "3", "4", "5", "6",
+ "2.4", "0.3.5", "3.5", "0.1",
+ "0.2", "0.2.3")), `Type of ligula` = col_factor(levels = c("0",
+ "1")), `Type of ornamentation` = col_factor(levels = c("0",
+ "1", "2", "3", "4", "5", "6",
+ "0.1.3", "0.3", "3.4.5", "2.3.4",
+ "0.6")), `Type of venter ornamentation` = col_factor(levels = c("0",
+ "1", "2", "3", "4", "5", "6",
+ "0.6", "2.4", "3.5", "0.4", "2.3",
+ "3.4")), `Venter ornamentation` = col_factor(levels = c("0",
+ "1"))), trim_ws = TRUE)
Warning: 1184 parsing failures.
row col expected actual file
1 Form of the conch value in level set ? 'data.csv'
1 Cross-section shape value in level set - 'data.csv'
1 Divergence angle value in level set ? 'data.csv'
1 Space between ornamentation value in level set ? 'data.csv'
1 Number of clavicules value in level set ? 'data.csv'
... ........................... .................. ...... ..........
See problems(...) for more details.
> View(data)
> head(data)
# A tibble: 6 x 34
`Form of the co~ `Curvature of t~ `Cross-section ~ `Shape of the d~ `Shape of the v~
<fct> <fct> <fct> <fct> <fct>
1 NA 0 NA 2 2
2 0 NA NA 2 0
3 NA NA 4 0 1
4 NA NA 7 2 2
5 NA 0 3 2 0
6 0 1 4 2 2
# ... with 29 more variables: `Apertural section` <fct>, `Divergence angle` <fct>, `Apical
# angle` <fct>, Ligula <fct>, `Type of ligula` <fct>, `Lateral edges` <fct>, `Lateral
# sinuses` <fct>, `Form of the dorsum flanks` <fct>, `Dorsal median ridge` <fct>, `Dorsum
# ornamentation` <fct>, `Orientation of dorsum ornamentation` <fct>, `Type of dorsum
# ornamentation` <fct>, `Venter ornamentation` <fct>, `Orientation of venter
# ornamentation` <fct>, `Type of venter ornamentation` <fct>, `Space between
# ornamentation` <fct>, `Regularity of ornamentation` <fct>, `Presence of operculum` <fct>,
# `Outline of the operculum` <fct>, `Presence of ornamentation` <fct>, `Type of
# ornamentation` <fct>, `Cardinal shield` <fct>, `Presence of clavicles` <fct>, `Number of
# clavicules` <fct>, `Clavicules shape` <fct>, `Presence of cardinal processes` <fct>, `Size
# of cardinal processes` <fct>, `Angle of divergence of card. Proc.` <fct>, Order <fct>
> summary(data)
Form of the conch Curvature of the conch Cross-section shape Shape of the dorsum
0 :35 0 :30 5 :12 0 : 2
1 : 7 1 :22 7 :11 1 : 0
2 : 2 2 : 2 4 :10 2 :62
NA's:32 NA's:22 0 : 6 NA's:12
3 : 5
(Other):17
NA's :15
Shape of the venter Apertural section Divergence angle Apical angle Ligula Type of ligula
0 :25 0 :24 0 :14 0 :44 0 :17 0 :30
1 :10 1 : 5 1 : 9 1 : 8 1 :45 1 :12
2 :26 2 :15 NA's:53 NA's:24 NA's:14 NA's:34
NA's:15 3 : 2
NA's:30
Lateral edges Lateral sinuses Form of the dorsum flanks Dorsal median ridge
0 :33 0 : 6 0 :10 0 : 7
1 :13 1 :20 1 :28 1 :36
NA's:30 NA's:50 NA's:38 NA's:33
Dorsum ornamentation Orientation of dorsum ornamentation Type of dorsum ornamentation
0 :11 0 : 8 0 :24
1 :60 1 :42 2 :19
NA's: 5 2 : 7 0.2 : 4
NA's:19 1 : 3
4 : 3
(Other): 5
NA's :18
Venter ornamentation Orientation of venter ornamentation Type of venter ornamentation
0 :17 0 : 7 0 :30
1 :54 1 :36 2 :10
NA's: 5 2 : 9 1 : 2
NA's:24 4 : 2
2.3 : 2
(Other): 6
NA's :24
Space between ornamentation Regularity of ornamentation Presence of operculum
0 :17 0 :17 0:44
1 :12 1 :17 1:32
NA's:47 NA's:42
Outline of the operculum Presence of ornamentation Type of ornamentation Cardinal shield
1 : 7 0 : 1 0 : 8 0 : 6
4 : 7 1 :26 2 : 4 1 : 9
0 : 3 NA's:49 0.3 : 4 NA's:61
2 : 3 0.6 : 2
3 : 2 1 : 1
(Other): 2 (Other): 5
NA's :52 NA's :52
Presence of clavicles Number of clavicules Clavicules shape Presence of cardinal processes
0 : 2 0 : 2 0 : 1 0 : 1
1 :16 1 : 6 1 : 5 1 :16
NA's:58 2 : 3 NA's:70 NA's:59
3 : 2
NA's:63
Size of cardinal processes Angle of divergence of card. Proc. Order
0 : 6 0 : 2 0:48
1 : 7 1 : 2 1:28
NA's:63 NA's:72
> data[data$Order == 0]$Order <- "Hyolithids"
Error: Must subset columns with a valid subscript vector.
i Logical subscripts must match the size of the indexed input.
x Input has size 34 but subscript `data$Order == 0` has size 76.
Run `rlang::last_error()` to see where the error occurred.
> data$Order <- ifelse(test = data$Order == 0, yes = "Hyolithids", no = "Orthothecids")
> str(data)
tibble [76 x 34] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ Form of the conch : Factor w/ 3 levels "0","1","2": NA 1 NA NA NA 1 NA NA NA NA ...
$ Curvature of the conch : Factor w/ 3 levels "0","1","2": 1 NA NA NA 1 2 3 1 NA 2 ...
$ Cross-section shape : Factor w/ 13 levels "0","1","2","3",..: NA NA 5 8 4 5 11 7 NA 1 ...
$ Shape of the dorsum : Factor w/ 3 levels "0","1","2": 3 3 1 3 3 3 3 3 NA 3 ...
$ Shape of the venter : Factor w/ 3 levels "0","1","2": 3 1 2 3 1 3 3 3 NA 3 ...
$ Apertural section : Factor w/ 4 levels "0","1","2","3": 1 1 1 1 1 1 1 2 NA 3 ...
$ Divergence angle : Factor w/ 2 levels "0","1": NA NA NA NA 1 1 NA 2 NA 2 ...
$ Apical angle : Factor w/ 2 levels "0","1": 2 1 NA NA NA NA 1 NA NA 1 ...
$ Ligula : Factor w/ 2 levels "0","1": 2 2 2 2 1 2 2 2 NA 1 ...
$ Type of ligula : Factor w/ 2 levels "0","1": 1 1 1 1 NA 2 1 2 NA NA ...
$ Lateral edges : Factor w/ 2 levels "0","1": 1 2 NA 1 1 1 1 2 NA NA ...
$ Lateral sinuses : Factor w/ 2 levels "0","1": 2 NA 2 2 2 2 2 2 NA NA ...
$ Form of the dorsum flanks : Factor w/ 2 levels "0","1": 1 1 NA NA 1 2 2 2 NA NA ...
$ Dorsal median ridge : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 2 2 NA NA ...
$ Dorsum ornamentation : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 NA 2 ...
$ Orientation of dorsum ornamentation: Factor w/ 3 levels "0","1","2": 2 NA 2 1 NA 2 2 3 NA 2 ...
$ Type of dorsum ornamentation : Factor w/ 13 levels "0","1","2","3",..: 1 8 1 3 3 1 9 10 NA 1 ...
$ Venter ornamentation : Factor w/ 2 levels "0","1": 2 2 2 NA 2 2 1 2 NA 2 ...
$ Orientation of venter ornamentation: Factor w/ 3 levels "0","1","2": 3 NA 2 NA NA 2 NA 3 NA 2 ...
$ Type of venter ornamentation : Factor w/ 13 levels "0","1","2","3",..: 8 9 1 NA 3 1 NA 10 NA 1 ...
$ Space between ornamentation : Factor w/ 2 levels "0","1": NA 2 1 NA 2 NA NA NA NA 1 ...
$ Regularity of ornamentation : Factor w/ 2 levels "0","1": 1 NA NA NA NA NA 2 1 NA 2 ...
$ Presence of operculum : Factor w/ 2 levels "0","1": 2 1 1 2 1 2 1 2 2 2 ...
$ Outline of the operculum : Factor w/ 7 levels "0","1","2","3",..: 3 NA NA NA NA NA NA 2 2 1 ...
$ Presence of ornamentation : Factor w/ 2 levels "0","1": 2 NA NA 2 NA 1 NA 2 2 2 ...
$ Type of ornamentation : Factor w/ 12 levels "0","1","2","3",..: 8 NA NA 3 NA 9 NA 10 3 3 ...
$ Cardinal shield : Factor w/ 2 levels "0","1": 1 NA NA 2 NA 2 NA 1 NA NA ...
$ Presence of clavicles : Factor w/ 2 levels "0","1": 2 NA NA 2 NA 2 NA NA 2 NA ...
$ Number of clavicules : Factor w/ 4 levels "0","1","2","3": NA NA NA 3 NA 3 NA NA NA NA ...
$ Clavicules shape : Factor w/ 2 levels "0","1": NA NA NA 2 NA 2 NA NA NA NA ...
$ Presence of cardinal processes : Factor w/ 2 levels "0","1": 2 NA NA NA NA 2 NA NA 1 2 ...
$ Size of cardinal processes : Factor w/ 2 levels "0","1": 2 NA NA NA NA 2 NA NA NA 1 ...
$ Angle of divergence of card. Proc. : Factor w/ 2 levels "0","1": NA NA NA NA NA NA NA NA NA NA ...
$ Order : chr [1:76] "Hyolithids" "Hyolithids" "Hyolithids" "Hyolithids" ...
- attr(*, "problems")= tibble [1,184 x 5] (S3: tbl_df/tbl/data.frame)
..$ row : int [1:1184] 1 1 1 1 1 1 1 2 2 2 ...
..$ col : chr [1:1184] "Form of the conch" "Cross-section shape" "Divergence angle" "Space between ornamentation" ...
..$ expected: chr [1:1184] "value in level set" "value in level set" "value in level set" "value in level set" ...
..$ actual : chr [1:1184] "?" "-" "?" "?" ...
..$ file : chr [1:1184] "'data.csv'" "'data.csv'" "'data.csv'" "'data.csv'" ...
- attr(*, "spec")=
.. cols(
.. `Form of the conch` = col_factor(levels = c("0", "1", "2"), ordered = FALSE, include_na = FALSE),
.. `Curvature of the conch` = col_factor(levels = c("0", "1", "2"), ordered = FALSE, include_na = FALSE),
.. `Cross-section shape` = col_factor(levels = c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "4.6", "0.6",
.. "1.7"), ordered = FALSE, include_na = FALSE),
.. `Shape of the dorsum` = col_factor(levels = c("0", "1", "2"), ordered = FALSE, include_na = FALSE),
.. `Shape of the venter` = col_factor(levels = c("0", "1", "2"), ordered = FALSE, include_na = FALSE),
.. `Apertural section` = col_factor(levels = c("0", "1", "2", "3"), ordered = FALSE, include_na = FALSE),
.. `Divergence angle` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Apical angle` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. Ligula = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Type of ligula` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Lateral edges` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Lateral sinuses` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Form of the dorsum flanks` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Dorsal median ridge` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Dorsum ornamentation` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Orientation of dorsum ornamentation` = col_factor(levels = c("0", "1", "2"), ordered = FALSE, include_na = FALSE),
.. `Type of dorsum ornamentation` = col_factor(levels = c("0", "1", "2", "3", "4", "5", "6", "2.4", "0.3.5", "3.5", "0.1",
.. "0.2", "0.2.3"), ordered = FALSE, include_na = FALSE),
.. `Venter ornamentation` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Orientation of venter ornamentation` = col_factor(levels = c("0", "1", "2"), ordered = FALSE, include_na = FALSE),
.. `Type of venter ornamentation` = col_factor(levels = c("0", "1", "2", "3", "4", "5", "6", "0.6", "2.4", "3.5", "0.4",
.. "2.3", "3.4"), ordered = FALSE, include_na = FALSE),
.. `Space between ornamentation` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Regularity of ornamentation` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Presence of operculum` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Outline of the operculum` = col_factor(levels = c("0", "1", "2", "3", "4", "5", "2.3"), ordered = FALSE, include_na = FALSE),
.. `Presence of ornamentation` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Type of ornamentation` = col_factor(levels = c("0", "1", "2", "3", "4", "5", "6", "0.1.3", "0.3", "3.4.5",
.. "2.3.4", "0.6"), ordered = FALSE, include_na = FALSE),
.. `Cardinal shield` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Presence of clavicles` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Number of clavicules` = col_factor(levels = c("0", "1", "2", "3"), ordered = FALSE, include_na = FALSE),
.. `Clavicules shape` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Presence of cardinal processes` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Size of cardinal processes` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. `Angle of divergence of card. Proc.` = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE),
.. Order = col_factor(levels = c("0", "1"), ordered = FALSE, include_na = FALSE)
.. )
> set.seed(42)
> data.imputed <- rfImpute(Order ~ ., data = data, iter=6)
Error in y - ymean : non-numeric argument to binary operator
In addition: Warning messages:
1: In randomForest.default(xf, y, ntree = ntree, ..., do.trace = ntree, :
The response has five or fewer unique values. Are you sure you want to do regression?
2: In mean.default(y) : argument is not numeric or logical: returning NA
> sample = sample.split(data$Order, SplitRatio = 0.75)
> train = subset(data, sample == TRUE)
> test = subset(data, sample == FALSE)
> dim(train)
[1] 57 34
> dim(test)
[1] 19 34
> rf <- randomForest(Order ~ ., data = train)
Error in na.fail.default(list(Order = c("Hyolithids", "Hyolithids", "Hyolithids", :
missing values in object
> na.action = na.roughfix
> rf <- randomForest(Order ~ ., data = train)
Error in na.fail.default(list(Order = c("Hyolithids", "Hyolithids", "Hyolithids", :
missing values in object
> set.seed(101)
> library(nlme)
> fit_rf<-randomForest(store~., data=store_train, importance=TRUE, prOximity=TRUE, na.action = na.roughfix)
Error in eval(m$data, parent.frame()) : object 'store_train' not found
> fit_rf<-randomForest(store~., data=data, importance=TRUE, prOximity=TRUE, na.action = na.roughfix)
Error in eval(predvars, data, env) : object 'store' not found
> fit_rf<-randomForest(Order~., data=data, importance=TRUE, prOximity=TRUE, na.action = na.roughfix)
Error in na.roughfix.data.frame(list(Order = c("Hyolithids", "Hyolithids", :
na.roughfix only works for numeric or factor
> data$Order <- ifelse(test = data$Order == "Hyolithids", yes = "0", no = "1")
> data.imputed <- rfImpute(Order ~ ., data = data, iter=6)
Error in y - ymean : non-numeric argument to binary operator
In addition: Warning messages:
1: In randomForest.default(xf, y, ntree = ntree, ..., do.trace = ntree, :
The response has five or fewer unique values. Are you sure you want to do regression?
2: In mean.default(y) : argument is not numeric or logical: returning NA
> data$Order = factor(data$Order)
> rf <- randomForest(Order ~ ., data=data)
Error in na.fail.default(list(Order = c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, :
missing values in object
> na.action = na.pass
> rf <- randomForest(Order ~ ., data=data)
Error in na.fail.default(list(Order = c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, :
missing values in object
> fit_rf<-randomForest(store~., data=store_train, importance=TRUE, prOximity=TRUE, na.action=na.roughfix)
Error in eval(m$data, parent.frame()) : object 'store_train' not found
> fit_rf<-randomForest(Order~., data=data, importance=TRUE, prOximity=TRUE, na.action=na.roughfix)
Error in eval(predvars, data, env) : object 'Form of the conch' not found