Forest Plot - changing column names

Hello everyone,

I'm having some trouble with the forest_model() function from the forestmodel package:

library(forestmodel)
#> Loading required package: ggplot2
#> Registered S3 methods overwritten by 'ggplot2':
#>   method         from 
#>   [.quosures     rlang
#>   c.quosures     rlang
#>   print.quosures rlang
set.seed(500)
Data1 <- data.frame(
    TXT_MoD = sample(0:1,20, replace = TRUE),
    W_Male = sample(0:1,20, replace = TRUE),
    Tumor_Stage = sample(1:3,20, replace = TRUE),
    W_AGE_60 = sample(c(1, 0), 20, replace = TRUE)
)
print(forest_model(glm(TXT_MoD ~ W_Male + W_AGE_60 + Tumor_Stage, data =Data1, family = "binomial")))
#> Warning: Ignoring unknown aesthetics: x

Created on 2019-06-17 by the reprex package (v0.3.0)

Here is the output:-

How can I change the column names so that they can include a space or a special character?

I want to rename the columns
"Male Gender"
"Age>60"
"Tumor Stage"

I tried renaming the columns using colnames but it didn't work:

colnames(Data1)[colnames(Data1)=="W_Male"] <- "Male Gender"
#> Error in colnames(Data1)[colnames(Data1) == "W_Male"] <- "Male Gender": object 'Data1' not found
colnames(Data1)[colnames(Data1)=="W_AGE_60"] <- "Age>60"
#> Error in colnames(Data1)[colnames(Data1) == "W_AGE_60"] <- "Age>60": object 'Data1' not found

Created on 2019-06-17 by the reprex package (v0.3.0)

print(forest_model(glm(TXT_MoD ~ 'Male Gender' + 'Age>60' + Tumor_Stage, data =Data1, family = "binomial")))
#> Error in forest_model(glm(TXT_MoD ~ "Male Gender" + "Age>60" + Tumor_Stage, : could not find function "forest_model"

Created on 2019-06-17 by the reprex package (v0.3.0)

Ideas?

To use a non-syntactic name in R (such as one containing a space), you need to surround it with backticks, e.g.

`Male Gender`

See the section (2.2.1) on Non-syntactic names in Advanced R, below
https://adv-r.hadley.nz/names-values.html#binding-basics

I have tried renaming the column to 'Male Gender'; however, the glm function still doesn't recognize it:

 colnames(Data1)[colnames(Data1)=="W_Male"] <- 'Male Gender'
#> Error in colnames(Data1)[colnames(Data1) == "W_Male"] <- "Male Gender": object 'Data1' not found
colnames(Data1)[colnames(Data1)=="W_AGE_60"] <- 'Age>60'
#> Error in colnames(Data1)[colnames(Data1) == "W_AGE_60"] <- "Age>60": object 'Data1' not found
print(forest_model(glm(TXT_MoD ~ 'Male Gender' + 'Age>60' + Tumor_Stage, data =Data1, family = "binomial")))
#> Error in forest_model(glm(TXT_MoD ~ "Male Gender" + "Age>60" + Tumor_Stage, : could not find function "forest_model"

Created on 2019-06-18 by the reprex package (v0.3.0)

It works perfectly. Your problem is that you're using the old column names even after renaming.

When you rename the columns, you can use normal double or single quotes. When you refer to them afterwards, for example using Data1$..., you need to use backticks, not single quotes. See below:

library(forestmodel)
#> Loading required package: ggplot2
    
set.seed(seed = 33317)
    
Data1 <- data.frame(TXT_MoD = sample(x = 0:1,size = 20, replace = TRUE),
                    W_Male = sample(x = 0:1,size = 20, replace = TRUE),
                    Tumor_Stage = sample(x = 1:3,size = 20, replace = TRUE),
                    W_AGE_60 = sample(x = c(1, 0), size = 20, replace = TRUE))
    
Data1
#>    TXT_MoD W_Male Tumor_Stage W_AGE_60
#> 1        0      1           1        1
#> 2        1      1           2        1
#> 3        0      0           3        1
#> 4        0      0           3        1
#> 5        1      1           2        0
#> 6        1      1           3        1
#> 7        0      0           2        1
#> 8        1      1           3        0
#> 9        1      0           3        1
#> 10       0      1           2        1
#> 11       1      0           3        0
#> 12       1      1           1        1
#> 13       1      1           2        1
#> 14       0      1           2        1
#> 15       0      0           2        1
#> 16       1      0           2        0
#> 17       1      0           2        0
#> 18       0      0           3        0
#> 19       0      1           1        1
#> 20       1      1           3        1
    
colnames(x = Data1)[colnames(x = Data1)== "W_Male"] <- 'Male Gender'
colnames(x = Data1)[colnames(x = Data1) == "Tumor_Stage"] <- 'Tumor Stage'
colnames(x = Data1)[colnames(x = Data1) == "W_AGE_60"] <- 'Age > 60'
    
Data1
#>    TXT_MoD Male Gender Tumor Stage Age > 60
#> 1        0           1           1        1
#> 2        1           1           2        1
#> 3        0           0           3        1
#> 4        0           0           3        1
#> 5        1           1           2        0
#> 6        1           1           3        1
#> 7        0           0           2        1
#> 8        1           1           3        0
#> 9        1           0           3        1
#> 10       0           1           2        1
#> 11       1           0           3        0
#> 12       1           1           1        1
#> 13       1           1           2        1
#> 14       0           1           2        1
#> 15       0           0           2        1
#> 16       1           0           2        0
#> 17       1           0           2        0
#> 18       0           0           3        0
#> 19       0           1           1        1
#> 20       1           1           3        1
    
forest_model(model = glm(formula = (TXT_MoD ~ `Male Gender` + `Age > 60` + `Tumor Stage`),
                         data = Data1,
                         family = "binomial"))
#> Warning: Ignoring unknown aesthetics: x

1 Like

I was able to reproduce the reprex, but I'm going crazy trying to apply it to my data. Something is off and I can't figure it out:

colnames(x=logit)[colnames(x=logit)=="W_Male"] <- 'Male Gender'

colnames(x=logit)[colnames(x=logit)=="W_AGE_60"] <- 'Age>60'

colnames(x=logit)[colnames(x=logit)=="W_FACILITY_ACADEMIC"] <- 'Academic Facility'

colnames(x=logit)[colnames(x=logit)=="W_RACE_WHITE"] <- 'White Race'

colnames(x=logit)[colnames(x=logit)=="W_RURAL_250"] <- 'Rural'

colnames(x=logit)[colnames(x=logit)=="W_DIST_60"] <- 'Distance >60 Miles'

colnames(x=logit)[colnames(x=logit)=="W_EOE_Max"] <- 'Extraocular Extension'

colnames(x=logit)[colnames(x=logit)=="Z_year2010"] <- 'Diagnosed after 2010'

colnames(x=logit)[colnames(x=logit)=="W_INSURANCE_UN"] <- 'Uninsured'

colnames(x=logit)[colnames(x=logit)=="W_HS_13"] <- 'Uneducated Zip Code'

colnames(x=logit)[colnames(x=logit)=="W_MED_INC_48"] <- 'Zip code income'

colnames(x=logit)[colnames(x=logit)=="W_CharlsonScore"] <- 'Charlson/Deyo score'

colnames(x=logit)[colnames(x=logit)=="W_SITE_CILIARY"] <- 'Ciliary body involvement'

colnames(x=logit)[colnames(x=logit)=="Z_SZ_class"] <- 'T stage'

forest_model(model = glm(formula = (T_TXT_NGP ~'Male Gender'+'Age>60'+'Academic Facility'+'White Race'+'Rural'+'Distance >60 Miles'+'Extraocular Extension'+'Diagnosed after 2010'+'Uninsured'+'Uneducated Zip Code'+'Zip code income'+'Charlson/Deyo score'+'Ciliary body involvement'+'T stage'), data = logit, family = "binomial"))
Error in terms.formula(formula, data = data) :
invalid model formula in ExtractVars

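You missed the main point that Mara made: to use a non-syntactic name in R (such as one containing a space), you need to surround it with backticks.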

I also mentioned the same, but without appropriate emphasis.

  1. When you rename a column with a string, you use double or single quotes, not backticks.
  2. When you want to use it later, then you can't use quotes. You should use backticks.

So, you should use:

Data1$`Male Gender`

And, not this:

Data1$'Male Gender'

This creates the problem. In the formula, you're using single quotes, which is wrong, and you should use backticks.

Compare the output of the following two:

forest_model(model = glm(formula = (TXT_MoD ~ 'Male Gender' + 'Age > 60' + 'Tumor Stage'), data = Data1, family = "binomial"))

vs

forest_model(model = glm(formula = (TXT_MoD ~ `Male Gender` + `Age > 60` + `Tumor Stage`), data = Data1, family = "binomial"))

The first one should give:

Error in terms.formula(formula, data = data) :
invalid model formula in ExtractVars

The second one should give the same output that was in the reprex.

Hence, for your data, you need to use this:

forest_model(model = glm(formula = (T_TXT_NGP ~ `Male Gender` + `Age>60` + `Academic Facility` + `White Race` + `Rural` + `Distance >60 Miles` + `Extraocular Extension` + `Diagnosed after 2010` + `Uninsured` + `Uneducated Zip Code` + `Zip code income` + `Charlson/Deyo score` + `Ciliary body involvement` + `T stage`),
                         data = logit,
                         family = "binomial"))

Hope this helps.

1 Like

Thank you so much. It makes sense now; however, I'm now getting this error for colnames:

colnames(x=logit)[colnames(x=logit)=="W_Male"] <- `Male Gender`
Error: object 'Male Gender' not found

This is because it's not surrounded by backticks or quotation marks.

The backtick key on the keyboard is the one above Tab, right? I guess on this website, when I use it, it just changes the formatting.

You're supposed to use single or double quotes while renaming, and backticks while calling later.

Use:

colnames(x=logit)[colnames(x=logit)=="W_Male"] <- 'Male Gender'
# similarly for other columns

# then this
# I haven't really checked all the column names, just substituted the single quotes with backticks in your code
forest_model(model = glm(formula = (T_TXT_NGP ~ `Male Gender` + `Age>60` + `Academic Facility` + `White Race` + `Rural` + `Distance >60 Miles` + `Extraocular Extension` + `Diagnosed after 2010` + `Uninsured` + `Uneducated Zip Code` + `Zip code income` + `Charlson/Deyo score` + `Ciliary body involvement` + `T stage`),
                         data = logit,
                         family = "binomial"))
1 Like

It worked, thank you! The difference is very subtle; it's so strange.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.