Error creating density class

I'm trying to classify these values into density classes, but values equal to 1 get the "0" label instead of "1-5". Any idea what I'm doing wrong?

# Attempt that shows the reported behaviour. cut() builds right-closed
# intervals by default (right = TRUE), so the breaks 0 and 1 form the bin
# (0, 1]; include.lowest = T then also admits 0, giving [0, 1]. A value of
# exactly 1 therefore falls in the first bin and receives the '0' label.
# NOTE(review): the seventh label reads '100-200', but with these breaks its
# bin runs from above 100 up to and including 500.
df$DensClass <- cut(df$x2023m02_2,breaks = c(0,1,5,10,30,50,100,500,Inf), include.lowest = T,
labels = c('0','1-5','6-10','11-30','31-50','51-100','100-200','> 500'))

# Example input: 38 identifier strings (Código) paired with 38 numeric
# values (x2023m02_2), stored as a plain named list.
df <- list(
  Código = c(
    "24196", "24209", "24009", "24019", "24027", "24038", "24041", "24049",
    "24057", "24070", "24071", "24171", "24100", "24102", "24103", "24109",
    "24110", "24112", "24115", "24119", "24122", "24143", "24169", "24170",
    "24198", "24206", "24007", "24011", "24014", "24016", "24022", "24030",
    "24034", "24036", "24059", "24064", "24083", "24165"
  ),
  x2023m02_2 = c(
    17, 17, 3, 1, 0, 4, 8, 3, 3, 3,
    4, 1, 6, 2, 1, 4, 4, 2, 134, 2,
    2, 0, 3, 0, 5, 1, 7, 1, 5, 1,
    2, 5, 7, 3, 2, 0, 1, 0
  )
)

Hi @jynusmac
You just need to specify the group boundaries at intermediate values to get the result you want:

# Reproducible example: assign each count in x2023m02_2 to a density class.
# Added some extra data on the group "boundaries" for checking
# (the six rows with code "99999").
df <- data.frame(
  Código = c(
    "24196", "24209", "24009", "24019", "24027", "24038", "24041", "24049",
    "24057", "24070", "24071", "24171", "24100", "24102", "24103", "24109",
    "24110", "24112", "24115", "24119", "24122", "24143", "24169", "24170",
    "24198", "24206", "24007", "24011", "24014", "24016", "24022", "24030",
    "24034", "24036", "24059", "24064", "24083", "24165",
    "99999", "99999", "99999", "99999", "99999", "99999"
  ),
  x2023m02_2 = c(
    17, 17, 3, 1, 0, 4, 8, 3, 3, 3, 4, 1, 6, 2, 1, 4, 4, 2, 134, 2, 2,
    0, 3, 0, 5, 1, 7, 1, 5, 1, 2, 5, 7, 3, 2, 0, 1, 0,
    520, 10, 11, 31, 51, 200
  )
)

# cut() uses right-closed bins by default, so a break placed exactly on a
# whole number puts that number in the lower class (this is why 1 was labelled
# '0' with the original breaks, kept below for comparison):
#   breaks = c(0, 1, 5, 10, 30, 50, 100, 500, Inf)
# Placing each break just below the first value of the next class keeps every
# whole-number count inside the class its label names. The last cut point stays
# at 500 so that the '> 500' label is true, and the class below it is labelled
# '101-500' because 100 itself belongs to '51-100'.
df$DensClass <- cut(
  df$x2023m02_2,
  breaks = c(0, 0.99, 5.99, 10.99, 30.99, 50.99, 100.99, 500.99, Inf),
  include.lowest = TRUE, # keeps 0 inside the first bin
  labels = c(
    "0",
    "1-5",
    "6-10",
    "11-30",
    "31-50",
    "51-100",
    "101-500",
    "> 500"
  )
)
df
#>    Código x2023m02_2 DensClass
#> 1   24196         17     11-30
#> 2   24209         17     11-30
#> 3   24009          3       1-5
#> 4   24019          1       1-5
#> 5   24027          0         0
#> 6   24038          4       1-5
#> 7   24041          8      6-10
#> 8   24049          3       1-5
#> 9   24057          3       1-5
#> 10  24070          3       1-5
#> 11  24071          4       1-5
#> 12  24171          1       1-5
#> 13  24100          6      6-10
#> 14  24102          2       1-5
#> 15  24103          1       1-5
#> 16  24109          4       1-5
#> 17  24110          4       1-5
#> 18  24112          2       1-5
#> 19  24115        134   101-500
#> 20  24119          2       1-5
#> 21  24122          2       1-5
#> 22  24143          0         0
#> 23  24169          3       1-5
#> 24  24170          0         0
#> 25  24198          5       1-5
#> 26  24206          1       1-5
#> 27  24007          7      6-10
#> 28  24011          1       1-5
#> 29  24014          5       1-5
#> 30  24016          1       1-5
#> 31  24022          2       1-5
#> 32  24030          5       1-5
#> 33  24034          7      6-10
#> 34  24036          3       1-5
#> 35  24059          2       1-5
#> 36  24064          0         0
#> 37  24083          1       1-5
#> 38  24165          0         0
#> 39  99999        520     > 500
#> 40  99999         10      6-10
#> 41  99999         11     11-30
#> 42  99999         31     31-50
#> 43  99999         51    51-100
#> 44  99999        200   101-500

Created on 2023-05-10 with reprex v2.0.2

1 Like

You need to include the right = FALSE argument, as its default value is TRUE:

# Classify x2023m02_2 with left-closed bins, [a, b).
# With right = FALSE every break is the FIRST value of its class, so the
# breaks sit at 1, 6, 11, ... rather than at the upper ends 5, 10, 30, ...
# (otherwise 5 would be labelled '6-10', 10 would be labelled '11-30', etc.).
# include.lowest = TRUE closes the last bin on the right when right = FALSE.
# The second-to-last label is '101-500' to match its bin: 100 belongs to
# '51-100' and the final class starts above 500.
cut(
  df$x2023m02_2,
  breaks = c(0, 1, 6, 11, 31, 51, 101, 501, Inf),
  include.lowest = TRUE,
  right = FALSE,
  labels = c(
    "0", "1-5", "6-10", "11-30", "31-50", "51-100", "101-500", "> 500"
  )
)

2 Likes

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.