Here are some examples to illustrate several options.
library(tidyverse)
# Set up example data
# Add a second factor column to the iris data frame
set.seed(2)
d = iris %>%
mutate(group = c(rep("D",3),
sample(c("A","B","C"), nrow(iris) - 3, replace=TRUE)),
group = factor(group))
glimpse(d)
#> Rows: 150
#> Columns: 6
#> $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4…
#> $ Sepal.Width <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3…
#> $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1…
#> $ Petal.Width <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0…
#> $ Species <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, …
#> $ group <fct> D, D, D, A, C, B, B, A, A, A, A, A, B, C, A, C, B, B, C,…
Three ways to count groups:
d %>%
count(Species, group)
#> Species group n
#> 1 setosa A 18
#> 2 setosa B 14
#> 3 setosa C 15
#> 4 setosa D 3
#> 5 versicolor A 21
#> 6 versicolor B 17
#> 7 versicolor C 12
#> 8 virginica A 20
#> 9 virginica B 13
#> 10 virginica C 17
d %>%
group_by(Species, group) %>%
tally
#> # A tibble: 10 x 3
#> # Groups: Species [3]
#> Species group n
#> <fct> <fct> <int>
#> 1 setosa A 18
#> 2 setosa B 14
#> 3 setosa C 15
#> 4 setosa D 3
#> 5 versicolor A 21
#> 6 versicolor B 17
#> 7 versicolor C 12
#> 8 virginica A 20
#> 9 virginica B 13
#> 10 virginica C 17
d %>%
group_by(Species, group) %>%
summarise(n = n())
#> `summarise()` regrouping output by 'Species' (override with `.groups` argument)
#> # A tibble: 10 x 3
#> # Groups: Species [3]
#> Species group n
#> <fct> <fct> <int>
#> 1 setosa A 18
#> 2 setosa B 14
#> 3 setosa C 15
#> 4 setosa D 3
#> 5 versicolor A 21
#> 6 versicolor B 17
#> 7 versicolor C 12
#> 8 virginica A 20
#> 9 virginica B 13
#> 10 virginica C 17
To retain empty levels, add .drop=FALSE
to any of the above:
d %>%
count(Species, group, .drop=FALSE)
#> Species group n
#> 1 setosa A 18
#> 2 setosa B 14
#> 3 setosa C 15
#> 4 setosa D 3
#> 5 versicolor A 21
#> 6 versicolor B 17
#> 7 versicolor C 12
#> 8 versicolor D 0
#> 9 virginica A 20
#> 10 virginica B 13
#> 11 virginica C 17
#> 12 virginica D 0
d %>%
group_by(Species, group, .drop=FALSE) %>%
tally
#> # A tibble: 12 x 3
#> # Groups: Species [3]
#> Species group n
#> <fct> <fct> <int>
#> 1 setosa A 18
#> 2 setosa B 14
#> 3 setosa C 15
#> 4 setosa D 3
#> 5 versicolor A 21
#> 6 versicolor B 17
#> 7 versicolor C 12
#> 8 versicolor D 0
#> 9 virginica A 20
#> 10 virginica B 13
#> 11 virginica C 17
#> 12 virginica D 0
But .drop=FALSE
only works for grouping columns that are coded as factors
d %>%
mutate(group=as.character(group)) %>%
count(Species, group, .drop=FALSE)
#> Species group n
#> 1 setosa A 18
#> 2 setosa B 14
#> 3 setosa C 15
#> 4 setosa D 3
#> 5 versicolor A 21
#> 6 versicolor B 17
#> 7 versicolor C 12
#> 8 virginica A 20
#> 9 virginica B 13
#> 10 virginica C 17
Use the complete()
function to add back empty levels for non-factor columns:
d %>%
mutate(group=as.character(group)) %>%
count(Species, group, .drop=FALSE) %>%
ungroup() %>%
complete(group, nesting(Species), fill=list(n=0))
#> # A tibble: 12 x 3
#> group Species n
#> <chr> <fct> <dbl>
#> 1 A setosa 18
#> 2 A versicolor 21
#> 3 A virginica 20
#> 4 B setosa 14
#> 5 B versicolor 17
#> 6 B virginica 13
#> 7 C setosa 15
#> 8 C versicolor 12
#> 9 C virginica 17
#> 10 D setosa 3
#> 11 D versicolor 0
#> 12 D virginica 0
Note that you can use the nesting()
argument to complete only some group combinations:
# There are 36 possible combinations of grouping columns, but only 11 are present in the data
mtcars %>%
count(carb, cyl, vs)
#> carb cyl vs n
#> 1 1 4 1 5
#> 2 1 6 1 2
#> 3 2 4 0 1
#> 4 2 4 1 5
#> 5 2 8 0 4
#> 6 3 8 0 3
#> 7 4 6 0 2
#> 8 4 6 1 2
#> 9 4 8 0 6
#> 10 6 6 0 1
#> 11 8 8 0 1
Complete all combinations of grouping columns:
mtcars %>%
count(carb, cyl, vs) %>%
ungroup %>%
complete(carb, cyl, vs, fill=list(n=0))
#> # A tibble: 36 x 4
#> carb cyl vs n
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1 4 0 0
#> 2 1 4 1 5
#> 3 1 6 0 0
#> 4 1 6 1 2
#> 5 1 8 0 0
#> 6 1 8 1 0
#> 7 2 4 0 1
#> 8 2 4 1 5
#> 9 2 6 0 0
#> 10 2 6 1 0
#> # … with 26 more rows
Complete only vs
within existing combinations of carb
and cyl
:
mtcars %>%
count(carb, cyl, vs) %>%
ungroup %>%
complete(vs, nesting(carb, cyl), fill=list(n=0))
#> # A tibble: 18 x 4
#> vs carb cyl n
#> <dbl> <dbl> <dbl> <dbl>
#> 1 0 1 4 0
#> 2 0 1 6 0
#> 3 0 2 4 1
#> 4 0 2 8 4
#> 5 0 3 8 3
#> 6 0 4 6 2
#> 7 0 4 8 6
#> 8 0 6 6 1
#> 9 0 8 8 1
#> 10 1 1 4 5
#> 11 1 1 6 2
#> 12 1 2 4 5
#> 13 1 2 8 0
#> 14 1 3 8 0
#> 15 1 4 6 2
#> 16 1 4 8 0
#> 17 1 6 6 0
#> 18 1 8 8 0