Here's a tidy eval approach in which we create a function to do the grouping and summary. As usual with tidy eval, I'm not sure if this is the "right" or "intended" approach, but it works for a single grouping column:
# Data set-up
set.seed(2)
sex <- sample(c("M", "F"), size=100, replace=TRUE)
age <- rnorm(n=100, mean=20 + 4*(sex=="F"), sd=0.1)
dsn <- data.frame(sex, age)
library("tidyverse")
fnc = function(data, group) {
# Add a dummy "grouping" column for All data
data = data %>%
mutate(all="All")
# Convert grouping column to factor to avoid error when grouping var is numeric
data = data %>%
mutate_at(group, as.factor)
c("all", group) %>%
map_df(
~data %>%
group_by_at(vars(.x)) %>%
mutate(n=n()) %>%
group_by_at(vars(.x, n)) %>%
summarise(across(is.numeric, list(mean=mean))) %>%
rename(!!group:=!!.x)
)
}
Now run the function on a few data sets:
fnc(dsn, "sex")
#> sex n age_mean
#> 1 All 100 22.0
#> 2 F 51 24.0
#> 3 M 49 20.0
fnc(iris, "Species")
#> Species n Sepal.Length_me… Sepal.Width_mean Petal.Length_me…
#> 1 All 150 5.84 3.06 3.76
#> 2 setosa 50 5.01 3.43 1.46
#> 3 versic… 50 5.94 2.77 4.26
#> 4 virgin… 50 6.59 2.97 5.55
#> # … with 1 more variable: Petal.Width_mean <dbl>
fnc(mtcars, "cyl")
#> cyl n mpg_mean disp_mean hp_mean drat_mean wt_mean qsec_mean vs_mean
#> 1 All 32 20.1 231. 147. 3.60 3.22 17.8 0.438
#> 2 4 11 26.7 105. 82.6 4.07 2.29 19.1 0.909
#> 3 6 7 19.7 183. 122. 3.59 3.12 18.0 0.571
#> 4 8 14 15.1 353. 209. 3.23 4.00 16.8 0
#> # … with 3 more variables: am_mean <dbl>, gear_mean <dbl>, carb_mean <dbl>
Here's another version that will summarise by group for each of a vector of grouping variables:
fnc2 = function(data, groups) {
# Add a dummy "grouping" column for All data
data = data %>%
mutate(all="All")
# Convert grouping column to factor to avoid error when grouping var is numeric
data = data %>%
mutate_at(groups, as.factor)
c("all", groups) %>%
map_df(
~data %>%
group_by_at(vars(.x)) %>%
mutate(n=n()) %>%
group_by_at(vars(.x, n)) %>%
summarise(across(is.numeric, list(mean=mean))) %>%
rename(group=!!.x) %>%
mutate(group.var=.x)
) %>%
select(group.var, group, everything())
}
fnc2(mtcars, c("cyl", "gear"))
#> group.var group n mpg_mean disp_mean hp_mean drat_mean wt_mean qsec_mean
#> 1 all All 32 20.1 231. 147. 3.60 3.22 17.8
#> 2 cyl 4 11 26.7 105. 82.6 4.07 2.29 19.1
#> 3 cyl 6 7 19.7 183. 122. 3.59 3.12 18.0
#> 4 cyl 8 14 15.1 353. 209. 3.23 4.00 16.8
#> 5 gear 3 15 16.1 326. 176. 3.13 3.89 17.7
#> 6 gear 4 12 24.5 123. 89.5 4.04 2.62 19.0
#> 7 gear 5 5 21.4 202. 196. 3.92 2.63 15.6
#> # … with 3 more variables: vs_mean <dbl>, am_mean <dbl>, carb_mean <dbl>
fnc2(diamonds, diamonds %>% select_if(~!is.numeric(.)) %>% names)
#> group.var group n carat_mean depth_mean table_mean price_mean x_mean
#> 1 all All 53940 0.798 61.7 57.5 3933. 5.73
#> 2 cut Fair 1610 1.05 64.0 59.1 4359. 6.25
#> 3 cut Good 4906 0.849 62.4 58.7 3929. 5.84
#> 4 cut Very… 12082 0.806 61.8 58.0 3982. 5.74
#> 5 cut Prem… 13791 0.892 61.3 58.7 4584. 5.97
#> 6 cut Ideal 21551 0.703 61.7 56.0 3458. 5.51
#> 7 color D 6775 0.658 61.7 57.4 3170. 5.42
#> 8 color E 9797 0.658 61.7 57.5 3077. 5.41
#> 9 color F 9542 0.737 61.7 57.4 3725. 5.61
#> 10 color G 11292 0.771 61.8 57.3 3999. 5.68
#> # … with 11 more rows, and 2 more variables: y_mean <dbl>, z_mean <dbl>