With the reprex (below) you can see what's happening a bit better:
# Attach the tidyverse packages used throughout this example.
library(tidyverse)
# Keep strings as character vectors rather than factors when data frames are built.
options(stringsAsFactors = FALSE)
# Load the gss_cat example data set.
data(gss_cat)
# Keep only the four columns needed for the demonstration.
mydf <- gss_cat %>% select(year, marital, race, age)
# Column names to group by, stored as character strings.
# NOTE(review): this vector shares its name with dplyr::group_by(). Calls such
# as group_by(...) below still work because R skips non-function objects when
# it looks up a function, but a distinct name would be clearer.
group_by <- c("year", "marital", "race")
# Count the rows of `mydf` within each group defined by `var`.
#
# `var` is captured with enquo(), so it is meant to be supplied as a bare
# column name, e.g. test_func(mydf, year). If a character string is supplied
# instead, the string itself becomes a constant grouping column and every row
# falls into a single group.
#
# Returns a tibble with one row per group and the row count in column `n`.
test_func <- function(mydf, var) {
  grouping <- enquo(var)
  grouped <- group_by(mydf, !!grouping)
  summarise(grouped, n = n())
}
# Test my function by grouping by year (passed as a bare column name)
test_func(mydf, year)
#> # A tibble: 8 x 2
#> year n
#> <int> <int>
#> 1 2000 2817
#> 2 2002 2765
#> 3 2004 2812
#> 4 2006 4510
#> 5 2008 2023
#> 6 2010 2044
#> 7 2012 1974
#> 8 2014 2538
# Same check, grouping by marital
test_func(mydf, marital)
#> # A tibble: 6 x 2
#> marital n
#> <fct> <int>
#> 1 No answer 17
#> 2 Never married 5416
#> 3 Separated 743
#> 4 Divorced 3383
#> 5 Widowed 1807
#> 6 Married 10117
# Same check, grouping by race
test_func(mydf, race)
#> # A tibble: 4 x 2
#> race n
#> <fct> <int>
#> 1 Other 1959
#> 2 Black 3129
#> 3 White 16395
#> 4 Not applicable 0
# Bring purrr into the equation by mapping over the character vector
# `group_by`, so each column name reaches test_func() as a string
pmap(list(var = group_by), test_func, mydf = mydf)
#> [[1]]
#> # A tibble: 1 x 2
#> `"year"` n
#> <chr> <int>
#> 1 year 21483
#>
#> [[2]]
#> # A tibble: 1 x 2
#> `"marital"` n
#> <chr> <int>
#> 1 marital 21483
#>
#> [[3]]
#> # A tibble: 1 x 2
#> `"race"` n
#> <chr> <int>
#> 1 race 21483
Created on 2018-10-11 by the reprex package (v0.2.1.9000)
Note that in the output for the list, you're grouping by variables with quotation marks in them (e.g. `"race"` as opposed to `race`), which is why the column names have backticks around them.
Because `dplyr::group_by()` already quotes its input, you're (in effect) quoting twice.*
This works with `rlang::ensym()`:
# Count the rows of `mydf` within each group defined by the column `var`.
#
# `var` is converted to a symbol with rlang::ensym(), so a character string
# naming a column (as supplied by the pmap() call over a character vector)
# is resolved to the actual column before grouping.
#
# Returns a tibble with one row per group and the row count in column `n`.
test_func2 <- function(mydf, var) {
  col_sym <- rlang::ensym(var)
  by_col <- group_by(mydf, !!col_sym)
  summarise(by_col, n = n())
}
# Same purrr call over the character vector `group_by`, now using test_func2()
pmap(list(var = group_by), test_func2, mydf = mydf)
#> [[1]]
#> # A tibble: 8 x 2
#> year n
#> <int> <int>
#> 1 2000 2817
#> 2 2002 2765
#> 3 2004 2812
#> 4 2006 4510
#> 5 2008 2023
#> 6 2010 2044
#> 7 2012 1974
#> 8 2014 2538
#>
#> [[2]]
#> # A tibble: 6 x 2
#> marital n
#> <fct> <int>
#> 1 No answer 17
#> 2 Never married 5416
#> 3 Separated 743
#> 4 Divorced 3383
#> 5 Widowed 1807
#> 6 Married 10117
#>
#> [[3]]
#> # A tibble: 4 x 2
#> race n
#> <fct> <int>
#> 1 Other 1959
#> 2 Black 3129
#> 3 White 16395
#> 4 Not applicable 0
* Edit: With the quotation marks, you're effectively making a new column with one group, because of R's recycling rules, which is why you were getting the same `n` for all three variables before!
# Minimal demonstration: group directly by the string "year"
library(tidyverse)
data(gss_cat)
mydf <- gss_cat %>% select(year, marital, race, age)
# The string is not looked up as a column; it is added as a new constant
# column named `"year"`, which yields a single group (see the output below)
group_by(mydf, "year")
#> # A tibble: 21,483 x 5
#> # Groups: "year" [1]
#> year marital race age `"year"`
#> <int> <fct> <fct> <int> <chr>
#> 1 2000 Never married White 26 year
#> 2 2000 Divorced White 48 year
#> 3 2000 Widowed White 67 year
#> 4 2000 Never married White 39 year
#> 5 2000 Divorced White 25 year
#> 6 2000 Married White 25 year
#> 7 2000 Never married White 36 year
#> 8 2000 Divorced White 44 year
#> 9 2000 Married White 44 year
#> 10 2000 Married White 47 year
#> # … with 21,473 more rows
Created on 2018-10-11 by the reprex package (v0.2.1.9000)