Getting an error while creating a summary of quantiles and the average

I am trying to create a function that calculates quantiles and the mean, but I am getting an error.

Also, it is not grouping by the variable passed to "group_by(!!grp_var)".

# Example data: a character key `aa` and three numeric columns.
dat<-data.frame(
  aa = c("q","r","y","v","g","y","d","s","n","k","y","d","s","t","n","u","l","h","x","c","q","r","y","v","g","y","d","s","n","k","y","d","s","t","n","u","l","h","x","c"),
  col1=c(1,2,3,2,1,2,3,4,4,4,5,3,4,2,1,2,5,3,2,1,2,4,2,1,3,2,1,2,3,1,2,2,4,4,4,1,2,5,3,5),
  col2=c(250,1100,100,750,400,100,200,700,500,700,200,600,200,200,600,300,400,300,200,500,700,500,600,400,400,600,500,600,400,100,700,300,200,700,700,200,300,700,200,400),
  col3= c(2150,3213,2580,4335,2228,3795,2319,2363,2252,3015,2978,2127,3938,3013,3063,4202,4340,4247,3755,4145,3300,3739,3294,2944,4152,2898,2500,3164,2384,2824,3431,2864,3752,2265,3332,3321,3418,3521,2689,2186)
) 
# P1 is a copy of `aa`; it is the column named by `grp_var` below.
dat$P1 <- dat$aa
dataset = dat
# Note: grp_var and var hold character strings, not bare column names.
grp_var = "P1"
var = "col2"
footer = " "  
decimal = TRUE  


# The function header is commented out, so everything below runs at top level.
#tab_value_grp_by<-function(dataset,grp_var,var,footer)  
 
  # NOTE(review): enquo() is applied here to ordinary variables holding
  # strings rather than to function arguments; presumably `!!var` then
  # injects the string "col2" instead of the column -- confirm.
  grp_var <- enquo(grp_var)
  var <- enquo(var)
  # Digits passed to round(): 1 when `decimal` is TRUE, otherwise 0.
  numdig <- if (decimal == TRUE) {1} else {0}
  
  # Drop rows with missing values, keep the two columns, group, then compute
  # per group the 2nd, 3rd and 4th element of the type-6 quantiles at
  # 0, 0.25, 0.5, 0.75, 1 (stored as q25, Median and q75), the mean
  # (Average) and the count of non-missing values (N). The statistics are
  # passed through format(), so they are character strings.
  bygrps_table<-dataset %>% filter(!is.na(!!var),!is.na(!!grp_var)) %>%
    select(!! grp_var, !!var) %>% group_by(!!grp_var) %>% 
     summarise(
      q25 = format(round(quantile(!! var,  type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[2],digits = numdig),nsmall = 1),
      Median = format(round(quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],digits = numdig),nsmall = 1),
      Average = format(round( mean(!! var, na.rm=TRUE),digits = numdig),nsmall = 1),
      q75 = format(round(quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[4],digits = numdig) ,nsmall = 1),
      N = sum(!is.na(!!var)))
    

Here is a modified version of your code that may do what you want. I only checked one result in the output table.
I made the minimum changes needed to get the code to run. Your calculation of q25, Median, and q75 seems overly complicated, but perhaps you have a reason to do it that way.

library(rlang)
library(dplyr, warn.conflicts = FALSE)
# Example data: a character key `aa` and three numeric columns (40 rows).
dat <- data.frame(
  aa = c(
    "q", "r", "y", "v", "g", "y", "d", "s", "n", "k",
    "y", "d", "s", "t", "n", "u", "l", "h", "x", "c",
    "q", "r", "y", "v", "g", "y", "d", "s", "n", "k",
    "y", "d", "s", "t", "n", "u", "l", "h", "x", "c"
  ),
  col1 = c(
    1, 2, 3, 2, 1, 2, 3, 4, 4, 4, 5, 3, 4, 2, 1, 2, 5, 3, 2, 1,
    2, 4, 2, 1, 3, 2, 1, 2, 3, 1, 2, 2, 4, 4, 4, 1, 2, 5, 3, 5
  ),
  col2 = c(
    250, 1100, 100, 750, 400, 100, 200, 700, 500, 700,
    200, 600, 200, 200, 600, 300, 400, 300, 200, 500,
    700, 500, 600, 400, 400, 600, 500, 600, 400, 100,
    700, 300, 200, 700, 700, 200, 300, 700, 200, 400
  ),
  col3 = c(
    2150, 3213, 2580, 4335, 2228, 3795, 2319, 2363, 2252, 3015,
    2978, 2127, 3938, 3013, 3063, 4202, 4340, 4247, 3755, 4145,
    3300, 3739, 3294, 2944, 4152, 2898, 2500, 3164, 2384, 2824,
    3431, 2864, 3752, 2265, 3332, 3321, 3418, 3521, 2689, 2186
  )
)
# P1 duplicates `aa`; it is the grouping column used in the example call.
dat[["P1"]] <- dat[["aa"]]
# These become function arguments below, so they are no longer set globally.
#dataset = dat
#grp_var = "P1"
#var = "col2"
footer <- " "
decimal <- TRUE


#' Summarise a numeric column by group (quartiles, mean and count)
#'
#' Rows in which either `var` or `grp_var` is `NA` are dropped, then one row
#' per value of `grp_var` is returned.
#'
#' @param dataset A data frame.
#' @param grp_var Unquoted name of the grouping column, e.g. `P1`.
#' @param var Unquoted name of the numeric column to summarise, e.g. `col2`.
#' @param footer Not used by the function; kept so existing calls still work.
#' @param decimal If `TRUE` (the default) the statistics are rounded to, and
#'   printed with, one decimal place; otherwise they are whole numbers.
#'   Previously this was read from a global variable of the same name.
#' @return A tibble with one row per group and the columns `q25`, `Median`,
#'   `Average`, `q75` (formatted character strings) and `N` (count of
#'   non-missing values of `var`).
tab_value_grp_by <- function(dataset, grp_var, var, footer, decimal = TRUE) {
  numdig <- if (isTRUE(decimal)) 1 else 0

  # Round and pad to the same number of decimals, so that `decimal = FALSE`
  # really prints whole numbers instead of values such as "400.0".
  fmt_stat <- function(x) {
    format(round(x, digits = numdig), nsmall = numdig)
  }

  # One quantile at probability `p`, using the same estimator (type 6) as the
  # original code, which took elements of the full 0/25/50/75/100 % vector.
  quart <- function(x, p) {
    quantile(x, probs = p, type = 6, na.rm = TRUE, names = FALSE)
  }

  # `{{ }}` forwards the bare column names supplied by the caller; it replaces
  # the enquo() + `!!` pair.
  dataset %>%
    filter(!is.na({{ var }}), !is.na({{ grp_var }})) %>%
    select({{ grp_var }}, {{ var }}) %>%
    group_by({{ grp_var }}) %>%
    summarise(
      q25 = fmt_stat(quart({{ var }}, 0.25)),
      Median = fmt_stat(quart({{ var }}, 0.5)),
      Average = fmt_stat(mean({{ var }}, na.rm = TRUE)),
      q75 = fmt_stat(quart({{ var }}, 0.75)),
      N = sum(!is.na({{ var }}))
    )
}
tab_value_grp_by(dat, P1, col2, " ")
#> `summarise()` ungrouping output (override with `.groups` argument)
#> # A tibble: 15 x 6
#>    P1    q25   Median Average q75        N
#>    <chr> <chr> <chr>  <chr>   <chr>  <int>
#>  1 c     400.0 450.0  450.0   500.0      2
#>  2 d     225.0 400.0  400.0   575.0      4
#>  3 g     400.0 400.0  400.0   400.0      2
#>  4 h     300.0 500.0  500.0   700.0      2
#>  5 k     100.0 400.0  400.0   700.0      2
#>  6 l     300.0 350.0  350.0   400.0      2
#>  7 n     425.0 550.0  550.0   675.0      4
#>  8 q     250.0 475.0  475.0   700.0      2
#>  9 r     500.0 800.0  800.0   1100.0     2
#> 10 s     200.0 400.0  425.0   675.0      4
#> 11 t     200.0 450.0  450.0   700.0      2
#> 12 u     200.0 250.0  250.0   300.0      2
#> 13 v     400.0 575.0  575.0   750.0      2
#> 14 x     200.0 200.0  200.0   200.0      2
#> 15 y     100.0 400.0  383.3   625.0      6

Created on 2020-08-19 by the reprex package (v0.3.0)

I am still getting an error.

# Keep complete rows, reduce to the two columns of interest, then group.
bygrps_table <- dataset %>%
  filter(!is.na(!!var), !is.na(!!grp_var)) %>%
  select(!!grp_var, !!var) %>%
  group_by(!!grp_var)

This code should give two columns, one for grp_var and one for var, but I am getting one more column, P1, in which every value is P1.

After running all the lines, I get the error below:

Error: Problem with `summarise()` input `q25`.
x non-numeric argument to mathematical function
i Input `q25` is `format(...)`.
i The error occurred in group 1: "P1" = "P1".

I posted a complete example, starting with the calls to library(), that runs for me. Does that code run for you? If it does but your code throws an error, please post your complete code so others can reproduce your problem.

I have tried it like this:

library(rlang)
library(dplyr, warn.conflicts = FALSE)
# Example data: a character key `aa` and three numeric columns.
dat<-data.frame(
  aa = c("q","r","y","v","g","y","d","s","n","k","y","d","s","t","n","u","l","h","x","c","q","r","y","v","g","y","d","s","n","k","y","d","s","t","n","u","l","h","x","c"),
  col1=c(1,2,3,2,1,2,3,4,4,4,5,3,4,2,1,2,5,3,2,1,2,4,2,1,3,2,1,2,3,1,2,2,4,4,4,1,2,5,3,5),
  col2=c(250,1100,100,750,400,100,200,700,500,700,200,600,200,200,600,300,400,300,200,500,700,500,600,400,400,600,500,600,400,100,700,300,200,700,700,200,300,700,200,400),
  col3= c(2150,3213,2580,4335,2228,3795,2319,2363,2252,3015,2978,2127,3938,3013,3063,4202,4340,4247,3755,4145,3300,3739,3294,2944,4152,2898,2500,3164,2384,2824,3431,2864,3752,2265,3332,3321,3418,3521,2689,2186)
) 


# P1 is a copy of `aa`; it is the column named by `grp_var` below.
dat$P1 <- dat$aa
dataset = dat
# Note: grp_var and var hold character strings, not bare column names.
grp_var = "P1"
var = "col2"
footer = " "  
decimal = TRUE  


# The function header is still commented out, so the code below runs at top
# level rather than inside tab_value_grp_by().
#tab_value_grp_by<-function(dataset,grp_var,var,footer)  
 
# NOTE(review): enquo() is applied here to ordinary variables holding strings
# rather than to function arguments; presumably `!!var` then injects the
# string "col2" instead of the column -- confirm.
grp_var <- enquo(grp_var)
var <- enquo(var)
# Digits passed to round(): 1 when `decimal` is TRUE, otherwise 0.
numdig <- if (decimal == TRUE) {1} else {0}

# Drop rows with missing values, keep the two columns, group, then compute
# per group the 2nd, 3rd and 4th element of the type-6 quantiles at
# 0, 0.25, 0.5, 0.75, 1 (stored as q25, Median and q75), the mean (Average)
# and the count of non-missing values (N). The statistics are passed through
# format(), so they are character strings.
bygrps_table<-dataset %>% filter(!is.na(!!var),!is.na(!!grp_var)) %>%
  select(!! grp_var, !!var) %>% group_by(!!grp_var) %>% 
  summarise(
    q25 = format(round(quantile(!! var,  type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[2],digits = numdig),nsmall = 1),
    Median = format(round(quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],digits = numdig),nsmall = 1),
    Average = format(round( mean(!! var, na.rm=TRUE),digits = numdig),nsmall = 1),
    q75 = format(round(quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[4],digits = numdig) ,nsmall = 1),
    N = sum(!is.na(!!var)))
    # NOTE(review): return() is called outside any function here; presumably
    # left over from the function version -- confirm.
    return(bygrps_table)
    

The error I was getting:

Error: Problem with summarise() input q25.
x non-numeric argument to mathematical function
i Input q25 is format(...).
i The error occurred in group 1: "P1" = "P1".

I understood from your first post that you want to write a function that does the calculations. Notice that in the code I posted almost all of the code is inside of the tab_value_grp_by function. If you want to run the code outside of a function, there is no need to use enquo or !!. The code would then look like this:

library(dplyr, warn.conflicts = FALSE)
# Example data: a character key `aa` and three numeric columns (40 rows).
dat <- data.frame(
  aa = c(
    "q", "r", "y", "v", "g", "y", "d", "s", "n", "k",
    "y", "d", "s", "t", "n", "u", "l", "h", "x", "c",
    "q", "r", "y", "v", "g", "y", "d", "s", "n", "k",
    "y", "d", "s", "t", "n", "u", "l", "h", "x", "c"
  ),
  col1 = c(
    1, 2, 3, 2, 1, 2, 3, 4, 4, 4, 5, 3, 4, 2, 1, 2, 5, 3, 2, 1,
    2, 4, 2, 1, 3, 2, 1, 2, 3, 1, 2, 2, 4, 4, 4, 1, 2, 5, 3, 5
  ),
  col2 = c(
    250, 1100, 100, 750, 400, 100, 200, 700, 500, 700,
    200, 600, 200, 200, 600, 300, 400, 300, 200, 500,
    700, 500, 600, 400, 400, 600, 500, 600, 400, 100,
    700, 300, 200, 700, 700, 200, 300, 700, 200, 400
  ),
  col3 = c(
    2150, 3213, 2580, 4335, 2228, 3795, 2319, 2363, 2252, 3015,
    2978, 2127, 3938, 3013, 3063, 4202, 4340, 4247, 3755, 4145,
    3300, 3739, 3294, 2944, 4152, 2898, 2500, 3164, 2384, 2824,
    3431, 2864, 3752, 2265, 3332, 3321, 3418, 3521, 2689, 2186
  )
)
# P1 duplicates `aa`; it is the grouping column used below.
dat[["P1"]] <- dat[["aa"]]
dataset <- dat
# The column names are written directly in the pipeline, so these stay unset.
#grp_var = "P1"
#var = "col2"
footer <- " "
decimal <- TRUE


#tab_value_grp_by<-function(dataset,grp_var,var,footer)  

#grp_var <- enquo(grp_var)
#var <- enquo(var)
# Decimal places for the summary statistics: one when `decimal` is TRUE,
# none otherwise.
numdig <- if (isTRUE(decimal)) 1 else 0

# Per-group summary of col2: drop rows with a missing value or group key, then
# report the quartiles (quantile type 6), the mean and the count.
# Each statistic is rounded to `numdig` digits and padded to the same number
# of decimals, so `decimal <- FALSE` prints whole numbers rather than "400.0".
# Each quantile is requested directly instead of indexing the full
# 0/25/50/75/100 % vector.
bygrps_table <- dataset %>%
  filter(!is.na(col2), !is.na(P1)) %>%
  select(P1, col2) %>%
  group_by(P1) %>%
  summarise(
    q25 = format(
      round(quantile(col2, probs = 0.25, type = 6, na.rm = TRUE), numdig),
      nsmall = numdig
    ),
    Median = format(
      round(quantile(col2, probs = 0.5, type = 6, na.rm = TRUE), numdig),
      nsmall = numdig
    ),
    Average = format(
      round(mean(col2, na.rm = TRUE), numdig),
      nsmall = numdig
    ),
    q75 = format(
      round(quantile(col2, probs = 0.75, type = 6, na.rm = TRUE), numdig),
      nsmall = numdig
    ),
    N = sum(!is.na(col2))
  )

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.