Thank you for the help, aosmith.
I changed the code at bit more in an attempt to address issues I found while applying the function to some of my problematic variables.
library(tidyverse)
library(rlang)
#>
#> Attaching package: 'rlang'
#> The following objects are masked from 'package:purrr':
#>
#> %@%, as_function, flatten, flatten_chr, flatten_dbl,
#> flatten_int, flatten_lgl, flatten_raw, invoke, list_along,
#> modify, prepend, splice
bar_totals = function(.data, value, group_var = Y) {
value = enquo(value)
val = as_label(value)
bar_labs = data.frame( table(.data[[val]] ) ) %>% arrange(desc(Freq))
print(bar_labs)
.data %>%
mutate(value = {{ value }} %>% fct_infreq() %>% fct_rev()) %>%
ggplot(aes(x = value,
y = stat(count),
fill= {{ group_var }})) +
geom_bar() +
theme_minimal()+
geom_text(stat = "count",
aes(label = stat(count),
y = stat(count) ),
position = position_stack(0.5),
vjust =0,
size = 4) +
geom_text(data = bar_labs,
aes(x = Var1, y = Freq, label = Freq),
inherit.aes = FALSE,
size = 6,
vjust = 1.5,
hjust = .5,
fontface = "bold") +
theme(legend.position = "top",
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.ticks.y = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank())+
labs(fill = "")+
coord_flip()
}
When I apply the function to a complete variable, then I get plot ordered according the frequency for each category.
perfect.dt <- data.frame(
X = sample(c("Black","Blue","Red","Green","Violet"), 1000, replace = TRUE),
Y = sample(c("Yes", "No","Maybe"), 1000, replace = TRUE)
)
bar_totals(perfect.dt, X)
#> Var1 Freq
#> 1 Blue 215
#> 2 Black 200
#> 3 Red 200
#> 4 Green 196
#> 5 Violet 189

However, when applied to a variable containing factor levels with 0 observations (the case for many of my variables), I get an unordered plot. Arranging the bar_labs doesn’t seem to fix the problem.
my.dt = data.frame(
X = sample(c("Black","Blue","Red","Green","Violet"), 1000, prob=c(0.10, 0.20, 0.30, 0.39, 0.01), replace = TRUE),
Y = sample(c("Yes", "No","Maybe", NA), 1000, prob=c(0.08, 0.50, 0.20, 0.12), replace = TRUE)
)
my.dt$X = factor(my.dt$X, c("Black","Blue","Red","Green","Violet","Pink","Yellow"))
bar_totals(my.dt, X)
#> Var1 Freq
#> 1 Green 385
#> 2 Red 303
#> 3 Blue 198
#> 4 Black 99
#> 5 Violet 15
#> 6 Pink 0
#> 7 Yellow 0

Do you have any suggestions?