# Add number of observations and mean to ggplot2

Hi!
I want to add the number of observations and the mean per group to my graph. I followed this article, but nothing happens: https://medium.com/@gscheithauer/how-to-add-number-of-observations-to-a-ggplot2-boxplot-b22710f7ef80.

I'd like to have it like this:

That is my code:

``````library(ggplot2)
library(dplyr)
library(tidyverse)

#sample data
Vergleich <- data.frame(
mean_hh_wohnbdl = c(6,6,3,3,3,1,9,1,7,NA,2,4,9,3,3,9,2),
gleich_entf_fakt = c(16.00,66.67,50.00,14.29,NA,42.86,17.14,33.33,116.67,200.00,20.00,66.67,40.00,70.00,NA,33.33,102.00)
)
Vergleich <- mutate(Vergleich,gleich_entf_kat = cut(gleich_entf_fakt,breaks =  c(0,30,50,70,90,110,130,150,170,200000),labels = c("5","4","3","2","1","2","3","4","5")))

#function for counting the number of observations and the mean
# Build the annotation (count and mean) that stat_summary() draws for one group.
#
# y           numeric vector with the y values of a single group.
# upper_limit reference height for the label; the text is placed at 95 % of it.
#             Defaults to 115 % of the largest non-missing gleich_entf_fakt in
#             the global data frame Vergleich. Without na.rm = TRUE a single NA
#             in that column makes the default NA, and the label is not drawn.
# na.rm       if TRUE, missing values in y are dropped before counting and
#             averaging.
#
# Returns a one-row data frame with the columns y (label position) and label.
stat_box_data04.01 <- function(
    y,
    upper_limit = max(Vergleich$gleich_entf_fakt, na.rm = TRUE) * 1.15,
    na.rm = TRUE) {
  if (na.rm) {
    y <- y[!is.na(y)]
  }
  data.frame(
    y = 0.95 * upper_limit,
    label = paste(
      "count =", format(length(y), big.mark = " "),
      "\n",
      "mean = ", format(round(mean(y), 1), big.mark = " ")
    )
  )
}

#geom_point
# Scatter plot of gleich_entf_fakt per Bundesland, coloured by quality
# category, with a count/mean label per x value added through stat_summary().
Vergleich %>%
  # Keep only rows in which all three plotted columns are present.
  drop_na(gleich_entf_kat, mean_hh_wohnbdl, gleich_entf_fakt) %>%
  # The piped data frame already is the plot data, so it is not passed again.
  ggplot(mapping = aes(
    x = as.numeric(mean_hh_wohnbdl),
    y = as.numeric(gleich_entf_fakt)
  )) +
  geom_point(aes(color = factor(gleich_entf_kat))) +
  scale_color_manual(
    values = c("red4", "red3", "orange", "green3", "green4"),
    labels = c("sehr schlecht", "schlecht", "mäßig", "gut", "sehr gut")
  ) +
  scale_x_continuous(
    breaks = 1:9,
    name = "Bundesland",
    labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")
  ) +
  # No fill aesthetic is mapped in this plot, so no fill scale is needed.
  labs(
    title = "Übereinstimmung nach Bundesland",
    x = "Bundesland",
    color = "gleich_entf_kat"
  ) +
  # One text label per x value, computed by stat_box_data04.01().
  stat_summary(
    fun.data = stat_box_data04.01,
    geom = "text",
    hjust = 0.5,
    vjust = 0.9
  ) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
``````

This is what my graph looks like (the number of observations and the mean are missing):

Hope I was clear enough in explaining my problem.

Do you want to see 9 boxplots, or 5 ?

I want to see 9 groups of points, one for each province ("Bundesland"). The values in the legend describe how well the factor on the y-axis fits. The only thing I want to add is the number of observations (count = ) and the mean (mean = ) for each group ("Bundesland").

``````library(ggplot2)
library(dplyr)
library(tidyverse)

# sample data
Vergleich <- data.frame(
mean_hh_wohnbdl = c(6, 6, 3, 3, 3, 1, 9, 1, 7, NA, 2, 4, 9, 3, 3, 9, 2),
gleich_entf_fakt = c(16.00, 66.67, 50.00, 14.29, NA, 42.86, 17.14, 33.33, 116.67, 200.00, 20.00, 66.67, 40.00, 70.00, NA, 33.33, 102.00)
)
Vergleich <- mutate(Vergleich, gleich_entf_kat = cut(gleich_entf_fakt, breaks = c(0, 30, 50, 70, 90, 110, 130, 150, 170, 200000), labels = c("5", "4", "3", "2", "1", "2", "3", "4", "5")))

# Summary table with the text label for each quality category
# (gleich_entf_kat), plus the x/y position at which the label is drawn.
text_labels <- group_by(
# na.omit() drops every row that has an NA in ANY column, not only in the
# three plotted ones.
# NOTE(review): on a data set with more columns this may leave no rows at all,
# which is presumably what makes max() below warn and return -Inf -- confirm.
Vergleich %>% na.omit(),
gleich_entf_kat
) %>% summarise(
textlabel = paste(
# Category name looked up by the factor's integer code.
# NOTE(review): gleich_entf_kat is the whole group column here, so paste()
# likely yields one (identical) label per row of the group -- confirm.
c("sehr schlecht", "schlecht", "mäßig", "gut", "sehr gut")[gleich_entf_kat], "\n",
"count =", format(length(gleich_entf_fakt), big.mark = " "),
"\n",
"mean = ", format(round(mean(gleich_entf_fakt), 1), big.mark = " ")
),
# x position of the label: mean of the Bundesland codes within the category.
mean_hh_wohnbdl = mean(mean_hh_wohnbdl, na.rm = TRUE),
# y position of the label: largest value within the category.
gleich_entf_fakt = max(gleich_entf_fakt, na.rm = TRUE)
)

# Scatter plot per Bundesland, coloured by quality category, with one text
# label per category taken from text_labels.
Vergleich %>%
  # Keep only rows in which all three plotted columns are present.
  drop_na(gleich_entf_kat, mean_hh_wohnbdl, gleich_entf_fakt) %>%
  # The piped data frame already is the plot data, so it is not passed again.
  ggplot(mapping = aes(
    x = mean_hh_wohnbdl,
    y = gleich_entf_fakt,
    color = gleich_entf_kat
  )) +
  geom_point() +
  # x, y and color are inherited from ggplot(); text_labels therefore has to
  # provide mean_hh_wohnbdl, gleich_entf_fakt and gleich_entf_kat.
  geom_text(
    data = text_labels,
    aes(label = textlabel)
  ) +
  scale_color_manual(
    values = c("red4", "red3", "orange", "green3", "green4"),
    labels = c("sehr schlecht", "schlecht", "mäßig", "gut", "sehr gut")
  ) +
  scale_x_continuous(
    breaks = 1:9,
    name = "Bundesland",
    labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")
  ) +
  # No fill aesthetic is mapped in this plot, so no fill scale is needed.
  labs(
    title = "Übereinstimmung nach Bundesland",
    x = "Bundesland",
    color = "gleich_entf_kat"
  ) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
``````

Unfortunately, this doesn't work with my big data set (over 17000 cases). I get this warning when running the `text_labels` code:

``````Warnmeldung:
In max(gleich_entf_fakt, na.rm = TRUE) :
kein nicht-fehlendes Argument für max; gebe -Inf zurück
``````

After that, I get the same plot as before; only the symbols in the legend have changed:

Because I actually wanted to add the count and the mean for each of the 9 groups on the x-axis, I tried replacing the grouping variable `gleich_entf_kat` with `mean_hh_wohnbdl` in the `text_labels` code:

``````text_labels <- group_by(
# Summary table with one text label per Bundesland code (mean_hh_wohnbdl).
# na.omit() drops every row that has an NA in ANY column.
# NOTE(review): if that leaves no rows, max() below has nothing to work on,
# which is presumably the source of the -Inf warning -- confirm.
Vergleich %>% na.omit(),
mean_hh_wohnbdl
) %>% summarise(
textlabel = paste(
# Bundesland abbreviation looked up by the numeric code (1 to 9).
c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")[mean_hh_wohnbdl], "\n",
"count =", format(length(gleich_entf_fakt), big.mark = " "),
"\n",
"mean = ", format(round(mean(gleich_entf_fakt), 1), big.mark = " ")
),
# x position of the label (the grouping value itself).
mean_hh_wohnbdl = mean(mean_hh_wohnbdl, na.rm = TRUE),
# y position of the label: largest value within the Bundesland.
# The result has no gleich_entf_kat column.
gleich_entf_fakt = max(gleich_entf_fakt, na.rm = TRUE)
)

# Scatter plot per Bundesland, coloured by quality category, with text labels
# taken from text_labels.
Vergleich %>%
drop_na(gleich_entf_kat, mean_hh_wohnbdl, gleich_entf_fakt) %>%
# NOTE(review): the piped data already fills the data argument; the extra
# `Vergleich` looks redundant -- confirm.
ggplot(Vergleich, mapping = aes(
x = mean_hh_wohnbdl,
y = gleich_entf_fakt,
# color is set for the whole plot, so every layer inherits it unless it
# overrides it.
color = gleich_entf_kat
)) +
geom_point() +
# This layer inherits color = gleich_entf_kat, but text_labels (grouped by
# mean_hh_wohnbdl) has no gleich_entf_kat column. That is presumably what
# raises "Objekt 'gleich_entf_kat' nicht gefunden".
geom_text(
data = text_labels,
aes(label = textlabel)
) +
scale_color_manual(values = c("red4", "red3", "orange", "green3", "green4"), labels = c("sehr schlecht", "schlecht", "mäßig", "gut", "sehr gut")) +
scale_x_continuous(breaks = 1:9, name = "Bundesland", labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")) +
# NOTE(review): no fill aesthetic is mapped in this plot, so this scale
# appears to have no effect.
scale_fill_manual(breaks = 1:9, name = "Bundesland", labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")) +
labs(
title = "Übereinstimmung nach Bundesland",
x = "Bundesland",
color = "gleich_entf_kat"
) +
theme(plot.title = element_text(hjust = 0.5, face = "bold"))
``````

But this leads to two other warnings/errors:

1. When running the `text_labels` code, this warning appears:
``````Warnmeldung:
In max(gleich_entf_fakt, na.rm = TRUE) :
kein nicht-fehlendes Argument für max; gebe -Inf zurück
``````
2. When running the ggplot code, this error appears:
``````Fehler in FUN(X[[i]], ...) : Objekt 'gleich_entf_kat' nicht gefunden
``````
3. R doesn't create a plot because of 2).

To clarify what I'm trying to do, I drew it in PowerPoint:

I hope, this makes it a bit clearer.

``````library(ggplot2)
library(dplyr)
library(tidyverse)

# sample data
Vergleich <- data.frame(
mean_hh_wohnbdl = c(6, 6, 3, 3, 3, 1, 9, 1, 7, NA, 2, 4, 9, 3, 3, 9, 2),
gleich_entf_fakt = c(16.00, 66.67, 50.00, 14.29, NA, 42.86, 17.14, 33.33, 116.67, 200.00, 20.00, 66.67, 40.00, 70.00, NA, 33.33, 102.00)
)
Vergleich <- mutate(Vergleich, gleich_entf_kat = cut(gleich_entf_fakt, breaks = c(0, 30, 50, 70, 90, 110, 130, 150, 170, 200000), labels = c("5", "4", "3", "2", "1", "2", "3", "4", "5")))

# One label per Bundesland code (mean_hh_wohnbdl): number of rows and rounded
# mean of gleich_entf_fakt, plus the height at which the label is drawn.
text_labels <- Vergleich %>%
  # na.omit() drops every row that has an NA in any column of Vergleich.
  na.omit() %>%
  group_by(mean_hh_wohnbdl) %>%
  summarise(
    textlabel = paste(
      "count =", format(n(), big.mark = " "),
      "\n",
      "mean = ", format(round(mean(gleich_entf_fakt), 1), big.mark = " ")
    ),
    # Vertical placement of the text: every label sits at the overall maximum
    # of the unfiltered data, i.e. on one line above the points.
    y = max(Vergleich$gleich_entf_fakt, na.rm = TRUE)
  )

# Scatter plot per Bundesland, coloured by quality category, with a count/mean
# label above each Bundesland taken from text_labels.
Vergleich %>%
  # Keep only rows in which all three plotted columns are present.
  drop_na(gleich_entf_kat, mean_hh_wohnbdl, gleich_entf_fakt) %>%
  # The piped data frame already is the plot data, so it is not passed again.
  ggplot(mapping = aes(
    x = mean_hh_wohnbdl,
    y = gleich_entf_fakt,
    color = gleich_entf_kat
  )) +
  geom_point() +
  geom_text(
    data = text_labels,
    # color = NULL switches off the inherited color mapping, because
    # text_labels has no gleich_entf_kat column; y is taken from text_labels.
    aes(
      label = textlabel,
      color = NULL,
      y = y
    ),
    size = 3
  ) +
  scale_color_manual(
    values = c("red4", "red3", "orange", "green3", "green4"),
    labels = c("sehr schlecht", "schlecht", "mäßig", "gut", "sehr gut")
  ) +
  scale_x_continuous(
    breaks = 1:9,
    name = "Bundesland",
    labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")
  ) +
  # No fill aesthetic is mapped in this plot, so no fill scale is needed.
  labs(
    title = "Übereinstimmung nach Bundesland",
    x = "Bundesland",
    color = "gleich_entf_kat"
  ) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
``````

1 Like

Thanks a lot for your help! I replaced the `na.omit()` with `drop_na(gleich_entf_kat, mean_hh_wohnbdl, gleich_entf_fakt)`, and now it's working.

Here is the final solution of the problem:

``````library(ggplot2)
library(dplyr)
library(tidyverse)

# sample data
Vergleich <- data.frame(
mean_hh_wohnbdl = c(6, 6, 3, 3, 3, 1, 9, 1, 7, NA, 2, 4, 9, 3, 3, 9, 2),
gleich_entf_fakt = c(16.00, 66.67, 50.00, 14.29, NA, 42.86, 17.14, 33.33, 116.67, 200.00, 20.00, 66.67, 40.00, 70.00, NA, 33.33, 102.00)
)
Vergleich <- mutate(Vergleich, gleich_entf_kat = cut(gleich_entf_fakt, breaks = c(0, 30, 50, 70, 90, 110, 130, 150, 170, 200000), labels = c("5", "4", "3", "2", "1", "2", "3", "4", "5")))

# One label per Bundesland code (mean_hh_wohnbdl): number of rows and rounded
# mean of gleich_entf_fakt, plus the height at which the label is drawn.
text_labels <- Vergleich %>%
  # Drop only rows with an NA in one of the three columns that are used, so
  # NAs in unrelated columns do not remove rows.
  drop_na(gleich_entf_kat, mean_hh_wohnbdl, gleich_entf_fakt) %>%
  group_by(mean_hh_wohnbdl) %>%
  summarise(
    textlabel = paste(
      "count =", format(n(), big.mark = " "),
      "\n",
      "mean = ", format(round(mean(gleich_entf_fakt), 1), big.mark = " ")
    ),
    # Vertical placement of the text: every label sits at the overall maximum
    # of the unfiltered data, i.e. on one line above the points.
    y = max(Vergleich$gleich_entf_fakt, na.rm = TRUE)
  )

Vergleich %>%
drop_na(gleich_entf_kat, mean_hh_wohnbdl, gleich_entf_fakt) %>%
ggplot(Vergleich, mapping = aes(
x = mean_hh_wohnbdl,
y = gleich_entf_fakt,
color = gleich_entf_kat
)) +
geom_point() +
geom_text(
data = text_labels,
aes(label = textlabel,color=NULL,
y=y),
size=3
) +
scale_color_manual(values = c("red4", "red3", "orange", "green3", "green4"), labels = c("sehr schlecht", "schlecht", "mäßig", "gut", "sehr gut")) +
scale_x_continuous(breaks = 1:9, name = "Bundesland", labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")) +
scale_fill_manual(breaks = 1:9, name = "Bundesland", labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")) +
labs(
title = "Übereinstimmung nach Bundesland",
x = "Bundesland",
I have an additional question: Is it possible to fill the label by the colors of the `gleich_entf_kat`, so that a mean of e.g. 80 would be `green3` and a mean of 160 `red3` (like in the first solution of @nirgrahamuk, but with the labels as they are right now)?