Add number of observations and mean to ggplot2

Hi!
I want to add the number of observations and the mean per group to my graph. I followed this article, but nothing happens: How to add number of observations to a ggplot2 boxplot | by Dr. Gregor Scheithauer | Medium.

I'd like to have it like this:

That is my code:

library(ggplot2)
library(dplyr)
library(tidyverse)

#sample data
Vergleich <- data.frame(
  mean_hh_wohnbdl = c(6,6,3,3,3,1,9,1,7,NA,2,4,9,3,3,9,2),
  gleich_entf_fakt = c(16.00,66.67,50.00,14.29,NA,42.86,17.14,33.33,116.67,200.00,20.00,66.67,40.00,70.00,NA,33.33,102.00)
  )
Vergleich <- mutate(Vergleich,gleich_entf_kat = cut(gleich_entf_fakt,breaks =  c(0,30,50,70,90,110,130,150,170,200000),labels = c("5","4","3","2","1","2","3","4","5")))

#function for counting the number of observations and the mean
stat_box_data04.01 <- function(y,upper_limit = max(Vergleich$gleich_entf_fakt)*1.15,na.rm=TRUE){
  return(
    data.frame(
      y=0.95*upper_limit,
      label = paste('count =', format(length(y),big.mark = " "),
                    '\n',
                    'mean = ', format(round(mean(y),1),big.mark = " "))
    )
  )
}

#geom_point
Vergleich %>% 
  drop_na(gleich_entf_kat, mean_hh_wohnbdl, gleich_entf_fakt) %>%
  ggplot(Vergleich, mapping = aes(x=as.numeric(mean_hh_wohnbdl),y=as.numeric(gleich_entf_fakt))) + 
  geom_point(aes(color = factor(gleich_entf_kat))) + 
  scale_color_manual(values=c("red4","red3","orange","green3","green4"),labels=c("sehr schlecht","schlecht","mäßig","gut","sehr gut")) +
  scale_x_continuous(breaks = 1:9,name="Bundesland",labels = c("B","K","NÖ","OÖ","S","ST","T","V","W")) + 
  scale_fill_manual(breaks = 1:9,name="Bundesland",labels = c("B","K","NÖ","OÖ","S","ST","T","V","W")) + 
  labs(title = "Übereinstimmung nach Bundesland",
       x = "Bundesland", 
       y = "Übereinstimmungsgrad",
       color = "gleich_entf_kat") +
  stat_summary(
    fun.data = stat_box_data04.01,
    geom = "text",
    hjust = 0.5,
    vjust = 0.9
  )+
  theme(plot.title = element_text(hjust = 0.5,face = "bold"))

That's how my graph looks like (the number of observations and the mean is missing):

Hope I was clear enough in explaining my problem.
Thanks in advance.

Do you want to see 9 boxplots, or 5 ?

I want to see 9 point charts. Each group of the point chart is another province ("Bundesland"). The values in the legend are about the quality of the factor on the y-axis fits. The only thing, I want to add is the number of observations (count = ) and the mean (mean = ) for each group ("Bundesland").

library(ggplot2)
library(dplyr)
library(tidyverse)

# sample data
Vergleich <- data.frame(
  mean_hh_wohnbdl = c(6, 6, 3, 3, 3, 1, 9, 1, 7, NA, 2, 4, 9, 3, 3, 9, 2),
  gleich_entf_fakt = c(16.00, 66.67, 50.00, 14.29, NA, 42.86, 17.14, 33.33, 116.67, 200.00, 20.00, 66.67, 40.00, 70.00, NA, 33.33, 102.00)
)
Vergleich <- mutate(Vergleich, gleich_entf_kat = cut(gleich_entf_fakt, breaks = c(0, 30, 50, 70, 90, 110, 130, 150, 170, 200000), labels = c("5", "4", "3", "2", "1", "2", "3", "4", "5")))

text_labels <- group_by(
  Vergleich %>% na.omit(),
  gleich_entf_kat
) %>% summarise(
  textlabel = paste(
    c("sehr schlecht", "schlecht", "mäßig", "gut", "sehr gut")[gleich_entf_kat], "\n",
    "count =", format(length(gleich_entf_fakt), big.mark = " "),
    "\n",
    "mean = ", format(round(mean(gleich_entf_fakt), 1), big.mark = " ")
  ),
  mean_hh_wohnbdl = mean(mean_hh_wohnbdl, na.rm = TRUE),
  gleich_entf_fakt = max(gleich_entf_fakt, na.rm = TRUE)
)

Vergleich %>%
  drop_na(gleich_entf_kat, mean_hh_wohnbdl, gleich_entf_fakt) %>%
  ggplot(Vergleich, mapping = aes(
    x = mean_hh_wohnbdl,
    y = gleich_entf_fakt,
    color = gleich_entf_kat
  )) +
  geom_point() +
  geom_text(
    data = text_labels,
    aes(label = textlabel)
  ) +
  scale_color_manual(values = c("red4", "red3", "orange", "green3", "green4"), labels = c("sehr schlecht", "schlecht", "mäßig", "gut", "sehr gut")) +
  scale_x_continuous(breaks = 1:9, name = "Bundesland", labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")) +
  scale_fill_manual(breaks = 1:9, name = "Bundesland", labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")) +
  labs(
    title = "Übereinstimmung nach Bundesland",
    x = "Bundesland",
    y = "Übereinstimmungsgrad",
    color = "gleich_entf_kat"
  ) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

Unfortunately, this doesn't work with my big data set (over 17000 cases). I get this warning when executing the text_labels function:

Warnmeldung:
In max(gleich_entf_fakt, na.rm = TRUE) :
  kein nicht-fehlendes Argument für max; gebe -Inf zurück

After that comes the same plot as before, only the symbols in the legend changed:

Because I actually wanted to add the information about the count and the mean for each of the 9 groups on the x-axis, I tried to replace in the text_labels the gleich_entf_kat function by mean_hh_wohnbdl:

text_labels <- group_by(
  Vergleich %>% na.omit(),
  mean_hh_wohnbdl
) %>% summarise(
  textlabel = paste(
    c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")[mean_hh_wohnbdl], "\n",
    "count =", format(length(gleich_entf_fakt), big.mark = " "),
    "\n",
    "mean = ", format(round(mean(gleich_entf_fakt), 1), big.mark = " ")
  ),
  mean_hh_wohnbdl = mean(mean_hh_wohnbdl, na.rm = TRUE),
  gleich_entf_fakt = max(gleich_entf_fakt, na.rm = TRUE)
)

Vergleich %>%
  drop_na(gleich_entf_kat, mean_hh_wohnbdl, gleich_entf_fakt) %>%
  ggplot(Vergleich, mapping = aes(
    x = mean_hh_wohnbdl,
    y = gleich_entf_fakt,
    color = gleich_entf_kat
  )) +
  geom_point() +
  geom_text(
    data = text_labels,
    aes(label = textlabel)
  ) +
  scale_color_manual(values = c("red4", "red3", "orange", "green3", "green4"), labels = c("sehr schlecht", "schlecht", "mäßig", "gut", "sehr gut")) +
  scale_x_continuous(breaks = 1:9, name = "Bundesland", labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")) +
  scale_fill_manual(breaks = 1:9, name = "Bundesland", labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")) +
  labs(
    title = "Übereinstimmung nach Bundesland",
    x = "Bundesland",
    y = "Übereinstimmungsgrad",
    color = "gleich_entf_kat"
  ) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

But this leads to two other warnings/errors:

  1. When executing the text_labels function comes:
Warnmeldung:
In max(gleich_entf_fakt, na.rm = TRUE) :
  kein nicht-fehlendes Argument für max; gebe -Inf zurück
  1. When executing the ggplot function, comes:
Fehler in FUN(X[[i]], ...) : Objekt 'gleich_entf_kat' nicht gefunden
  1. R doesn't create a plot, because of 2)

To clarify, what I'm trying to do, I painted it with PowerPoint:

I hope, this makes it a bit clearer.

ibrary(ggplot2)
library(dplyr)
library(tidyverse)

# sample data
Vergleich <- data.frame(
  mean_hh_wohnbdl = c(6, 6, 3, 3, 3, 1, 9, 1, 7, NA, 2, 4, 9, 3, 3, 9, 2),
  gleich_entf_fakt = c(16.00, 66.67, 50.00, 14.29, NA, 42.86, 17.14, 33.33, 116.67, 200.00, 20.00, 66.67, 40.00, 70.00, NA, 33.33, 102.00)
)
Vergleich <- mutate(Vergleich, gleich_entf_kat = cut(gleich_entf_fakt, breaks = c(0, 30, 50, 70, 90, 110, 130, 150, 170, 200000), labels = c("5", "4", "3", "2", "1", "2", "3", "4", "5")))

text_labels <- group_by(
  Vergleich %>% na.omit(),
  mean_hh_wohnbdl
) %>% summarise(
  textlabel = paste(
    "count =", format(n(), big.mark = " "),
    "\n",
    "mean = ", format(round(mean(gleich_entf_fakt), 1), big.mark = " ")
  ),
  #options for how to vertically place the text
 y=max(Vergleich$gleich_entf_fakt,na.rm = TRUE)
)

Vergleich %>%
  drop_na(gleich_entf_kat, mean_hh_wohnbdl, gleich_entf_fakt) %>%
  ggplot(Vergleich, mapping = aes(
    x = mean_hh_wohnbdl,
    y = gleich_entf_fakt,
    color = gleich_entf_kat
  )) +
  geom_point() +
  geom_text(
    data = text_labels,
    aes(label = textlabel,color=NULL,
        y=y),
    size=3
  ) +
  scale_color_manual(values = c("red4", "red3", "orange", "green3", "green4"), labels = c("sehr schlecht", "schlecht", "mäßig", "gut", "sehr gut")) +
  scale_x_continuous(breaks = 1:9, name = "Bundesland", labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")) +
  scale_fill_manual(breaks = 1:9, name = "Bundesland", labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")) +
  labs(
    title = "Übereinstimmung nach Bundesland",
    x = "Bundesland",
    y = "Übereinstimmungsgrad",
    color = "gleich_entf_kat"
  ) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

1 Like

Thanks a lot for your help! I replaced the na.omit() by na_drop(gleich_entf_kat, mean_hh_wohnbdl, gleich_entf_fakt), and now it's working.

Here is the final solution of the problem:

library(ggplot2)
library(dplyr)
library(tidyverse)

# sample data
Vergleich <- data.frame(
  mean_hh_wohnbdl = c(6, 6, 3, 3, 3, 1, 9, 1, 7, NA, 2, 4, 9, 3, 3, 9, 2),
  gleich_entf_fakt = c(16.00, 66.67, 50.00, 14.29, NA, 42.86, 17.14, 33.33, 116.67, 200.00, 20.00, 66.67, 40.00, 70.00, NA, 33.33, 102.00)
)
Vergleich <- mutate(Vergleich, gleich_entf_kat = cut(gleich_entf_fakt, breaks = c(0, 30, 50, 70, 90, 110, 130, 150, 170, 200000), labels = c("5", "4", "3", "2", "1", "2", "3", "4", "5")))

text_labels <- group_by(
  Vergleich %>% drop_na(gleich_entf_kat, mean_hh_wohnbdl, gleich_entf_fakt),
  mean_hh_wohnbdl
) %>% summarise(
  textlabel = paste(
    "count =", format(n(), big.mark = " "),
    "\n",
    "mean = ", format(round(mean(gleich_entf_fakt), 1), big.mark = " ")
  ),
  #options for how to vertically place the text
  y=max(Vergleich$gleich_entf_fakt,na.rm = TRUE)
)

Vergleich %>%
  drop_na(gleich_entf_kat, mean_hh_wohnbdl, gleich_entf_fakt) %>%
  ggplot(Vergleich, mapping = aes(
    x = mean_hh_wohnbdl,
    y = gleich_entf_fakt,
    color = gleich_entf_kat
  )) +
  geom_point() +
  geom_text(
    data = text_labels,
    aes(label = textlabel,color=NULL,
        y=y),
    size=3
  ) +
  scale_color_manual(values = c("red4", "red3", "orange", "green3", "green4"), labels = c("sehr schlecht", "schlecht", "mäßig", "gut", "sehr gut")) +
  scale_x_continuous(breaks = 1:9, name = "Bundesland", labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")) +
  scale_fill_manual(breaks = 1:9, name = "Bundesland", labels = c("B", "K", "NÖ", "OÖ", "S", "ST", "T", "V", "W")) +
  labs(
    title = "Übereinstimmung nach Bundesland",
    x = "Bundesland",
    y = "Übereinstimmungsgrad",
    color = "gleich_entf_kat"
  ) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

I have an additional question: Is it possible to fill the label by the colors of the gleich_entf_kat, so that a mean of e.g. 80 would be green3 and a mean of 160 red3 (like in the first solution of @nirgrahamuk, but with the labels as they are right now)?

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.