Fixed position of labels generated by stat_summary

Hi,

I could not find a way to place the labels generated by stat_summary() at a fixed position for all groups, say at y = 0. If you also know how to change the decimal mark to ",", I would be very grateful.

many thanks,

Jakub

# Example data for the reprex: 20 rows, six columns.
# The object is assembled by hand as a tibble-classed list, so it can be
# created without attaching any package.
finis <- structure(
  list(
    red_izo = c(
      "600084302", "600084027", "600084485", "600084345", "600084540",
      "600084558", "600106144", "600115372", "600107787", "600112624",
      "600135811", "600135829", "600135837", "600038017", "600038009",
      "600038114", "600037991", "600037983", "600036049", "600036006"
    ),
    pocet_pracovniku = c(
      3.5, 3, 1.1671, 3.5, 2.6809, 2.35, 4.3968, 4.3036, 2.7649, 1.1,
      1.9, 4.4935, 2.4645, 3.4166, 2.9128, 2, 3.2917, 2.5, 3.875, 5.0834
    ),
    tercil_MS = c(
      "nad 70 žáků", "nad 70 žáků", "mezi 35 a 70 žáky", "nad 70 žáků",
      "nad 70 žáků", "mezi 35 a 70 žáky", "nad 70 žáků", "nad 70 žáků",
      "nad 70 žáků", "mezi 35 a 70 žáky", "mezi 35 a 70 žáky", "nad 70 žáků",
      "nad 70 žáků", "nad 70 žáků", "nad 70 žáků", "nad 70 žáků",
      "nad 70 žáků", "nad 70 žáků", "nad 70 žáků", "nad 70 žáků"
    ),
    # The next two columns hold a single constant value in every row.
    zriz = rep("2", 20L),
    zrizovatel = rep("veřejné", 20L),
    # Factor stored as integer codes; level 1 ("do 35 žáků") is unused here.
    tercil = structure(
      c(
        3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 2L,
        2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L
      ),
      levels = c("do 35 žáků", "mezi 35 a 70 žáky", "nad 70 žáků"),
      class = "factor"
    )
  ),
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)





library(stringr)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
library(ggplot2) 

# Box plot of `pocet_pracovniku` per `tercil_MS` group, with each group's mean
# printed as a bold text label. The label is drawn at the mean itself (the y
# value computed by stat_summary), which is the placement the question asks
# how to change.
plot <- ggplot(
  finis,
  aes(
    x = tercil_MS,
    y = pocet_pracovniku,
    fill = tercil_MS,
    group = tercil_MS
  )
) +
  geom_boxplot() +
  stat_summary(
    # after_stat(y) is the value returned by `fun` (the group mean).
    aes(label = str_c("průměr = ", round(after_stat(y), 1))),
    fun = mean,
    geom = "text",
    # Constant text properties are set on the layer, not mapped inside aes().
    fontface = "bold",
    color = "#981E3D"
  ) +
  scale_fill_manual(values = c("#F2CE6E", "#EF726B", "#74496B", "#67A88F")) +
  theme_minimal() +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    text = element_text(size = 15, family = "Fira Sans Condensed"),
    plot.title = element_text(margin = margin(0, 0, 15, 0)),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.spacing = unit(2, "lines"),
    panel.grid.major.x = element_blank()
  ) +
  scale_y_continuous(
    # The argument is `labels`; `label` only worked through partial matching.
    labels = scales::comma_format(
      accuracy = 1,
      scale = 1,
      prefix = "",
      suffix = "",
      big.mark = " ",
      decimal.mark = ","
    ),
    limits = c(0, 5)
  ) +
  ggtitle("Průměrný počet nepedagogických zaměstnanců ve veřejných MŠ")

plot
#> Warning: Removed 1 rows containing non-finite values (stat_boxplot).
#> Warning: Removed 1 rows containing non-finite values (stat_summary).

image

Created on 2022-08-02 by the reprex package (v2.0.1)

Is this what you mean?

library(stringr)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
library(ggplot2)

# Example data as a plain data frame: 20 rows, character columns kept as
# character, plus `tercil`, a factor built from the same values as `tercil_MS`.
finis <- local({
  n_rows <- 20L

  # Every row is in the "nad 70 žáků" band except rows 3, 6, 10 and 11.
  size_band <- rep("nad 70 žáků", n_rows)
  size_band[c(3L, 6L, 10L, 11L)] <- "mezi 35 a 70 žáky"

  data.frame(
    red_izo = c(
      "600084302", "600084027", "600084485", "600084345", "600084540",
      "600084558", "600106144", "600115372", "600107787", "600112624",
      "600135811", "600135829", "600135837", "600038017", "600038009",
      "600038114", "600037991", "600037983", "600036049", "600036006"
    ),
    pocet_pracovniku = c(
      3.5, 3, 1.1671, 3.5, 2.6809, 2.35, 4.3968, 4.3036, 2.7649, 1.1,
      1.9, 4.4935, 2.4645, 3.4166, 2.9128, 2, 3.2917, 2.5, 3.875, 5.0834
    ),
    tercil_MS = size_band,
    zriz = rep("2", n_rows),
    zrizovatel = rep("veřejné", n_rows),
    tercil = as.factor(size_band),
    stringsAsFactors = FALSE
  )
})

# Box plot per `tercil_MS` group with a bold text label drawn at y = 0.
# NOTE: y is mapped to the constant 0 for the summary layer, so stat_summary
# takes the mean of that constant and `..y..` in the label is 0 as well; the
# text is positioned at y = 0 but does not show the group mean.
ggplot(
  finis,
  aes(
    x = tercil_MS,
    y = pocet_pracovniku,
    fill = tercil_MS,
    group = tercil_MS
  )
) +
  geom_boxplot() +
  stat_summary(
    aes(
      label = str_c("průměr = ", round(..y.., 1)),
      y = 0
    ),
    fun = mean,
    geom = "text",
    color = "#981E3D",
    fontface = "bold"
  ) +
  ggtitle("Průměrný počet nepedagogických zaměstnanců ve veřejných MŠ") +
  scale_fill_manual(values = c("#F2CE6E", "#EF726B", "#74496B", "#67A88F")) +
  scale_y_continuous(
    labels = scales::comma_format(
      accuracy = 1,
      scale = 1,
      prefix = "",
      suffix = "",
      big.mark = " ",
      decimal.mark = ","
    ),
    limits = c(0, 5)
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    text = element_text(size = 15, family = "Fira Sans Condensed"),
    plot.title = element_text(margin = margin(0, 0, 15, 0)),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.spacing = unit(2, "lines"),
    panel.grid.major.x = element_blank()
  )
#> Warning: Removed 1 rows containing non-finite values (stat_boxplot).

Created on 2022-08-02 by the reprex package (v2.0.1)

Not really, I want the position to be at y=0 but I do not want the value to be 0 (it should be 1.6 & 3.3).

Screenshot 2022-08-03 at 8.43.12

vs.

Screenshot 2022-08-03 at 8.43.47

My general advice is to compute as much as possible before plotting; in other words, try to limit the plotting code to pure plotting and avoid embedding computations via things like stat_summary as much as possible. While it's certainly possible to embed the computations, I find that it's inelegant and harder to reason about. My advice is akin to the more generic programming advice to break down large, complicated code into smaller modules/functions.

That said, here is how I would approach the task. I assume finis is provided as in the above posts. I load library(tidyverse) (rather than loading stringr, ggplot2, and now dplyr separately).

# Per-group summary computed before plotting: the mean of `pocet_pracovniku`
# and a ready-made label that uses a decimal comma.
# formatC() always prints exactly one decimal place (e.g. "3,0" rather than
# "3") and sets the decimal mark directly, so no string replacement on the
# rounded number is needed.
# The outer parentheses print the result in addition to assigning it.
(mean_smry_df <- finis |>
  group_by(tercil_MS) |>
  summarise(
    mean_y = mean(pocet_pracovniku),
    label_txt = str_c(
      "průměr = ",
      formatC(mean_y, format = "f", digits = 1, decimal.mark = ",")
    )
  ))

# Box plot per `tercil_MS` group. The labels come from the precomputed
# `mean_smry_df` (one row per group) and are drawn at y = 0, so no statistics
# are calculated inside the plotting code.
ggplot(
  finis,
  aes(
    x = tercil_MS,
    y = pocet_pracovniku,
    fill = tercil_MS,
    group = tercil_MS
  )
) +
  geom_boxplot() +
  geom_text(
    data = mean_smry_df,
    mapping = aes(x = tercil_MS, y = 0, label = label_txt),
    fontface = "bold",
    color = "#981E3D"
  ) +
  ggtitle("Průměrný počet nepedagogických zaměstnanců ve veřejných MŠ") +
  scale_fill_manual(values = c("#F2CE6E", "#EF726B", "#74496B", "#67A88F")) +
  scale_y_continuous(
    labels = scales::comma_format(
      accuracy = 1,
      scale = 1,
      prefix = "",
      suffix = "",
      big.mark = " ",
      decimal.mark = ","
    ),
    limits = c(0, 5)
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    text = element_text(size = 15, family = "Fira Sans Condensed"),
    plot.title = element_text(margin = margin(0, 0, 15, 0)),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.spacing = unit(2, "lines"),
    panel.grid.major.x = element_blank()
  )

image

1 Like

Thank you very much! By the way, do you know whether there is a keyboard shortcut for '|>' on a Mac?

Sorry, I overlooked that you were using a calculated variable; you can use the stage() function for that.

library(stringr)                                                                                       
library(ggplot2)

# Example data as a plain data frame: 20 rows, character columns kept as
# character, plus `tercil`, a factor built from the same values as `tercil_MS`.
finis <- local({
  row_id <- seq_len(20L)

  # Rows 3, 6, 10 and 11 are in the middle band; all others are in the top one.
  band <- ifelse(
    row_id %in% c(3L, 6L, 10L, 11L),
    "mezi 35 a 70 žáky",
    "nad 70 žáků"
  )

  data.frame(
    stringsAsFactors = FALSE,
    red_izo = c(
      "600084302", "600084027", "600084485", "600084345",
      "600084540", "600084558", "600106144", "600115372",
      "600107787", "600112624", "600135811", "600135829",
      "600135837", "600038017", "600038009", "600038114",
      "600037991", "600037983", "600036049", "600036006"
    ),
    pocet_pracovniku = c(
      3.5, 3, 1.1671, 3.5,
      2.6809, 2.35, 4.3968, 4.3036,
      2.7649, 1.1, 1.9, 4.4935,
      2.4645, 3.4166, 2.9128, 2,
      3.2917, 2.5, 3.875, 5.0834
    ),
    tercil_MS = band,
    zriz = rep("2", length(row_id)),
    zrizovatel = rep("veřejné", length(row_id)),
    tercil = as.factor(band)
  )
})

# Box plot per `tercil_MS` group with the group mean printed at y = 0.
#
# stage() feeds `pocet_pracovniku` into stat_summary (so the mean is computed
# from the data) and then overrides the plotted y with the constant 0.
# The label is built from after_stat(y), i.e. the computed mean, and formatted
# with one decimal place and a decimal comma.
#
# NOTE: `limits = c(0, 5)` removes the observation above 5 (5.0834) before the
# statistics are computed, which is what the "Removed 1 rows" warnings refer
# to. The box plot and the displayed mean therefore exclude that value.
ggplot(
  finis,
  aes(
    x = tercil_MS,
    y = pocet_pracovniku,
    fill = tercil_MS,
    group = tercil_MS
  )
) +
  geom_boxplot() +
  stat_summary(
    aes(
      label = str_c(
        "průměr = ",
        formatC(after_stat(y), format = "f", digits = 1, decimal.mark = ",")
      ),
      y = stage(pocet_pracovniku, after_stat = 0)
    ),
    fun = mean,
    geom = "text",
    color = "#981E3D",
    fontface = "bold"
  ) +
  scale_fill_manual(values = c("#F2CE6E", "#EF726B", "#74496B", "#67A88F")) +
  theme_minimal() +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    text = element_text(size = 15, family = "Fira Sans Condensed"),
    plot.title = element_text(margin = margin(0, 0, 15, 0)),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.spacing = unit(2, "lines"),
    panel.grid.major.x = element_blank()
  ) +
  scale_y_continuous(
    # The argument is `labels`; `label` only worked through partial matching.
    labels = scales::comma_format(
      accuracy = 1,
      scale = 1,
      prefix = "",
      suffix = "",
      big.mark = " ",
      decimal.mark = ","
    ),
    limits = c(0, 5)
  ) +
  ggtitle("Průměrný počet nepedagogických zaměstnanců ve veřejných MŠ")
#> Warning: Removed 1 rows containing non-finite values (stat_boxplot).
#> Warning: Removed 1 rows containing non-finite values (stat_summary).

Created on 2022-08-03 by the reprex package (v2.0.1)

1 Like