geom_histogram: labeling the 25th, 50th, and 75th percentiles

Hello, I have data that looks like this:

years n
1.11  19
1.33  3882
1.44  321
1.91  1000
2.11  6844
2.33  2343 
3.44  348
...
...

I am trying to make a histogram and label the 25th percentile, the 50th percentile (median), and the 75th percentile, with their values shown on the graph too. This is what I have so far:

# Expand the frequency table to one row per observation.
clean_data <- data %>% uncount(weights = n)

ggplot(clean_data, aes(x = years)) +
  geom_histogram() +
  # This is code I found online to add the 25th, 50th, and 75th percentiles as
  # vertical lines, but the exact values are not shown next to the lines.
  stat_summary(
    # after_stat() replaces stat(), which is deprecated in current ggplot2.
    aes(y = 1, xintercept = after_stat(x)),
    fun.data = function(x) {
      data.frame(y = quantile(x, probs = c(0.25, 0.5, 0.75)))
    },
    geom = "vline",
    orientation = "y"
  )

Is there a better way to do this? Thank you so much!

Could this be useful to you?

library(tidyverse)

# Example data: one row per distinct `years` value with its frequency `n`.
data <- tibble(
  years = c(1.11, 1.33, 1.44, 1.91, 2.11, 2.33, 3.44),
  n = c(19, 3882, 321, 1000, 6844, 2343, 348)
)

# Expand to one row per observation by repeating each row `n` times.
clean_data <- uncount(data, weights = n)

# 25th, 50th, and 75th percentiles of the expanded observations.
q <- quantile(clean_data[["years"]], probs = c(0.25, 0.5, 0.75))

Then you can make this plot

# plot 1
# One row per quantile, so each line and label is drawn exactly once; a
# constant inside aes() would be redrawn for every row of `data`.
q_df <- tibble(years = unname(q), n = 7300)

data %>%
  ggplot(aes(x = years, y = n)) +
  geom_col() +
  geom_vline(data = q_df, aes(xintercept = years)) +
  # x = years and y = n are inherited from the plot; the label is the value.
  geom_label(data = q_df, aes(label = years)) +
  labs(
    x = "Years",
    y = "Count",
    title = "Title"
  ) +
  theme_classic()

Or something like this:

# plot 2
# One row per quantile, so each line and label is drawn exactly once; a
# constant inside aes() would be redrawn for every row of `data`.
# Each label reads "<percent>\n<value>".
q_df <- tibble(
  years = unname(q),
  n = 7400,
  label = paste0(c("25%", "50%", "75%"), "\n", q)
)

data %>%
  ggplot(aes(x = years, y = n)) +
  geom_col() +
  geom_vline(data = q_df, aes(xintercept = years)) +
  # x = years and y = n are inherited from the plot.
  geom_label(data = q_df, aes(label = label)) +
  labs(
    x = "Years",
    y = "Count",
    title = "Title"
  ) +
  theme_classic()

1 Like

Thank you so much! Is there a way to do this with facet_wrap() as well, so that each facet shows its own set of quantiles?

which variable would you like to be divided by facet_wrap()?

Say I had a binary variable called animal that is either yes or no. Thank you so much!

... +
  # Spell out "right" in full; "r" relies on partial matching of the value.
  facet_wrap(~animal, ncol = 1, strip.position = "right")

# `data` needs an `animal` column for the facets. `q_df` has no `animal`
# column, so the same quantile lines and labels appear in every facet.
# One row per quantile also means each label is drawn once per facet rather
# than once per row of `data`.
q_df <- tibble(
  years = unname(q),
  n = 7400,
  label = paste0(c("25%", "50%", "75%"), "\n", q)
)

data %>%
  ggplot(aes(x = years, y = n)) +
  geom_col() +
  geom_vline(data = q_df, aes(xintercept = years)) +
  geom_label(data = q_df, aes(label = label)) +
  labs(
    x = "Years",
    y = "Count",
    title = "Title"
  ) +
  theme_classic() +
  # Spell out "right" in full; "r" relies on partial matching of the value.
  facet_wrap(~animal, ncol = 1, strip.position = "right") +
  # Leave headroom above the bars for the labels placed at y = 7400.
  ylim(0, 8000)
1 Like

Thanks, but the percentiles will differ from one graph to the other, won't they? They shouldn't be exactly the same in both.

Perhaps slightly more idiomatic would be:

ggplot(...) +
  ... +
  # A vector of intercepts draws all three quantile lines in one layer.
  geom_vline(xintercept = q) +
  # annotate() takes plain vectors: one label per quantile, "<percent>\n<value>".
  annotate(
    geom = "label",
    x = q,
    y = 7400,
    label = paste0(c("25%", "50%", "75%"), "\n", q)
  )
1 Like

oh thank you axeman that makes sense

However, I am still wondering how to get a separate set of quantiles (25th, 50th, and 75th percentiles) labeled on each of the two graphs with facet_wrap(), since each graph will have its own distribution.

thank u all so much

maybe not the cleanest way, but it should work

library(tidyverse)
library(patchwork)

# Example data: frequency `n` of each `years` value, tagged with the binary
# grouping variable `animal` ("Y" / "N").
data <- tibble(
  years = c(1.11, 1.33, 1.44, 1.91, 2.11, 2.33, 3.44),
  n = c(19, 3882, 321, 1000, 6844, 2343, 348),
  animal = c("Y", "N", "Y", "N", "N", "Y", "Y")
)

# Expand to one row per observation by repeating each row `n` times.
clean_data <- uncount(data, weights = n)


#' Plot the counts for one `animal` group and mark that group's quartiles
#'
#' Reads the global tibbles `data` (columns `years`, `n`, `animal`) and
#' `clean_data` (`data` expanded to one row per observation). The 25th, 50th,
#' and 75th percentiles are computed from the rows of `clean_data` belonging
#' to the requested group only, then drawn as vertical lines with value labels
#' placed 300 units above the tallest bar.
#'
#' @param i Value of `animal` selecting the group to plot (e.g. "Y" or "N").
#' @return A ggplot object.
pl <- function(i) {
  # Quartiles of this group's observations. quantile() is called directly
  # because a summarise() that returns several rows is deprecated in dplyr.
  group_years <- clean_data %>%
    filter(animal == i) %>%
    pull(years)
  q <- quantile(group_years, probs = c(0.25, 0.5, 0.75))

  group_data <- data %>%
    filter(animal == i)

  # One row per quartile, so each line and label is drawn exactly once; a
  # constant inside aes() would be redrawn for every row of `group_data`.
  q_df <- tibble(years = unname(q), n = max(group_data$n) + 300)

  group_data %>%
    ggplot(aes(x = years, y = n)) +
    geom_col() +
    geom_vline(data = q_df, aes(xintercept = years)) +
    # x = years and y = n are inherited from the plot; the label is the value.
    geom_label(data = q_df, aes(label = years)) +
    labs(
      x = "Years",
      y = "Count",
      title = paste("Type:", i)
    ) +
    theme_classic() +
    theme(plot.title = element_text(hjust = .5, size = 18, face = "bold"))
}

# Combine the two per-group plots into one figure (`+` comes from patchwork).
pl("N") + pl("Y")

1 Like

Thank you so much! Is it possible to do this with geom_histogram() too?

Replace geom_col() with:


# Drop-in replacement for geom_col(): stat = "identity" keeps the y values as
# given instead of binning and counting.
geom_histogram(stat = "identity")

1 Like

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.