How to add mean and SD values (actual number) to the plot output (plot_grid)

I made a loop that outputs two graphs per variable against one fixed variable (control vs. PAD). With the function plot_grid I made two rows (one for the title), which is not very intuitive, but it works (as seen in the picture). Now I want to add the corresponding means and SDs for both groups as values (preferably below the legend). I made vectors for the means and SDs, but I'm a bit lost on how to add these to the final plot. Maybe there is also a much easier way of doing this.

I have made an example code below. I also attached a picture that shows how I would like it to look.

Creating the dataframe

rm(list=ls())
library(ggplot2)
library(purrr)
library(ggbeeswarm)
library(cowplot)
library(plotly)
library(dplyr)
library(tidyr)
library(plyr)

# Build the 15-row example data set. Everything is assembled inside local()
# so the temporary column vectors never reach the global environment and no
# clean-up call is needed afterwards.
df <- local({
  group <- rep(c("Control", "PAD", "Control", "PAD", "PAD"), times = 3)
  # Four random integer-valued response columns, drawn in the order b, c, d, e.
  b <- round(runif(n = 15, min = 1, max = 7))
  c <- round(runif(n = 15, min = 1, max = 3))
  d <- round(runif(n = 15, min = 3, max = 8))
  e <- round(runif(n = 15, min = 1, max = 5))
  event <- c(
    "no event", "event", "no event", "no event", "no event",
    "no event", "event", "no event", "no event", "no event",
    "no event", "no event", "no event", "event", "event"
  )
  data.frame(group, b, c, d, e, event)
})
df

Define the color category of each row, used to color the points in the boxplot

# Label every row for the point colouring in the boxplot: controls form one
# category and PAD rows are split by whether an event occurred. Rows that
# match none of the rules keep the placeholder "color".
# The assignments are vectorised, so no row index is needed and a data frame
# with zero rows is handled without error.
df$color <- "color"
is_pad <- df$group == "PAD"
df$color[df$group == "Control"] <- "Control"
df$color[is_pad & df$event == "event"] <- "PAD with event"
df$color[is_pad & df$event == "no event"] <- "PAD without event"
rm(is_pad)

Column names used for looping through the columns

# Names of the response columns (columns 2 to 5) and of the explanatory
# column (column 1). set_names() turns each into a self-named character
# vector, so anything mapped over them is labelled by column name.
response <- set_names(names(df[2:5]))
expl <- set_names(names(df[1]))

response
expl

I made a loop that outputs two graphs per variable against one fixed variable (control vs. PAD). With the function plot_grid I made two rows (one for the title), which is not very intuitive, but it works (as seen in the picture). Now I want to add the corresponding means and SDs for both groups as values (preferably below the legend). I made vectors for the means and SDs, but I'm a bit lost on how to add these to the final plot. Maybe there is also a much easier way of doing this.

I have made an example code below. I also attached a picture that shows how I would like it to look.

Creating the dataframe

rm(list=ls())
library(ggplot2)
library(purrr)
library(ggbeeswarm)
library(cowplot)
library(plotly)
library(dplyr)
library(tidyr)
library(plyr)

# Example data: 15 subjects in a repeating Control/PAD pattern with four
# random integer-valued responses and an event flag. The columns are built
# inside local() so none of the temporary vectors are left in the workspace.
df <- local({
  group <- rep(c("Control", "PAD", "Control", "PAD", "PAD"), times = 3)
  # Random draws happen in the order b, c, d, e.
  b <- round(runif(n = 15, min = 1, max = 7))
  c <- round(runif(n = 15, min = 1, max = 3))
  d <- round(runif(n = 15, min = 3, max = 8))
  e <- round(runif(n = 15, min = 1, max = 5))
  event <- c(
    "no event", "event", "no event", "no event", "no event",
    "no event", "event", "no event", "no event", "no event",
    "no event", "no event", "no event", "event", "event"
  )
  data.frame(group, b, c, d, e, event)
})
df

Define the color category of each row, used to color the points in the boxplot

# Assign the colour category used for the points in the boxplot:
#   Control            - every control subject
#   PAD with event     - PAD subjects whose event column is "event"
#   PAD without event  - PAD subjects whose event column is "no event"
# Anything else keeps the placeholder value "color".
# Logical masks replace the three row-by-row loops, which also makes the
# code safe for a data frame with zero rows.
df$color <- "color"
is_pad <- df$group == "PAD"
df$color[df$group == "Control"] <- "Control"
df$color[is_pad & df$event == "event"] <- "PAD with event"
df$color[is_pad & df$event == "no event"] <- "PAD without event"
rm(is_pad)

Column names used for looping through the columns

# Self-named character vectors holding the response column names (columns 2
# to 5) and the explanatory column name (column 1); both are printed for
# inspection.
response <- set_names(names(df[2:5]))
expl <- set_names(names(df[1]))

response
expl

Subset the PAD and control rows; these subsets are used for plot 2 to get the mean and SD of each group

# Split the data by group so the per-group mean and SD of each response
# column can be computed separately.
PADonly <- subset(df, group == "PAD")
Ctrlonly <- subset(df, group == "Control")

# Explanatory variable within the PAD subset (event vs. no event). Column 7
# is `color` once that column has been added to the six original columns.
explPAD <- names(df[7])
# Response column names (columns 2 to 5). The original referenced `DBPAD`,
# which is never defined; PADonly has the same columns as df.
response <- names(PADonly[2:5])

Run the loop

# Write one page per response variable to "all_plots.pdf". Each page has a
# title row and, below it, a boxplot with beeswarm points by group next to a
# density histogram with the two group means marked as dashed lines.
pdf("all_plots.pdf", width = 15)
# `response` holds the response column names; `i` is the current column name.
for (i in response) {
  # Per-group mean and SD of the current response column. The SDs are not
  # drawn yet; they are kept for the mean/SD annotation.
  xPAD <- PADonly[, i]
  xPADmean <- mean(xPAD, na.rm = TRUE)
  xPADSD <- sd(xPAD, na.rm = TRUE)

  xctrl <- Ctrlonly[, i]
  xctrlmean <- mean(xctrl, na.rm = TRUE)
  xctrlSD <- sd(xctrl, na.rm = TRUE)

  # Plot 1: boxplot per group with the individual points coloured by category.
  p1 <- ggplot(df, aes_string("group", i)) +
    geom_boxplot(show.legend = FALSE) +
    geom_beeswarm(aes(color = color), size = 2) +
    scale_color_manual(values = c("Control" = "#107f40", "PAD with event" = "#D85622", "PAD without event" = "#2D416D")) # color the values as you please

  # Plot 2: overlaid density histograms per group, with density lines, a rug
  # and a dashed vertical line at each group mean.
  p2 <- ggplot(df, aes_string(x = i, fill = "group")) +
    geom_histogram(bins = 15, aes(y = ..density..), position = "identity", alpha = 0.15, color = "black") +
    scale_fill_manual(values = c("PAD" = "#2D416D", "Control" = "#D85622")) + # color the values as you please
    scale_y_continuous(labels = scales::percent) +
    stat_density(aes_string(i, color = "group"), na.rm = TRUE, bw = "nrd0", geom = "line", position = "identity", size = 1, linetype = 2) +
    geom_rug(aes_string(x = i, color = "group")) +
    scale_color_manual(values = c("PAD" = "#2D416D", "Control" = "#D85622")) +
    geom_vline(xintercept = xPADmean, linetype = 2, color = "#2D416D") +
    geom_vline(xintercept = xctrlmean, linetype = 2, color = "#D85622")

  p_all <- plot_grid(p1, p2)

  # Title row showing the name of the current response variable.
  title <- ggdraw() +
    draw_label(i,
               fontface = "bold",
               x = 0,
               hjust = 0,
               size = 25) +
    theme(
      # add margin on the left of the drawing canvas
      plot.margin = margin(0, 0, 0, 7))

  # Stack the title above the two plots; the title takes a small share of
  # the page height.
  p_all <- plot_grid(title, p_all, ncol = 1, rel_heights = c(0.1, 1))
  print(p_all)
}
# Close the PDF device so the file is completely written to disk.
invisible(dev.off())

Hi @jensposma. You may try the following code. I didn't polish it much, but the code works. You can modify it to get the plot style you want.

rm(list=ls())
library(ggplot2)
library(purrr)
library(ggbeeswarm)
library(cowplot)
library(plotly)
library(dplyr)
library(tidyr)
library(plyr)

# Example data: 15 subjects in a repeating Control/PAD pattern, four random
# integer-valued response columns (b, c, d, e) and an event flag.
group <- rep(c("Control", "PAD", "Control", "PAD", "PAD"), times = 3)
b <- round(runif(n = 15, min = 1, max = 7))
c <- round(runif(n = 15, min = 1, max = 3))
d <- round(runif(n = 15, min = 3, max = 8))
e <- round(runif(n = 15, min = 1, max = 5))
event <- c(
  "no event", "event", "no event", "no event", "no event",
  "no event", "event", "no event", "no event", "no event",
  "no event", "no event", "no event", "event", "event"
)

df <- data.frame(group = group, b = b, c = c, d = d, e = e, event = event)

# For every response column, build a combined figure (boxplot next to a
# histogram) and write each group's mean and SD onto it as text.
# The pipeline yields a list of figures named by response column.
df %>%
  # Colour category: controls, PAD with an event, PAD without an event.
  dplyr::mutate(
    color = ifelse(
      group == "Control",
      "Control",
      ifelse(event == "event", "PAD with event", "PAD without event")
    )
  ) %>%
  # Long format: one row per subject and response column.
  gather(sample, value, -group, -event, -color) %>%
  # Mean and SD per response column and group, stored as formatted strings.
  dplyr::group_by(sample, group) %>%
  dplyr::mutate(
    mean = format(mean(value), digits = 4),
    sd = format(sd(value), digits = 4)
  ) %>%
  # One nested data frame per response column, as a list named by column.
  dplyr::group_by(sample) %>%
  nest() %>%
  {
    setNames(.$data, .$sample)
  } %>%
  # .x is the data of one response column, .y is its name.
  imap(~ {
    p1 <- ggplot(.x, aes(x = group, y = value)) +
      geom_boxplot() +
      geom_point(aes(x = group, y = value, color = color))

    p2 <- ggplot(.x, aes(x = value)) +
      geom_histogram(bins = 15)

    # One row per group carries that group's mean and SD. Look the rows up
    # by group name so the labels stay correct whatever the row order is.
    dat <- .x %>%
      distinct(group, .keep_all = TRUE)
    ctrl <- dat$group == "Control"
    pad <- dat$group == "PAD"
    stats_label <- paste0(
      "Control\nMean: ", dat$mean[ctrl], "\nSD: ", dat$sd[ctrl],
      "\n\nPAD\nMean: ", dat$mean[pad], "\nSD: ", dat$sd[pad]
    )

    ggdraw(cowplot::plot_grid(p1, p2, labels = .y)) +
      draw_text(stats_label, x = 0.25, y = 0.2, hjust = 0)
  })

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.