Mixing geom_boxplot and geom_step, how can I get a line in legend for geom_step rather than boxplot glyph?

Hi,

I'm sure there must be a simple solution to this. I'm forever getting hung up on legends of mixed plots. I'm hoping @mara will come to my rescue again.

I have model outcomes - two models, summaries of annual predictions for each - which I display as boxplots within year; and annual observations for the first years - which I display using geom_step.

If you look at the following reprex (made up data, and some stuff omitted) I have the observations in a separate column and use the var factor to control fill and colour.

So the structure of the graph looks fine, but in the legend I would like to get a line for the observed, rather than a boxplot symbol. Is this possible?

I tried playing with key_glyph, but couldn't seem to get it do the job.

I'm happy to rejig my data into another layout, if necessary, but my colleagues and I are set on this boxplot and step type of graphic.

A secondary question, in TBL you may notice that the last observation (2002) is repeated for 2003 to force the step to span 2002. Is there a more elegant way to do this?

Any other comments on my plotting code welcomed.

Thanks for your help,
Ron.

library(tidyverse)

TBL <- structure(list(model = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, NA, NA, NA, NA), .Label = c("with", "without"), class = "factor"),
    med = c(10, 9, 8, 7, 6, 11, 10, 9, 8, 7, NA, NA, NA, NA),
    year = structure(c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
    1L, 2L, 3L, 4L), .Label = c("2000", "2001", "2002", "2003",
    "2004"), class = "factor"), bot = c(5, 4, 3, 2, 1, 6, 5,
    4, 3, 2, NA, NA, NA, NA), low = c(8.5, 7.5, 6.5, 5.5, 4.5,
    9.5, 8.5, 7.5, 6.5, 5.5, NA, NA, NA, NA), upp = c(11.5, 10.5,
    9.5, 8.5, 7.5, 12.5, 11.5, 10.5, 9.5, 8.5, NA, NA, NA, NA
    ), top = c(15, 14, 13, 12, 11, 16, 15, 14, 13, 12, NA, NA,
    NA, NA), obs = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
    12, 8, 10, 10), var = structure(c(2L, 2L, 2L, 2L, 2L, 3L,
    3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L), .Label = c("obs", "with",
    "without"), class = "factor")), row.names = c(NA, -14L), class = c("tbl_df",
"tbl", "data.frame"))

pp <- ggplot() +
    geom_boxplot(data = TBL %>% filter(is.na(obs)),
                 aes(x = year, ymin = bot, lower = low, middle = med, upper = upp, ymax = top,
                     group = year:model, fill = var, colour = var), alpha = 0.5, stat = 'identity') +
    geom_step(data = TBL, aes(x = as.numeric(year)-0.5, y = obs, colour = var, fill = var)) +
    scale_fill_manual(values = c('with' = 'purple','without' = 'red', 'obs' = NA)) +
    scale_colour_manual(values = c('with' = 'purple', 'without' = 'red', 'obs' = 'black'))
#> Warning: Ignoring unknown aesthetics: fill

print(pp)
#> Warning: Removed 10 row(s) containing missing values (geom_path).

Created on 2020-08-24 by the reprex package (v0.3.0)

This is probably not quite what you want but it is as close as I could get.

TBL <- structure(list(model = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
                                          2L, 2L, 2L, NA, NA, NA, NA), .Label = c("with", "without"), class = "factor"),
                      med = c(10, 9, 8, 7, 6, 11, 10, 9, 8, 7, NA, NA, NA, NA),
                      year = structure(c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
                                         1L, 2L, 3L, 4L), .Label = c("2000", "2001", "2002", "2003",
                                                                     "2004"), class = "factor"), bot = c(5, 4, 3, 2, 1, 6, 5,
                                                                                                         4, 3, 2, NA, NA, NA, NA), low = c(8.5, 7.5, 6.5, 5.5, 4.5,
                                                                                                                                           9.5, 8.5, 7.5, 6.5, 5.5, NA, NA, NA, NA), upp = c(11.5, 10.5,
                                                                                                                                                                                             9.5, 8.5, 7.5, 12.5, 11.5, 10.5, 9.5, 8.5, NA, NA, NA, NA
                                                                                                                                           ), top = c(15, 14, 13, 12, 11, 16, 15, 14, 13, 12, NA, NA,
                                                                                                                                                      NA, NA), obs = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
                                                                                                                                                                       12, 8, 10, 10), var = structure(c(2L, 2L, 2L, 2L, 2L, 3L,
                                                                                                                                                                                                         3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L), .Label = c("obs", "with",
                                                                                                                                                                                                                                                     "without"), class = "factor")), row.names = c(NA, -14L), class = c("tbl_df",
                                                                                                                                                                                                                                                                                                                        "tbl", "data.frame"))

library(ggplot2)
library(dplyr, warn.conflicts = FALSE)
OBS <- filter(TBL, !is.na(obs))
pp <- ggplot(mapping = aes(colour = var)) +
  geom_boxplot(data = TBL %>% filter(is.na(obs)),
               aes(x = year, ymin = bot, lower = low, middle = med, upper = upp, ymax = top,
                   group = year:model, fill = var), alpha = 0.5, stat = 'identity') +
  geom_step(data = OBS, aes(x = as.numeric(year) - 0.5, y = obs, group = var, linetype = var)) +
  scale_fill_manual(values = c('with' = 'purple','without' = 'red', 'obs' = NA)) +
  scale_colour_manual(values = c('with' = 'purple', 'without' = 'red', 'obs' = 'black')) +
  guides(colour = "none") +
  labs(linetype = "")
print(pp)

Created on 2020-08-24 by the reprex package (v0.3.0)

Thanks @FJCC. As you say, it's not quite there is it?

If I follow this reply by @mara to an old question of mine, I think I get a little closer. I'd discarded it at first, as it didn't cover the need to have multiple boxplots per year, but it seems a bit better. But now the problem would be how to stop the colour being overwritten (ie the second call to geom_boxplot sets the outline colour for the fill legend).

library(tidyverse)

TBL <- structure(list(model = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, NA, NA, NA, NA), .Label = c("with", "without"), class = "factor"),
    med = c(10, 9, 8, 7, 6, 11, 10, 9, 8, 7, NA, NA, NA, NA),
    year = structure(c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
    1L, 2L, 3L, 4L), .Label = c("2000", "2001", "2002", "2003",
    "2004"), class = "factor"), bot = c(5, 4, 3, 2, 1, 6, 5,
    4, 3, 2, NA, NA, NA, NA), low = c(8.5, 7.5, 6.5, 5.5, 4.5,
    9.5, 8.5, 7.5, 6.5, 5.5, NA, NA, NA, NA), upp = c(11.5, 10.5,
    9.5, 8.5, 7.5, 12.5, 11.5, 10.5, 9.5, 8.5, NA, NA, NA, NA
    ), top = c(15, 14, 13, 12, 11, 16, 15, 14, 13, 12, NA, NA,
    NA, NA), obs = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
    12, 8, 10, 10), var = structure(c(2L, 2L, 2L, 2L, 2L, 3L,
    3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L), .Label = c("obs", "with",
    "without"), class = "factor")), row.names = c(NA, -14L), class = c("tbl_df",
"tbl", "data.frame"))

pp <- ggplot() +
    geom_boxplot(data = TBL %>% filter(is.na(obs)),
                 aes(x = year, ymin = bot, lower = low, middle = med, upper = upp, ymax = top,
                     group = year:model, fill = var, colour = var), alpha = 0.5, stat = 'identity') +
    geom_step(data = TBL, aes(x = as.numeric(year)-0.5, y = obs, colour = var, fill = var)) +
    scale_fill_manual(values = c('with' = 'purple','without' = 'red', 'obs' = NA)) +
    scale_colour_manual(values = c('with' = 'purple', 'without' = 'red', 'obs' = 'black'))
#> Warning: Ignoring unknown aesthetics: fill

#print(pp)

wTBL <- TBL %>%
    filter(var == 'with') %>%
    mutate(year = as.character(year) %>% as.integer)

woTBL <- TBL %>%
    filter(var == 'without') %>%
    mutate(year = as.character(year) %>% as.integer)

OBS <- TBL %>%
    filter(var == 'obs') %>%
    mutate(year = as.character(year) %>% as.integer)

yr_width <- 0.9
box_width <- yr_width*0.5*0.95
x_shift <- yr_width*0.25
box_colour <- c('With' = 'purple', 'Without' = 'red')
ww <- ggplot() +
    geom_boxplot(data = wTBL,
                 aes(x = year-x_shift, ymin = bot, lower = low, middle = med, upper = upp, ymax = top,
                     fill = 'With', group = year),
                 colour=box_colour['With'], alpha = 0.5, stat = 'identity', width = box_width) +
    geom_boxplot(data = woTBL,
                 aes(x = year+x_shift, ymin = bot, lower = low, middle = med, upper = upp, ymax = top,
                     fill = 'Without', group = year),
                 colour = box_colour['Without'], alpha = 0.5, stat = 'identity', width = box_width) +
    geom_step(data = OBS, aes(x = year-0.5, y = obs, colour = 'Observed')) +
    scale_fill_manual(values = box_colour) +
    scale_colour_manual(values = c('Observed' = 'black')) +
    labs(x='year',fill = NULL, colour = NULL)

print(ww)

Created on 2020-08-25 by the reprex package (v0.3.0)

It needs a bit more work to tidy it up, and the plot itself probably doesn't need to be done as here, but I have a solution using gtable and draw_key_* functions to create my own legend as a grob and display it. Yay me.

Thanks again for your help @FJCC

I'm going to mark this as a solution. But feel free to chip in if you have an alternative, or something to improve it, I'm always open to better ideas.

library(tidyverse)
library(grid)
library(gtable)

TBL <- structure(list(model = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, NA, NA, NA, NA), .Label = c("with", "without"), class = "factor"),
    med = c(10, 9, 8, 7, 6, 11, 10, 9, 8, 7, NA, NA, NA, NA),
    year = structure(c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
    1L, 2L, 3L, 4L), .Label = c("2000", "2001", "2002", "2003",
    "2004"), class = "factor"), bot = c(5, 4, 3, 2, 1, 6, 5,
    4, 3, 2, NA, NA, NA, NA), low = c(8.5, 7.5, 6.5, 5.5, 4.5,
    9.5, 8.5, 7.5, 6.5, 5.5, NA, NA, NA, NA), upp = c(11.5, 10.5,
    9.5, 8.5, 7.5, 12.5, 11.5, 10.5, 9.5, 8.5, NA, NA, NA, NA
    ), top = c(15, 14, 13, 12, 11, 16, 15, 14, 13, 12, NA, NA,
    NA, NA), obs = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
    12, 8, 10, 10), var = structure(c(2L, 2L, 2L, 2L, 2L, 3L,
    3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L), .Label = c("obs", "with",
    "without"), class = "factor")), row.names = c(NA, -14L), class = c("tbl_df",
"tbl", "data.frame"))

wTBL <- TBL %>%
    filter(var == 'with') %>%
    mutate(year = as.character(year) %>% as.integer)

woTBL <- TBL %>%
    filter(var == 'without') %>%
    mutate(year = as.character(year) %>% as.integer)

OBS <- TBL %>%
    filter(var == 'obs') %>%
    mutate(year = as.character(year) %>% as.integer)

yr_width <- 0.9
box_width <- yr_width*0.5*0.95
x_shift <- yr_width*0.25
box_colour <- c('With' = 'purple', 'Without' = 'red')
ww <- ggplot() +
    geom_boxplot(data = wTBL,
                 aes(x = year-x_shift, ymin = bot, lower = low, middle = med, upper = upp, ymax = top,
                     fill = 'With', group = year),
                 colour=box_colour['With'], alpha = 0.5, stat = 'identity', width = box_width) +
    geom_boxplot(data = woTBL,
                 aes(x = year+x_shift, ymin = bot, lower = low, middle = med, upper = upp, ymax = top,
                     fill = 'Without', group = year),
                 colour = box_colour['Without'], alpha = 0.5, stat = 'identity', width = box_width) +
    geom_step(data = OBS, aes(x = year-0.5, y = obs, colour = 'Observed')) +
    scale_fill_manual(values = box_colour) +
    scale_colour_manual(values = c('Observed' = 'black')) +
    labs(x='year',fill = NULL, colour = NULL)

# draw a legend by hand
my_legend <- gtable(widths = unit(c(0.15,1), 'in'), heights = unit(c(0.25,0.25,0.25), 'in'))
obsdf<-data.frame(colour = 'black', fill = 'black',
                  alpha = 1, linetype = 1, stringsAsFactors = FALSE)
bx1df<-data.frame(colour = box_colour['With'], fill = box_colour['With'], alpha = 0.5, stringsAsFactors = FALSE)
bx2df<-data.frame(colour = box_colour['Without'], fill = box_colour['Without'], alpha = 0.5, stringsAsFactors = FALSE)
my_legend <- gtable_add_grob(my_legend, draw_key_path(data = obsdf, params = list(arrow=NULL)),
                             t = 1, b = 1, l = 1, r = 1)
my_legend <- gtable_add_grob(my_legend, draw_key_boxplot(data = bx1df), t = 2, b = 2, l = 1, r = 1)
my_legend <- gtable_add_grob(my_legend, draw_key_boxplot(data = bx2df), t = 3, b = 3, l = 1, r = 1)
my_legend <- gtable_add_grob(my_legend, textGrob('Obs', gp = gpar(fontsize = 8), just = 'left'),
                             t = 1, b = 1, l = 2, r = 2)
my_legend <- gtable_add_grob(my_legend, textGrob('With', gp = gpar(fontsize = 8), just = 'left'),
                             t = 2, b = 2, l = 2, r = 2)
my_legend <- gtable_add_grob(my_legend, textGrob('Without', gp = gpar(fontsize = 8), just = 'left'),
                             t = 3, b = 3, l = 2, r = 2)

my_plot <- gtable(widths = unit(c(5.75,1.25), 'in'), heights = unit(7, 'in'))

my_plot <- gtable_add_grob(my_plot, ggplotGrob(ww + theme(legend.position = 'none')), t = 1, b = 1, l = 1, r = 1)
my_plot <- gtable_add_grob(my_plot, my_legend, t = 1, b = 1, l = 2, r = 2)

grid.newpage()
grid.draw(my_plot)

Created on 2020-08-25 by the reprex package (v0.3.0)

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.