Missing values not plotting with custom fill aesthetic scale

Hello RStudio.Community! I’m hoping you can help me with this issue I ran into while creating a custom color scale for ggplot2. I’m sorry for the long-winded post–I followed the excellent posts from Robert Franssen, Simon Jackson, Maddie Pickens, and the well-written documentation for ggplot2 :pray: :pray:

Load packages

Below are the packages I’m using:

library(tidyverse)
library(scales)
library(grDevices)
library(NHANES)
library(janitor)

Colors, palettes, scales

# Colour definitions ----
# Named character vector mapping colour names to hex codes. Every hue comes in
# five variants: two lighter ones (`_t3`, `_t2`), the `_primary`, and two
# darker ones (`_s2`, `_s3`).
pckg_colors <- c(
  # dark blues
  dark_blue_t3      = "#325777",
  dark_blue_t2      = "#194266",
  dark_blue_primary = "#002E56",
  dark_blue_s2      = "#00294d",
  dark_blue_s3      = "#002444",
  # purples
  purple_t3         = "#5d63ac",
  purple_t2         = "#4950a2",
  purple_primary    = "#353D98",
  purple_s2         = "#2f3688",
  purple_s3         = "#2a3079",
  # blues
  blue_t3           = "#3272af",
  blue_t2           = "#1961a5",
  blue_primary      = "#00509C",
  blue_s2           = "#00488c",
  blue_s3           = "#00407c",
  # violets
  violet_t3         = "#ba4da4",
  violet_t2         = "#b13799",
  violet_primary    = "#A9218E",
  violet_s2         = "#981d7f",
  violet_s3         = "#871a71",
  # reds
  red_t3            = "#f15971",
  red_t2            = "#ef445f",
  red_primary       = "#EE304E",
  red_s2            = "#d62b46",
  red_s3            = "#be263e",
  # oranges
  orange_t3         = "#f4845a",
  orange_t2         = "#f37545",
  orange_primary    = "#F26631",
  orange_s2         = "#d95b2c",
  orange_s3         = "#c15127"
)
# Extract colours by name ----
# Looks up entries of `pckg_colors`.
#
# ...: colour names (character strings or character vectors). Anything else
#      is used to index `pckg_colors` as-is.
#
# Returns a named character vector of hex codes in the order requested, or
# the whole `pckg_colors` vector when called without arguments.
# Stops with an error if a requested name is not in `pckg_colors`.
pckg_cols <- function(...) {
  cols <- c(...)
  if (is.null(cols)) {
    return(pckg_colors)
  }
  # Indexing by an unknown name would silently return NA, which hides typos,
  # so unknown names are rejected up front.
  if (is.character(cols)) {
    unknown <- setdiff(cols, names(pckg_colors))
    if (length(unknown) > 0) {
      stop(
        "Unknown colour name(s): ", paste(unknown, collapse = ", "),
        call. = FALSE
      )
    }
  }
  pckg_colors[cols]
}
# Palettes ----
# Named list of palettes. Each element is the named character vector of hex
# codes that pckg_cols() returns for the listed colour names, in this order.
pckg_palettes <- list(
  primary = pckg_cols(c(
    "dark_blue_primary",
    "purple_primary",
    "blue_primary",
    "violet_primary",
    "red_primary",
    "orange_primary"
  )),
  light = pckg_cols(c(
    "dark_blue_t3", "dark_blue_t2",
    "purple_t3", "purple_t2",
    "blue_t3", "blue_t2",
    "violet_t3", "violet_t2",
    "red_t3", "red_t2",
    "orange_t3", "orange_t2"
  )),
  dark = pckg_cols(c(
    "dark_blue_s2", "dark_blue_s3",
    "purple_s2", "purple_s3",
    "blue_s2", "blue_s3",
    "violet_s2", "violet_s3",
    "red_s2", "red_s3",
    "orange_s2", "orange_s3"
  )),
  all = pckg_cols(c(
    "dark_blue_t3", "dark_blue_t2", "dark_blue_primary", "dark_blue_s2", "dark_blue_s3",
    "purple_t3", "purple_t2", "purple_primary", "purple_s2", "purple_s3",
    "blue_t3", "blue_t2", "blue_primary", "blue_s2", "blue_s3",
    "violet_t3", "violet_t2", "violet_primary", "violet_s2", "violet_s3",
    "red_t3", "red_t2", "red_primary", "red_s2", "red_s3",
    "orange_t3", "orange_t2", "orange_primary", "orange_s2", "orange_s3"
  ))
)
# Build a palette interpolation function ----
# Looks up a palette in `pckg_palettes` and hands it to
# grDevices::colorRampPalette().
#
# palette: name of an element of `pckg_palettes`. The default is "primary";
#          the earlier default, "main", is not a defined palette.
# reverse: if TRUE, the palette's colour order is reversed first.
# ...:     further arguments passed on to grDevices::colorRampPalette().
#
# Returns the function created by colorRampPalette(); the scale helpers call
# it with the number of colours they need.
# Stops with an error if `palette` is not found in `pckg_palettes`.
pckg_pal <- function(palette = "primary", reverse = FALSE, ...) {
  pal <- pckg_palettes[[palette]]
  # `[[` returns NULL for an unknown name; report that here instead of
  # passing an empty palette on to colorRampPalette().
  if (is.null(pal)) {
    stop(
      "Unknown palette \"", palette, "\". Available palettes: ",
      paste(names(pckg_palettes), collapse = ", "),
      call. = FALSE
    )
  }
  if (reverse) {
    pal <- rev(pal)
  }
  grDevices::colorRampPalette(colors = pal, ...)
}
# Colour scale ----
# Builds a ggplot2 colour scale from one of the palettes in `pckg_palettes`.
#
# palette:  palette name handed to pckg_pal(). The default is "primary";
#           the earlier default, "main", is not a defined palette.
# discrete: TRUE for a discrete scale, FALSE for a continuous gradient.
# reverse:  if TRUE, the palette order is reversed.
# ...:      passed to ggplot2::discrete_scale() or
#           ggplot2::scale_color_gradientn(), depending on `discrete`.
scale_color_pckg <- function(palette = "primary", discrete = TRUE,
                             reverse = FALSE, ...) {
  pal <- pckg_pal(palette = palette, reverse = reverse)

  if (discrete) {
    # No na.value is supplied on this branch, so missing values fall back to
    # whatever discrete_scale() uses by default.
    ggplot2::discrete_scale(
      aesthetics = "colour",
      scale_name = paste0("pckg_", palette),
      palette = pal,
      ...
    )
  } else {
    # Sample 256 colours from the interpolated palette for the gradient.
    ggplot2::scale_color_gradientn(colours = pal(256), ...)
  }
}
# Fill scale ----
# Builds a ggplot2 fill scale from one of the palettes in `pckg_palettes`.
#
# palette:  palette name handed to pckg_pal(). The default is "primary";
#           the earlier default, "main", is not a defined palette.
# discrete: TRUE for a discrete scale, FALSE for a continuous gradient.
# reverse:  if TRUE, the palette order is reversed.
# ...:      passed to ggplot2::discrete_scale() or
#           ggplot2::scale_fill_gradientn(), depending on `discrete`.
scale_fill_pckg <- function(palette = "primary", discrete = TRUE,
                            reverse = FALSE, ...) {
  pal <- pckg_pal(palette = palette, reverse = reverse)

  if (discrete) {
    # No na.value is supplied on this branch, so missing values fall back to
    # whatever discrete_scale() uses by default.
    ggplot2::discrete_scale(
      aesthetics = "fill",
      scale_name = paste0("pckg_", palette),
      palette = pal,
      ...
    )
  } else {
    # Sample 256 colours from the interpolated palette for the gradient.
    ggplot2::scale_fill_gradientn(colours = pal(256), ...)
  }
}

Test these below using the nhanes data (a subset of the NHANES data).

# NHANES data with column names cleaned by janitor.
nhanes <- janitor::clean_names(NHANES::NHANES)
# Height vs. weight, every point drawn in the "red_s2" colour.
ggplot(nhanes, aes(x = height, y = weight)) +
  geom_point(colour = pckg_cols("red_s2"), alpha = 1 / 5)
## Warning: Removed 366 rows containing missing values (geom_point).

test-red_s2-1

# Height vs. weight, every point drawn in the "purple_t2" colour.
ggplot(nhanes, aes(x = height, y = weight)) +
  geom_point(colour = pckg_cols("purple_t2"), alpha = 1 / 5)
## Warning: Removed 366 rows containing missing values (geom_point).

test-purple_t2-1

On both of these I see:
## Warning: Removed 366 rows containing missing values (geom_point).

Color aesthetic

# Continuous colour scale: age mapped to colour using the "light" palette.
ggplot(nhanes, aes(x = height, y = weight, colour = age)) +
  geom_point() +
  scale_color_pckg(palette = "light", discrete = FALSE)
## Warning: Removed 366 rows containing missing values (geom_point).

test-scale_color_pckg-discrete-1

I also see
## Warning: Removed 366 rows containing missing values (geom_point).

Fill aesthetic

When I test the fill aesthetic, I map a categorical variable with no missing values (race1) to both x and fill.

# Tabulate race1.
count(nhanes, race1)
## # A tibble: 5 × 2
##   race1        n
##   <fct>    <int>
## 1 Black     1197
## 2 Hispanic   610
## 3 Mexican   1015
## 4 White     6372
## 5 Other      806
# Bar chart of race1, filled with the "light" palette and no legend.
ggplot(nhanes, aes(x = race1, fill = race1)) +
  geom_bar() +
  scale_fill_pckg(palette = "light", guide = "none")

test-scale_fill_pckg-light-1

But when I pass it a variable that has missing values (race3)…

# Tabulate race3, including its missing values.
count(nhanes, race3)
## # A tibble: 7 × 2
##   race3        n
##   <fct>    <int>
## 1 Asian      288
## 2 Black      589
## 3 Hispanic   350
## 4 Mexican    480
## 5 White     3135
## 6 Other      158
## 7 <NA>      5000
# Bar chart of race3, filled with the "primary" palette and no legend.
ggplot(nhanes, aes(x = race3, fill = race3)) +
  geom_bar() +
  scale_fill_pckg(palette = "primary", guide = "none")

test-scale_fill_pckg-primary-1

Here I see that the missing values for race3 aren’t plotted with this scale. Do I need to set the na.translate or na.value argument in the discrete_scale() call?

Any help you can provide would be greatly appreciated! :smiley: :smiley:

good question, would love to see a reply to this!

1 Like

Hi RStudio.Community!

I just wanted to follow up and say I figured this out by adding the na.value = "grey50" argument to both scale functions.

# Colour scale with an explicit colour for missing values.
#
# palette:  palette name handed to pckg_pal(). The default is "primary";
#           the earlier default, "main", is not a defined palette.
# discrete: TRUE for a discrete scale, FALSE for a continuous gradient.
# reverse:  if TRUE, the palette order is reversed.
# ...:      passed to ggplot2::discrete_scale() or
#           ggplot2::scale_color_gradientn(), depending on `discrete`.
# na.value: colour used for missing values. It is a named argument after
#           `...` so callers can override it; hard-coding it next to `...`
#           makes R reject a caller-supplied na.value as a duplicate.
scale_color_pckg <- function(palette = "primary", discrete = TRUE,
                             reverse = FALSE, ..., na.value = "grey50") {
  pal <- pckg_pal(palette = palette, reverse = reverse)

  if (discrete) {
    # NOTE(review): recent ggplot2 releases deprecate `scale_name` - confirm
    # against the installed version before removing it.
    ggplot2::discrete_scale(
      aesthetics = "colour",
      scale_name = paste0("pckg_", palette),
      palette = pal,
      na.value = na.value,
      ...
    )
  } else {
    # Sample 256 colours from the interpolated palette for the gradient.
    ggplot2::scale_color_gradientn(
      colours = pal(256),
      na.value = na.value,
      ...
    )
  }
}
# Fill scale with an explicit colour for missing values.
#
# palette:  palette name handed to pckg_pal(). The default is "primary";
#           the earlier default, "main", is not a defined palette.
# discrete: TRUE for a discrete scale, FALSE for a continuous gradient.
# reverse:  if TRUE, the palette order is reversed.
# ...:      passed to ggplot2::discrete_scale() or
#           ggplot2::scale_fill_gradientn(), depending on `discrete`.
# na.value: fill used for missing values. It is a named argument after
#           `...` so callers can override it; hard-coding it next to `...`
#           makes R reject a caller-supplied na.value as a duplicate.
scale_fill_pckg <- function(palette = "primary", discrete = TRUE,
                            reverse = FALSE, ..., na.value = "grey50") {
  pal <- pckg_pal(palette = palette, reverse = reverse)

  if (discrete) {
    # NOTE(review): recent ggplot2 releases deprecate `scale_name` - confirm
    # against the installed version before removing it.
    ggplot2::discrete_scale(
      aesthetics = "fill",
      scale_name = paste0("pckg_", palette),
      palette = pal,
      na.value = na.value,
      ...
    )
  } else {
    # Sample 256 colours from the interpolated palette for the gradient.
    ggplot2::scale_fill_gradientn(
      colours = pal(256),
      na.value = na.value,
      ...
    )
  }
}
# Same race3 bar chart as before, now drawn with the updated fill scale.
ggplot(nhanes, aes(x = race3, fill = race3)) +
  geom_bar() +
  scale_fill_pckg(palette = "primary", guide = "none")