Hi! If I'm understanding you correctly, reshaping your data as follows might be what you're looking for.
The second left_join() isn't strictly necessary, but without NAs the columns in your plot might have different widths.
library(tidyverse)
# Fix the RNG seed so the rnorm() draws further down are reproducible.
set.seed(123)
# Lookup table associating each gene_id with one or more subtypes. It is
# written as a named list (gene -> character vector of subtypes) and then
# flattened to a tibble with one row per (gene_id, subtype) pair.
gene_to_subtype <- list(
  Gfra2 = c("Tyrosine Hydroxylase", "Non-peptidergic 2"),
  Mrgpra3 = c("Non-peptidergic 2"),
  Mrgprd = c("Non-peptidergic 1"),
  Sst = c("Non-peptidergic 3"),
  Piezo2 = c("Tyrosine Hydroxylase"),
  Cacna1h = c("Neurofilament 1", "Neurofilament 2"),
  Necab2 = c("Neurofilament 2"),
  Fam19a1 = c("Neurofilament 3", "Peptidergic 2"),
  Ldhb = c(
    "Neurofilament 1", "Neurofilament 2", "Neurofilament 3",
    "Neurofilament 4", "Neurofilament 5"
  )
) %>%
  enframe(name = "gene_id", value = "subtype") %>%
  # Name the list-column to unnest: tidyr >= 1.0.0 warns when unnest() is
  # called without `cols`. Passing it positionally also works on older tidyr.
  unnest(subtype)
# One row per distinct gene, each with a simulated log2 fold change drawn
# from a normal distribution with mean 1 (default sd).
dummy_data <- gene_to_subtype %>%
  distinct(gene_id) %>%
  mutate(log2.FC. = rnorm(n(), mean = 1))
# Start from the full gene_id x subtype grid and join the observed
# (gene, subtype, fold change) rows onto it, so combinations that do not
# occur in gene_to_subtype show up as explicit NAs. The outer parentheses
# print the result as well as assigning it.
(df <- gene_to_subtype %>%
  expand(gene_id, subtype) %>%
  left_join(left_join(dummy_data, gene_to_subtype))) # explicit NAs
#> Joining, by = "gene_id"
#> Joining, by = c("gene_id", "subtype")
#> # A tibble: 90 x 3
#> gene_id subtype log2.FC.
#> <chr> <chr> <dbl>
#> 1 Cacna1h Neurofilament 1 2.72
#> 2 Cacna1h Neurofilament 2 2.72
#> 3 Cacna1h Neurofilament 3 NA
#> 4 Cacna1h Neurofilament 4 NA
#> 5 Cacna1h Neurofilament 5 NA
#> 6 Cacna1h Non-peptidergic 1 NA
#> 7 Cacna1h Non-peptidergic 2 NA
#> 8 Cacna1h Non-peptidergic 3 NA
#> 9 Cacna1h Peptidergic 2 NA
#> 10 Cacna1h Tyrosine Hydroxylase NA
#> # … with 80 more rows
# ggplot(df, aes(x = gene_id, y = log2.FC.)) +
# geom_col(aes(fill = subtype), position = 'dodge')
Created on 2019-03-13 by the reprex package (v0.2.1)