Mosaic Plot in ggplot2

Hello,

I am a complete beginner in R.
I want to create a plot with individuals A, B and C on the y-axis and position on the x-axis (position is a label, not a value, so I don't want the positions spaced proportionally on the graph). Each individual has the value 0, 1 or 2 at each position.
I tried to draw a mosaic plot in RStudio but I received an error message:
"Aesthetics must be either length 1 or the same as the data (62): y, fill"

# Original attempt: read the semicolon-separated file and draw one tile per
# position/individual, filled by the individual's value (0, 1 or 2).
library(ggplot2)
example <- read.csv("EXAMPLE.csv", header = TRUE, sep=";")
# NOTE(review): `y` and `fill` below are literal length-3 vectors, not columns
# of `example`; that is what the reported error "Aesthetics must be either
# length 1 or the same as the data (62): y, fill" complains about. The data
# presumably has to be reshaped to long format (one row per
# position/individual) so both aesthetics can map to real columns.
ggplot(data = example, mapping = aes(x = position, y = factor(c("A", "B", "C")), fill = c("0","1","2"))) +
  geom_tile() +
  # Manual legend: one colour and one label per value.
  scale_fill_manual(
    name = NULL,
    breaks = c(0, 1, 2),
    labels = c("0: NC", "1: M", "2: S"),
    values = c("grey", "red", "green")
  )+
  # expand = c(0, 0) removes the padding around the tiles on both axes.
  # NOTE(review): a discrete x scale presumably needs `position` to be a
  # factor or character column -- confirm its type after read.csv().
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  # Rotate the x labels so long position values do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  # NOTE(review): the `!` after the closing parenthesis of labs() is not valid
  # R; it looks like a paste artifact -- confirm and remove.
  labs(
    x = "Position ...",
    y = NULL,
    title = "Test ..."
  )!

Can you help me, please?

Please find attached the result that I want to get, along with the data.

Many thanks for your answers!

Hi, welcome!
We don't have access to your local files, so we can't reproduce your issue. To help us help you, could you please share sample data in a copy/paste-friendly format?

You can take a look at this reprex guide to learn how to do it.

1 Like

Thank you for your answer!

Here is a minimal dataset:

# Minimal example data: 62 rows, one per `position`. Columns A, B and C hold
# each individual's value (0, 1 or 2) at that position.
data.frame(
    position = c(24175527L, 24179406L, 24190977L, 24231758L, 24233489L,
                 24233909L, 31011515L, 31028176L, 31029290L, 31033137L,
                 31038728L, 31138589L, 31218515L, 31227699L, 31237762L, 31244405L,
                 31290566L, 31385324L, 31422419L, 31442307L, 31868578L, 31879397L,
                 31911466L, 31911845L, 31919659L, 31976319L, 31978675L, 32020257L,
                 32107294L, 32110438L, 32186858L, 32211661L, 32218886L, 32227081L,
                 32230762L, 32257621L, 32259471L, 32278235L, 32290801L,
                 32293091L, 32303015L, 32317364L, 32380996L, 32384863L, 32400177L,
                 32439812L, 32450026L, 32454346L, 32561619L, 32563263L, 32575661L,
                 32580914L, 32609944L, 32698919L, 32808805L, 32887091L, 32890041L,
                 32909341L, 32943611L, 33004667L, 33025319L, 33026756L),
           A = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L),
           B = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 2L, 2L,
                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L),
           C = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 0L,
                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L)
)

This will get you very close:

library(tidyverse)

# Sample data in wide format: 62 rows, one per `position`. Columns A, B and C
# hold each individual's value (0, 1 or 2) at that position.
sample_df <- data.frame(
    position = c(24175527L, 24179406L, 24190977L, 24231758L, 24233489L,
                 24233909L, 31011515L, 31028176L, 31029290L, 31033137L,
                 31038728L, 31138589L, 31218515L, 31227699L, 31237762L, 31244405L,
                 31290566L, 31385324L, 31422419L, 31442307L, 31868578L, 31879397L,
                 31911466L, 31911845L, 31919659L, 31976319L, 31978675L, 32020257L,
                 32107294L, 32110438L, 32186858L, 32211661L, 32218886L, 32227081L,
                 32230762L, 32257621L, 32259471L, 32278235L, 32290801L,
                 32293091L, 32303015L, 32317364L, 32380996L, 32384863L, 32400177L,
                 32439812L, 32450026L, 32454346L, 32561619L, 32563263L, 32575661L,
                 32580914L, 32609944L, 32698919L, 32808805L, 32887091L, 32890041L,
                 32909341L, 32943611L, 33004667L, 33025319L, 33026756L),
    A = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
          2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
          2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
          2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L),
    B = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 2L, 2L,
          2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
          2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
          2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L),
    C = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
          2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
          2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 0L,
          2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L)
)

# Reshape the wide table to long format (one row per position/individual) and
# draw one tile per combination, filled by the individual's value.
sample_df %>%
    # pivot_longer() supersedes gather(); it produces the same `variable` and
    # `value` columns.
    pivot_longer(A:C, names_to = "variable", values_to = "value") %>%
    # x: positions as a factor so they are evenly spaced labels, not a
    #    proportional numeric axis.
    # y: levels reversed (C, B, A) so that A ends up in the top row.
    # fill: explicit level order fixes the legend order to 2, 1, 0.
    ggplot(aes(x = as.factor(position),
               y = fct_rev(variable),
               fill = factor(value, levels = c(2, 1, 0)))
           ) +
    geom_tile() +
    theme_minimal() +
    # With labels rotated by 90 degrees, vjust = 0.5 centres each label on its
    # tick mark (vjust = 1 leaves it shifted to one side).
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
          axis.title = element_blank(),
          legend.title = element_blank())

Created on 2019-11-24 by the reprex package (v0.3.0.9000)

2 Likes

Great job! Many thanks!
Just another small question, if you have time:
How can I label only one position out of every five on the x-axis, to make it more readable?

Well, this is a dirty hack but it works

# Same tile plot as before, but only every fifth position is labelled on the
# x-axis to keep the labels readable.
sample_df %>%
    # pivot_longer() supersedes gather(); it produces the same `variable` and
    # `value` columns.
    pivot_longer(A:C, names_to = "variable", values_to = "value") %>%
    # x: positions as a factor so they are evenly spaced labels.
    # y: levels reversed (C, B, A) so that A ends up in the top row.
    # fill: explicit level order fixes the legend order to 2, 1, 0.
    ggplot(aes(x = as.factor(position),
               y = fct_rev(variable),
               fill = factor(value, levels = c(2, 1, 0)))
    ) +
    geom_tile() +
    theme_minimal() +
    # With labels rotated by 90 degrees, vjust = 0.5 centres each label on its
    # tick mark.
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
          axis.title = element_blank(),
          legend.title = element_blank()) +
    # Derive the breaks from the scale's own limits (the factor levels, in axis
    # order) instead of from sample_df$position: this keeps the 1st, 6th,
    # 11th, ... label regardless of the row order of the data frame.
    scale_x_discrete(breaks = function(x) x[seq_along(x) %% 5 == 1])

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.