Visualise 2x2 table in ggplot with mosaic plot.

Hi All,

I would like to visualise 2x2 table in ggplot2. Here is the code below how I did it in vcd package, but can't create it in ggplot2. Any ideas would be much appreciated as I should probably use ggmosaic with geom_mosaic, but I am stuck a bit trying to figure it out. Please help.

``` r
library(vcd)
#> Loading required package: grid

 
data1 <- as.table(matrix(c(212, 256, 144, 707), 2, 2))

dimnames(data1) <- list(Measurement_1 = c("Time1_yes", "Time1_no"),
                       Measurement_2 = c("Time2_yes", "Time2_no"))

percentages <- round(100*prop.table(data1), 2)

etiquettes <- as.table(matrix(paste0(data1, "; ", percentages, "%"), 2, 2))

dimnames(etiquettes) <- dimnames(data1)

mosaic(data1, 
       pop = F, shade = T, colorize = T, 
       gp = gpar(fill = matrix(c("grey", "#756bb1", "#756bb1", "grey"), 2, 2))) 

labeling_cells(text = etiquettes, margin = 0)(data1)

Created on 2020-03-07 by the reprex package (v0.3.0)


My desired output would be like this, with labels (numbers, percentages) in each rectangle:

Here's a first draft (missing correct proportions and sub-labels) using the ggmosaic package

library(ggmosaic)
#> Loading required package: ggplot2

data1 <- as.table(matrix(c(212, 256, 144, 707), 2, 2))

dimnames(data1) <- list(Measurement_1 = c("Time1_yes", "Time1_no"),
                       Measurement_2 = c("Time2_yes", "Time2_no"))

percentages <- round(100*prop.table(data1), 2)

etiquettes <- as.table(matrix(paste0(data1, "; ", percentages, "%"), 2, 2))

dimnames(etiquettes) <- dimnames(data1)

to_plot <- as.data.frame(etiquettes)

ggplot(data = to_plot) +
   geom_mosaic(aes(x = product(Measurement_1,Measurement_2), fill=Freq), na.rm=TRUE) + 
   labs(x = "Measurement_1", y = "Measurement_2")

Created on 2020-03-07 by the reprex package (v0.3.0)

Hi @Andrzej: Hopefully between @technocrat's code and mine, you can find a solution?

data1 <- as.table(matrix(c(212, 256, 144, 707), 2, 2))
dimnames(data1) <- list(Measurement_1 = c("Time1_yes", "Time1_no"),
                        Measurement_2 = c("Time2_yes", "Time2_no"))
library(tidyverse)
library(ggmosaic)
p <- 
ggplot(data1 %>% as_tibble()) +
  geom_mosaic(
    aes(weight = n, x = product(Measurement_1), fill = Measurement_2)
  ) 
p +
  geom_text(
    # extract rectangle centers, add labels
    data = 
      # 'layer_data(p, 1)' extracts data frame with data from 1st layer of p
      layer_data(p, 1) %>% 
      select(xmin:ymax) %>% 
      mutate(m.x = (xmin + xmax)/2, m.y =  (ymin + ymax)/2) %>% 
      select(m.x, m.y)  %>% 
      mutate(string = c(letters[1:4])),
    # set label locations to centers, set labels to strings
    aes(x = m.x, y = m.y, label = string)
  )

Created on 2020-03-07 by the reprex package (v0.3.0)

1 Like

Thank you very much indeed @technocrat and @dromano.

Is it possible somehow to place an x-axis on the top of ggplot:

`

In general, you should be able add + scale_x_continuous(position = "top"), or *discrete* instead of *continuous*, but I can't seem to get it to work. What do you think, @technocrat? Could it be a clash between ggmosiac and ggplot2?

1 Like

I'm a bit stuck there, too. I've only nibbled away at the other edges. The lack of x and y axis titles is a reprex artificat

# reproducing @drromano's solution
suppressPackageStartupMessages(library(ggmosaic)) 
suppressPackageStartupMessages(library(tidyverse)) 
data1 <- as.table(matrix(c(212, 256, 144, 707), 2, 2))
dimnames(data1) <- list(Measurement_1 = c("Time1_yes", "Time1_no"),
                        Measurement_2 = c("Time2_yes", "Time2_no"))

p <- ggplot(data1 %>% as_tibble()) +
  geom_mosaic(
    aes(weight = n, x = product(Measurement_1), fill = Measurement_2)
  ) 
q <- p +
  geom_text(
    # extract rectangle centers, add labels
    data = 
      # 'layer_data(p, 1)' extracts data frame with data from 1st layer of p
      layer_data(p, 1) %>% 
      select(xmin:ymax) %>% 
      mutate(m.x = (xmin + xmax)/2, m.y =  (ymin + ymax)/2) %>% 
      select(m.x, m.y)  %>% 
      mutate(string = c(letters[1:4])),
    # set label locations to centers, set labels to strings
    aes(x = m.x, y = m.y, label = string)
  )

# embellishment

# In `ggplot2`, the `theme()` function gives very fine-grained control.

# extract names of two x axes
m1_lab <- attributes(attributes(data1)$dimnames[1])[[1]]
m2_lab <- attributes(attributes(data1)$dimnames[2])[[1]]

# stuck here: move x axes to top

# remove gray background

q + theme_minimal() +

# remove spurious x and y axes label

labs (x = NULL, y = NULL) +

# remove unneeded legend

guides(fill = FALSE) +

# remove grid lines
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank() 
)

Created on 2020-03-07 by the reprex package (v0.3.0)

Going back to the original question, this takes care of the axes (almost; would need to tweak font appearance, plus replace letters with percentages)

# reproducing @drromano's solution
suppressPackageStartupMessages(library(ggmosaic)) 
suppressPackageStartupMessages(library(tidyverse)) 
data1 <- as.table(matrix(c(212, 256, 144, 707), 2, 2))
dimnames(data1) <- list(Measurement_1 = c("Time1_yes", "Time1_no"),
                        Measurement_2 = c("Time2_yes", "Time2_no"))

p <- ggplot(data1 %>% as_tibble()) +
  geom_mosaic(
    aes(weight = n, x = product(Measurement_1), fill = Measurement_2)
  ) 
q <- p +
  geom_text(
    # extract rectangle centers, add labels
    data = 
      # 'layer_data(p, 1)' extracts data frame with data from 1st layer of p
      layer_data(p, 1) %>% 
      select(xmin:ymax) %>% 
      mutate(m.x = (xmin + xmax)/2, m.y =  (ymin + ymax)/2) %>% 
      select(m.x, m.y)  %>% 
      mutate(string = c(letters[1:4])),
    # set label locations to centers, set labels to strings
    aes(x = m.x, y = m.y, label = string)
  )

# embellishment

# In `ggplot2`, the `theme()` function gives very fine-grained control.

# extract names of two x axes
m1_lab <- attributes(attributes(data1)$dimnames[1])[[1]]
m2_lab <- attributes(attributes(data1)$dimnames[2])[[1]]


# remove gray background

q + theme_minimal() +

# place Measurement_2 as y-axis, omit x-axis

labs (y = m2_lab, x = NULL) +

# place Measurement_1 as title

ggtitle(m1_lab) + theme(plot.title = element_text(hjust = 0.5)) +

# remove unneeded legend

guides(fill = FALSE) +

# remove grid lines
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank() 
)

Created on 2020-03-07 by the reprex package (v0.3.0)

It turns out there was a clash between the packages, and ggmosaic requires its own special axis-placement command: scale_x_productlist()!

data1 <- as.table(matrix(c(212, 256, 144, 707), 2, 2))
dimnames(data1) <- list(Measurement_1 = c("Time1_yes", "Time1_no"),
                        Measurement_2 = c("Time2_yes", "Time2_no"))
library(ggmosaic)
#> Loading required package: ggplot2
library(tidyverse)
p <- 
  ggplot(data1 %>% as_tibble()) +
  geom_mosaic(
    aes(weight = n, x = product(Measurement_1), fill = Measurement_2)
  ) + 
  # move x-axis to top
  scale_x_productlist(position = 'top')
p +
  geom_text(
    # extract rectangle centers, add labels
    data = 
      # extract rectangle centers, add strings for labels
      ## 'layer_data(p, 1)' extracts data frame with data from 1st layer of p
      layer_data(p, 1) %>% 
      select(xmin:ymax) %>% 
      mutate(m.x = (xmin + xmax)/2, m.y =  (ymin + ymax)/2) %>% 
      select(m.x, m.y)  %>% 
      mutate(string = c(letters[1:4])),
    # set label locations to centers, set labels to strings
    aes(x = m.x, y = m.y, label = string)
  )

Created on 2020-03-07 by the reprex package (v0.3.0)

1 Like

Thank you @technocrat and @dromano for your inspiration.
I have found another alternative example:

library(reshape2)
library(rvg)
library(officer)
#> Warning: package 'officer' was built under R version 3.6.3
library(ggmosaic)
#> Loading required package: ggplot2
library(ggrepel)


x <- matrix(c(212, 256, 144, 707), 2, 2)


dimnames(x) <- list(Measurement_1 = c("Time1_yes", "Time1_no"),
                    Measurement_2 = c("Time2_yes", "Time2_no"))

df <- reshape2::melt(x)

p <- ggplot(df) + 
  geom_mosaic(aes(weight = value, 
                  x = product(Measurement_1, Measurement_2)),
              fill = c("#6b5dd5", "grey80", "grey80", "#6b5dd5")) + 
  labs(x = "Measurement_2", y = "Measurement_1")

df2 <- ggplot_build(p)$data[[1]]
#> New names:
#> * `1` -> `1...1`
#> * `1.1` -> `1.1...2`
#> * `1` -> `1...3`
#> * `1.1` -> `1.1...4`
#> New names:
#> * `` -> ...1
#> * `` -> ...2
#> * `1...1` -> `1...3`
#> * `1.1...2` -> `1.1...4`
#> * `1...3` -> `1...5`
#> * ...

df2$pr <- round(100*df2$.wt/sum(df2$.wt), 2)

df2$lab <- paste0(df2$.wt, "; ", df2$pr, "%")

p + geom_label(data = df2, 
              aes(x = (xmin+xmax)/2, y = (ymin+ymax)/2, label = lab))
#> New names:
#> * `1` -> `1...1`
#> * `1.1` -> `1.1...2`
#> * `1` -> `1...3`
#> * `1.1` -> `1.1...4`
#> New names:
#> * `` -> ...1
#> * `` -> ...2
#> * `1...1` -> `1...3`
#> * `1.1...2` -> `1.1...4`
#> * `1...3` -> `1...5`
#> * ...

p + geom_label(
  data = df2,
  aes(x = (xmin + xmax) / 2, y = (ymin + ymax) / 2, label = lab)
) +
  geom_label(
    data = df2,
    aes(x = (xmin + xmax) / 2, y = (ymin + ymax) / 2, label = label)
  ) +
  geom_label_repel(aes(x = (df2$xmin + df2$xmax) / 2, y = (df2$ymin + df2$ymax) / 2, label = df2$lab),
    nudge_x = 0.09,
    direction = "y",
    hjust = 0
  )
#> New names:
#> * `1` -> `1...1`
#> * `1.1` -> `1.1...2`
#> * `1` -> `1...3`
#> * `1.1` -> `1.1...4`
#> New names:
#> * `` -> ...1
#> * `` -> ...2
#> * `1...1` -> `1...3`
#> * `1.1...2` -> `1.1...4`
#> * `1...3` -> `1...5`
#> * ...


### The above graph will be saved now as gg object:
  gg <- p + geom_label(
  data = df2,
  aes(x = (xmin + xmax) / 2, y = (ymin + ymax) / 2, label = lab)
) +
  geom_label(
    data = df2,
    aes(x = (xmin + xmax) / 2, y = (ymin + ymax) / 2, label = label)
  ) +
  geom_label_repel(aes(x = (df2$xmin + df2$xmax) / 2, y = (df2$ymin + df2$ymax) / 2, label = df2$lab),
    nudge_x = 0.09,
    direction = "y",
    hjust = 0
  )

### and then I exported it to PowerPoint to do some "improvements":
read_pptx() %>% 
  add_slide(layout = "Title and Content", master = "Office Theme") %>% 
  ph_with(dml(ggobj = gg), location = ph_location_type(type = "body")) %>% 
  print(target = "assets_demo_rvg.pptx")

Created on 2020-03-08 by the reprex package (v0.3.0)

which gave me this:

that looks pretty similar to initial 2x2 table:

obraz

1 Like

Hi, one additional interesting thing,
I am not able to add breaks description on the mosaic plot.
Even when I try the code from the author's webiste:
https://github.com/haleyjeppson/ggmosaic

this code gives this plot (with breaks description):

but in my RStudio the same code gives me this plot (without brakes description):

Any idea why is that ? I use windows 10 x64, R 3.6.2, RStudio 1.4.52 and 1.3.911 (preview).

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.