ggplot2 Viewport/Axis Issue

I'm having a couple of issues plotting a line graph in ggplot2. I'm plotting a very wide range of values (lowest is near zero, highest in the millions) and noticing that the line ends before touching the edge of the plot area. This is causing me problems because I need to add brackets to the plot in the positions indicated by the dots. I don't want to do this manually, so I want the brackets to be placed using quantile values that I've generated as the x,y coordinates.

The problem seems to be that there is "padding" around the plot area; the x,y coordinates of the plot do not correspond with the plotted values. The coordinates are all "off" by a bit due to the margins around the plot area. If you look at the reprex, the bottom left bracket goes too far to the left (past the plotted line) and the upper right bracket goes too far up.

How can I fix this? My ideal solution would be to have the line touch the edges of the plot area and have the quantile values usable as x,y coordinates.

library(ggplot2)
library(ggrepel)
library(pBrackets)

# Build a grob that draws a pBrackets brace when the plot is rendered.
#
# Args:
#   ...: Arguments forwarded unchanged to grid.brackets() (for example the
#        endpoint coordinates, h, lwd, col).
#
# Returns:
#   A "recordedGrob" that can be handed to annotation_custom(). The
#   grid.brackets() call is stored unevaluated and only runs at draw time.
bracketsGrob <- function(...) {
  # Keep the arguments in their own environment so the recorded expression
  # can still look up `l` when it is evaluated later.
  e <- new.env()
  e$l <- list(...)
  # recordGrob() is exported from grid, so `::` is enough; `:::` is only
  # needed for unexported internals.
  grid::recordGrob(
    {
      do.call(grid.brackets, l)
    },
    e
  )
}

 set.seed(1) # fixed seed so every run draws the same simulated purchases

# Simulated purchase totals for 10,000 customers: squared normal draws.
purchased <- rnorm(10000, mean = 5000, sd = 2500)
purchased <- purchased^2

customer.num <- 1:10000

test_df <- data.frame(customer.num, purchased)

# Sort ascending so the cumulative columns below increase monotonically.
test_df$purchased <- sort(test_df$purchased, decreasing = FALSE)

test_df$cuml.purchased <- cumsum(test_df$purchased)
# dplyr is not attached in this snippet, so percent_rank() is called through
# its namespace; the bare call fails with "could not find function".
test_df$purch.pct.rank <- dplyr::percent_rank(test_df$purchased)
test_df$cuml.purch.pct <- test_df$cuml.purchased / sum(test_df$purchased)

# Quantiles of both plotted variables. `type` is now passed by name instead
# of as a trailing positional 3. Type 3 returns observed data values rather
# than interpolated ones, which lets later code match rows with %in%.
quants.x <- quantile(
  test_df$purch.pct.rank,
  probs = c(0.5, 0.9, 0.95, 0.99),
  na.rm = FALSE,
  names = TRUE,
  type = 3
)
quants.y <- quantile(
  test_df$cuml.purch.pct,
  probs = c(0.5, 0.9, 0.95, 0.99),
  na.rm = FALSE,
  names = TRUE,
  type = 3
)

# Base curve: percentile rank of spend (x) against cumulative share of total
# spend (y), with a two-decimal label mapped for the label layer.
curve <- ggplot(
  test_df,
  aes(
    x = purch.pct.rank,
    y = cuml.purch.pct,
    label = sprintf("%0.2f", round(cuml.purch.pct, digits = 2))
  )
) +
  geom_line(size = 2, color = "red")

# Rows whose values coincide with the quantiles. Exact matching via %in%
# relies on the quantiles being observed data values (quantile type 3).
test.y <- test_df[test_df$cuml.purch.pct %in% quants.y, ]
test.x <- test_df[test_df$purch.pct.rank %in% quants.x, ]

# Mark and label the quantile rows. hjust/vjust used to sit inside sprintf()
# because of a misplaced parenthesis, where they had no effect on the labels
# (newer R versions warn about the unused arguments). They are layer
# arguments of geom_label_repel().
curve <- curve +
  geom_point(data = test.y, aes(x = purch.pct.rank, y = cuml.purch.pct)) +
  geom_label_repel(
    data = test.y,
    aes(label = sprintf("%0.2f", round(cuml.purch.pct, digits = 2))),
    hjust = 0,
    vjust = 0.5
  )

# Brackets positioned from the quantile values: br1 runs horizontally from
# x = 0 to the first x quantile, br2 vertically from the second y quantile
# up to y = 1.
br1 <- bracketsGrob(
  x1 = 0, y1 = quants.y[1], x2 = quants.x[1], y2 = quants.y[1],
  h = 0.05, lwd = 2, col = "blue"
)
br2 <- bracketsGrob(
  x1 = quants.x[2], y1 = quants.y[2], x2 = quants.x[2], y2 = 1,
  h = 0.05, lwd = 2, col = "blue"
)

# NOTE: annotation_custom() is called without xmin/xmax/ymin/ymax, so each
# grob is drawn over the whole panel, axis padding included.
curve + annotation_custom(br1) + annotation_custom(br2)

Could you please run this through reprex (short for reproducible example)? That way we can see the output images as well, which will make it easier to troubleshoot.

Thanks!

# One-time setup: install the reprex package.
install.packages("reprex")

If you've never heard of a reprex before, you might want to start by reading the tidyverse.org help page. The reprex dos and don'ts are also useful.

There's also a nice FAQ on how to do a minimal reprex for beginners, below:

What to do if you run into clipboard problems

If you run into problems with access to your clipboard, you can specify an outfile for the reprex, and then copy and paste the contents into the forum.

# Take the code from the input script and write the rendered reprex to the
# given outfile instead of relying on the clipboard.
reprex::reprex(input = "fruits_stringdist.R", outfile = "fruits_stringdist.md")

For pointers specific to the community site, check out the reprex FAQ.

Thanks for the pointer! Improved reprex below.

library(ggplot2)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggrepel)
library(pBrackets)

# Wrap a grid.brackets() call in a grob so it can be added to a ggplot.
#
# Args:
#   ...: Passed through as-is to grid.brackets() (x1, y1, x2, y2, h, lwd,
#        col, ...).
#
# Returns:
#   A "recordedGrob". The bracket is not drawn here; the stored expression
#   is evaluated when the grob itself is drawn.
bracketsGrob <- function(...) {
  # The recorded expression refers to `l`, so `l` must live in the
  # environment that is handed to recordGrob().
  e <- new.env()
  e$l <- list(...)
  # Use the exported accessor `::`; recordGrob() is part of grid's public
  # API and does not need `:::`.
  grid::recordGrob(
    {
      do.call(grid.brackets, l)
    },
    e
  )
}

 set.seed(1)

# Simulated spend: squared normal draws, one value per customer.
purchased <- rnorm(10000, mean = 5000, sd = 2500)^2
customer.num <- 1:10000

# Store spend in ascending order, then derive the running total, each
# customer's percentile rank, and the cumulative share of all spend.
test_df <- data.frame(customer.num, purchased = sort(purchased))
test_df$cuml.purchased <- cumsum(test_df$purchased)
test_df$purch.pct.rank <- percent_rank(test_df$purchased)
test_df$cuml.purch.pct <- test_df$cuml.purchased / sum(test_df$purchased)

# Quantiles of both plotted variables, using quantile algorithm 3.
quants.x <- quantile(
  test_df$purch.pct.rank,
  probs = c(0.5, 0.9, 0.95, 0.99),
  type = 3
)
quants.y <- quantile(
  test_df$cuml.purch.pct,
  probs = c(0.5, 0.9, 0.95, 0.99),
  type = 3
)

# Line plot of cumulative spend share against percentile rank. The label
# aesthetic formats the cumulative share to two decimals.
curve <- ggplot(
  test_df,
  aes(
    x = purch.pct.rank,
    y = cuml.purch.pct,
    label = sprintf("%0.2f", round(cuml.purch.pct, digits = 2))
  )
) +
  geom_line(size = 2, color = "red")

# Subsets of test_df at the quantile values; test.y feeds both the point and
# the label layer, so the filter is evaluated once rather than three times.
test.y <- test_df[test_df$cuml.purch.pct %in% quants.y, ]
test.x <- test_df[test_df$purch.pct.rank %in% quants.x, ]

# hjust/vjust were previously trapped inside the sprintf() call by a
# misplaced closing parenthesis, so they never reached the label layer.
# They are passed to geom_label_repel() directly.
curve <- curve +
  geom_point(data = test.y, aes(x = purch.pct.rank, y = cuml.purch.pct)) +
  geom_label_repel(
    data = test.y,
    aes(label = sprintf("%0.2f", round(cuml.purch.pct, digits = 2))),
    hjust = 0,
    vjust = 0.5
  )

# One horizontal bracket (x = 0 to the first x quantile, at the height of
# the first y quantile) and one vertical bracket (second y quantile up to 1).
br1 <- bracketsGrob(
  x1 = 0, y1 = quants.y[1], x2 = quants.x[1], y2 = quants.y[1],
  h = 0.05, lwd = 2, col = "blue"
)
br2 <- bracketsGrob(
  x1 = quants.x[2], y1 = quants.y[2], x2 = quants.x[2], y2 = 1,
  h = 0.05, lwd = 2, col = "blue"
)

# NOTE: with no xmin/xmax/ymin/ymax the grobs fill the entire panel, which
# includes the padding ggplot2 adds around the data.
curve + annotation_custom(br1) + annotation_custom(br2)

Created on 2019-05-06 by the reprex package (v0.2.1)

Have you tried plotting with clip = "off"? The issue below is closed, as it has been implemented, but it'll give you a sense of what it should do.

https://twitter.com/ClausWilke/status/991542952802619392

You might also look at the margin adjustment options:

I tried setting clip to 'off' and playing with the margins, but unless I'm not understanding something, I don't think either quite does what I'm looking for. Clip allows for text/etc. to be displayed even when it spills outside the plot area, and margin adjustments tend to be used in a similar way: to expand the plot area, give titles/labels breathing room, etc.

What I am trying to do is constrain the plot area so that the grid coordinates conform to the plotted X,Y values. I had never noticed this before running into this issue, but it looks like ggplot pads the plot area. For example, in the lower left of the plot, there's an area to the left of zero on the x-axis and below 0 on the y-axis. If you click down there using the grid.locator() function from grid with units set to 'npc', you can see how the coordinates are off just a bit. Playing with the margins does not appear to change this.

What did end up working was supplying a set of xmin/xmax ymin/ymax arguments to annotation_custom(). If you set those using the same units as your data, it constrains the viewport to those limits and the brackets plot where they should! I figured this out just now as I was typing up a response, so thanks, your response was the catalyst for me to figure this out.

New reprex with arguments added to cause brackets to plot correctly:

library(ggplot2)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggrepel)
library(pBrackets)

# Create a deferred-drawing grob around pBrackets' grid.brackets().
#
# Args:
#   ...: Any arguments accepted by grid.brackets(); they are captured in a
#        list and replayed with do.call() when the grob is drawn.
#
# Returns:
#   A "recordedGrob" for use with annotation_custom().
bracketsGrob <- function(...) {
  # `l` has to be reachable from the environment given to recordGrob(),
  # because the recorded expression is evaluated there later on.
  e <- new.env()
  e$l <- list(...)
  # recordGrob() is exported by grid; `::` replaces the unnecessary `:::`.
  grid::recordGrob(
    {
      do.call(grid.brackets, l)
    },
    e
  )
}

 set.seed(1)

customer.num <- 1:10000
# Squared normal draws stand in for per-customer purchase totals.
purchased <- rnorm(10000, mean = 5000, sd = 2500)
purchased <- purchased^2

test_df <- data.frame(customer.num, purchased)
# Ascending order keeps the cumulative columns monotone.
test_df[["purchased"]] <- sort(test_df[["purchased"]])

test_df[["cuml.purchased"]] <- cumsum(test_df[["purchased"]])
test_df[["purch.pct.rank"]] <- percent_rank(test_df[["purchased"]])
test_df[["cuml.purch.pct"]] <-
  test_df[["cuml.purchased"]] / sum(test_df[["purchased"]])

# 50/90/95/99% quantiles of the x and y variables (quantile algorithm 3).
quants.x <- quantile(
  test_df[["purch.pct.rank"]],
  probs = c(0.5, 0.9, 0.95, 0.99), type = 3
)
quants.y <- quantile(
  test_df[["cuml.purch.pct"]],
  probs = c(0.5, 0.9, 0.95, 0.99), type = 3
)

# Cumulative share of spend (y) against percentile rank of spend (x), drawn
# as a thick red line; labels show the cumulative share to two decimals.
curve <- ggplot(
  test_df,
  aes(
    x = purch.pct.rank,
    y = cuml.purch.pct,
    label = sprintf("%0.2f", round(cuml.purch.pct, digits = 2))
  )
) +
  geom_line(size = 2, color = "red")

# Rows sitting exactly on a quantile value. Computed once and reused for the
# point and label layers.
test.y <- test_df[test_df$cuml.purch.pct %in% quants.y, ]
test.x <- test_df[test_df$purch.pct.rank %in% quants.x, ]

# Points and repelled labels at the quantile rows. hjust/vjust are given to
# geom_label_repel() itself; they used to be swallowed by sprintf() because
# the closing parenthesis was one position too late.
curve <- curve +
  geom_point(data = test.y, aes(x = purch.pct.rank, y = cuml.purch.pct)) +
  geom_label_repel(
    data = test.y,
    aes(label = sprintf("%0.2f", round(cuml.purch.pct, digits = 2))),
    hjust = 0,
    vjust = 0.5
  )

# Bracket endpoints are taken straight from the quantile values.
br1 <- bracketsGrob(
  x1 = 0, y1 = quants.y[1], x2 = quants.x[1], y2 = quants.y[1],
  h = 0.05, lwd = 2, col = "blue"
)
br2 <- bracketsGrob(
  x1 = quants.x[2], y1 = quants.y[2], x2 = quants.x[2], y2 = 1,
  h = 0.05, lwd = 2, col = "blue"
)

# Limiting each grob to x in [0, 1] and y in [0, 1] (data units) is what
# makes the brackets land on the quantile positions instead of being offset
# by the panel padding.
curve +
  annotation_custom(br1, xmin = 0, xmax = 1, ymin = 0, ymax = 1) +
  annotation_custom(br2, xmin = 0, xmax = 1, ymin = 0, ymax = 1)

Created on 2019-05-07 by the reprex package (v0.2.1)

1 Like

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.