stack order of geom_bar reversing order

I'm plotting the results of a state assessment of student learning. I have a stacked bar plot that shows the percent of students categorized into the test's 5 different proficiency levels-- c("Did Not Yet Meet", "Partially Met", "Approached", "Met","Exceeded" ). Percents of the three categories that did not meet the benchmark ("Met") are computed as negative percents so that the following plot is displayed.

All is well when plotting as a dodged bar, but when they are stacked, the "Exceeded" category is plotted before the "Met" category. The legend displays in the correct order, but the plot does not. ggplot seems to be ordering the bars with negative values in the reverse order of those with positive values.

I have tried the kludge of mis-ordering my factor (setting factor 5 level as preceding factor 4 level) to get the bars to stack in the proper sequence. Of course, that works, but it puts my legend out of order.

This will be dynamically generated in shiny (it's a tiny part of a very large app) with options for stacking/dodging, slicing data with facets and filters, etc., so manually fabricating a legend will make for messy code.

Anyone know of a way to bypass ggplot's need to reverse stacking order when moving from negative to positive values? ...or of any way to make this work? Many thanks.

Here is a reprex
library(tidyverse)

# create datafile ---------------------------------------------------------

# Three parallel vectors, one entry per (proficiency level, year) pair.
# Entries 1-12 are the three below-benchmark levels for 2015-2018 (negative
# percents); entries 13-20 are "Met"/"Exceeded" for the same years (positive).
ProficiencyLevel <- c(
  "Did Not Yet Meet", "Partially Met", "Approached",
  "Did Not Yet Meet", "Partially Met", "Approached",
  "Did Not Yet Meet", "Partially Met", "Approached",
  "Did Not Yet Meet", "Partially Met", "Approached",
  "Met", "Exceeded",
  "Met", "Exceeded",
  "Met", "Exceeded",
  "Met", "Exceeded"
)
categoryPercent <- c(
  -0.10466272, -0.15630047, -0.25162533,
  -0.10447221, -0.16393284, -0.26625820,
  -0.09994016, -0.15219629, -0.25163375,
  -0.09522626, -0.14517377, -0.24564893,
  0.38606153, 0.10134995,
  0.38090988, 0.08442686,
  0.39767804, 0.09855177,
  0.40330958, 0.11064147
)
EndYear <- c(
  2015, 2015, 2015,
  2016, 2016, 2016,
  2017, 2017, 2017,
  2018, 2018, 2018,
  2015, 2015,
  2016, 2016,
  2017, 2017,
  2018, 2018
)

# Assemble the plotting data. The proficiency level is stored as a factor
# whose levels run from lowest to highest and whose labels carry a numeric
# prefix.
testSummary <- tibble(
  ProficiencyLevel = factor(
    ProficiencyLevel,
    levels = c("Did Not Yet Meet", "Partially Met", "Approached", "Met", "Exceeded"),
    labels = c("1.Did Not Yet Meet", "2.Partially Met", "3.Approached", "4.Met", "5.Exceeded")
  ),
  percentUpDn = categoryPercent,
  EndYear = EndYear
)

# Locking colors to factor values -----------------------------------------

# Named vector: each name is a factor label, so scale_fill_manual() matches
# colors to levels by name rather than by position.
colors5prof <- c("#C55859", "#ff7f0e", "#E7CD6A", "#2ca02c", "#1f77b4")
names(colors5prof) <- c(
  "1.Did Not Yet Meet", "2.Partially Met", "3.Approached", "4.Met", "5.Exceeded"
)

# Plotting ----------------------------------------------------------------

# Diverging stacked bars in a single layer. This reproduces the problem
# described above: on the positive side "5.Exceeded" is drawn next to zero and
# "4.Met" outside it, although the legend lists the levels in factor order.
ggplot() +
  geom_bar(
    data = testSummary,
    aes(y = percentUpDn, x = EndYear, fill = ProficiencyLevel),
    stat = "identity", alpha = .8
  ) +
  labs(
    title = "CMAS ELA: District",
    y = "Proficiency Level",
    x = "End Year"
  ) +
  theme_minimal() +
  theme(
    axis.ticks.y = element_line(size = 1),
    panel.grid = element_blank(),
    plot.title = element_text(size = 25, color = "darkgrey"),
    axis.title = element_text(size = 18, color = "darkgrey"),
    legend.position = "bottom"
  ) +
  scale_y_continuous(breaks = c(-1, 0, 1)) +
  geom_hline(yintercept = 0) +
  scale_fill_manual(values = colors5prof) +
  coord_flip()

I am not proud of it and I do not really understand why it works, but....

library(ggplot2)
library(tibble)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
# Same 20-element input vectors as in the question, written with rep() where
# the pattern repeats: 4 years x 3 below-benchmark levels, then 4 years x 2
# at-or-above-benchmark levels.
ProficiencyLevel <- c(
  rep(c("Did Not Yet Meet", "Partially Met", "Approached"), times = 4),
  rep(c("Met", "Exceeded"), times = 4)
)
categoryPercent <- c(
  -0.10466272, -0.15630047, -0.25162533,
  -0.10447221, -0.16393284, -0.26625820,
  -0.09994016, -0.15219629, -0.25163375,
  -0.09522626, -0.14517377, -0.24564893,
  0.38606153, 0.10134995,
  0.38090988, 0.08442686,
  0.39767804, 0.09855177,
  0.40330958, 0.11064147
)
EndYear <- c(
  rep(c(2015, 2016, 2017, 2018), each = 3),
  rep(c(2015, 2016, 2017, 2018), each = 2)
)

# Raw level names, lowest to highest, and the matching display labels
# ("<position>.<name>").
Levs <- c(
  "Did Not Yet Meet",
  "Partially Met",
  "Approached",
  "Met",
  "Exceeded"
)
Labs <- paste0(seq_along(Levs), ".", Levs)
# Plotting data with ProficiencyLevel still a plain character column; it is
# converted to a factor separately for the negative and positive subsets.
# Unnamed arguments take their column name from the variable passed in.
testSummary <- tibble(
  ProficiencyLevel,
  percentUpDn = categoryPercent,
  EndYear
)

# Fill colors keyed by factor label so the manual scale matches them by name.
colors5prof <- setNames(
  c("#C55859", "#ff7f0e", "#E7CD6A", "#2ca02c", "#1f77b4"),
  c("1.Did Not Yet Meet", "2.Partially Met", "3.Approached", "4.Met", "5.Exceeded")
)

# Split the data by sign so each side gets its own factor ordering.
# Why this works (see ?position_stack): positive and negative values are
# stacked separately, each outward from zero, and by default groups are stacked
# in reverse level order, so the LAST level in use sits next to the axis.
# For the negative side that puts "3.Approached" at zero, as wanted. For the
# positive side the levels are reversed so that "4.Met" becomes the last level
# in use and therefore sits at zero, with "5.Exceeded" outside it.
Neg <- testSummary %>%
  filter(percentUpDn < 0) %>%
  mutate(
    ProficiencyLevel = factor(
      ProficiencyLevel,
      levels = Levs, labels = Labs, ordered = TRUE
    )
  )
Pos <- testSummary %>%
  filter(percentUpDn > 0) %>%
  mutate(
    ProficiencyLevel = factor(
      ProficiencyLevel,
      levels = rev(Levs), labels = rev(Labs), ordered = TRUE
    )
  )

# Draw the negative and positive halves as two separate column layers that
# share one set of aesthetics; each layer brings its own factor ordering.
ggplot(mapping = aes(x = EndYear, y = percentUpDn, fill = ProficiencyLevel)) +
  geom_col(data = Neg, alpha = .8) +
  geom_col(data = Pos, alpha = .8) +
  labs(
    title = "CMAS ELA: District",
    y = "Proficiency Level",
    x = "End Year"
  ) +
  theme_minimal() +
  theme(
    axis.ticks.y = element_line(size = 1),
    panel.grid = element_blank(),
    plot.title = element_text(size = 25, color = "darkgrey"),
    axis.title = element_text(size = 18, color = "darkgrey"),
    legend.position = "bottom"
  ) +
  scale_y_continuous(breaks = c(-1, 0, 1)) +
  # Reference line at zero separating below- from at/above-benchmark levels.
  geom_hline(yintercept = 0) +
  scale_fill_manual(values = colors5prof) +
  coord_flip()

Created on 2019-07-27 by the reprex package (v0.2.1)

Thanks, FJCC! This is fantastic.

I had tried separate plotting of the positive and negative values, but hadn't reversed the order of factor levels for the positive side.

I'm with you, in that I'm not sure I understand how this works. Seems like this would create something of a collision of factor orders. Regardless, it is easily implemented in Shiny. It does make the dodged version of the plot a little funky, but resetting the factor in a conditional is a pretty minor deviation in the code.

Again, many thanks for the solution.
Steve

...and if anyone understands how/why this works, I'd love to better understand what's happening.

Did you try using specialized packages for that? I've used https://www.rdocumentation.org/packages/likert/versions/1.3.5 a lot with success and there is also https://www.rdocumentation.org/packages/sjPlot/versions/2.6.3/topics/plot_likert which I never used, but just in general it seems to come up quite often with Social Science type of work.

2 Likes

Hi,

I found another workaround too, just for fun...
I asked @steveL how the code for the level change worked, and all I then did was manually flip the labels in the plot by rearranging the breaks. So this is what I did:

# Switch levels 4 and 5: "Exceeded" is deliberately placed before "Met" in the
# factor so that the positive bars stack with "Met" next to zero.
testSummary <- tibble(
  ProficiencyLevel = factor(
    ProficiencyLevel,
    levels = c("Did Not Yet Meet", "Partially Met", "Approached", "Exceeded", "Met"),
    labels = c("1.Did Not Yet Meet", "2.Partially Met", "3.Approached", "5.Exceeded", "4.Met")
  ),
  percentUpDn = categoryPercent,
  EndYear = EndYear
)

# Switch the labels back in the legend: the factor stores levels 4 and 5 in
# swapped order, so the legend breaks are listed explicitly in 1-5 order.
ggplot(testSummary, aes(x = EndYear, y = percentUpDn, fill = ProficiencyLevel)) +
  geom_col(alpha = .8) +
  labs(
    title = "CMAS ELA: District",
    y = "Proficiency Level",
    x = "End Year"
  ) +
  theme_minimal() +
  theme(
    axis.ticks.y = element_line(size = 1),
    panel.grid = element_blank(),
    plot.title = element_text(size = 25, color = "darkgrey"),
    axis.title = element_text(size = 18, color = "darkgrey"),
    legend.position = "bottom"
  ) +
  scale_y_continuous(breaks = c(-1, 0, 1)) +
  geom_hline(yintercept = 0) +
  # Change the order of the levels in the breaks (positions 4 and 5 swapped).
  scale_fill_manual(
    values = colors5prof,
    breaks = levels(testSummary[["ProficiencyLevel"]])[c(1, 2, 3, 5, 4)]
  ) +
  coord_flip()

So now you have several work-arounds :slight_smile:
Grtz,
PJ

1 Like

PJ, this is very helpful. I had tried to manually pass the vector that I had used for levels/labels (without success) but hadn't tried rearranging the labels of the vector that I'd created. I may use FJCC's solution, as I think the mis-ordered factor could create some issues as I drill through the plot (I'm using ggiraph for interactivity in Shiny).

Thanks, PJ.
Steve

1 Like

Thanks for the suggestion.
We did play around with the likert package, but we are using ggiraph/girafe in Shiny for rollovers and to drill through the plot to other plots and tables (this plot will actually display via a drill-through from a longitudinal plot of school/district means). Since ggiraph only works with certain geoms in ggplot, we've been trying to stick with ggplot/ggiraph.
Steve

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.