Bar plot with error bars

Hi all,

I am trying to make a grouped bar plot with two character variables and add error bars, but for the life of me I cannot figure out how to "extract" the SEM from ddply so that it can be used in the graph itself. Maybe I am overcomplicating it? Any help would be much appreciated!

library (ggplot2)
library(plyr)
library(dplyr)
library(reshape2)

# Raw observations, one element per sample. The samples are ordered by flood
# stage: 8 "Before", then 6 "During", then 11 "After".
FloodStage <- rep(c("Before", "During", "After"), times = c(8, 6, 11))
Treatment <- c(
  # Before
  "Connected", "Connected", "Disconnected", "Disconnected",
  "Connected", "Disconnected", "Connected", "Disconnected",
  # During
  "Connected", "Connected", "Connected", "Connected",
  "Disconnected", "Disconnected",
  # After
  "Connected", "Connected", "Disconnected", "Disconnected",
  "Connected", "Connected", "Disconnected", "Disconnected",
  "Connected", "Disconnected", "Connected"
)
Scraper <- c(
  # Before
  1.438848921, 2.577319588, 6.091370558, 11.81102362,
  7.692307692, 7.792207792, 21.875, 7.462686567,
  # During
  5, 7.894736842, 2.051282051, 13.58695652, 36.80981595, 18.49710983,
  # After
  4.624277457, 5.844155844, 1.169590643, 2.34375, 12.42236025, 3.246753247,
  10.28571429, 16.32653061, 7.692307692, 8, 2.830188679
)

# Assemble the data frame with both grouping columns as factors whose levels
# are listed explicitly (Before/During/After and Connected/Disconnected)
# instead of being left in alphabetical order.
df <- data.frame(
  FloodStage = factor(FloodStage, levels = c("Before", "During", "After")),
  Treatment = factor(Treatment, levels = c("Connected", "Disconnected")),
  Scraper = Scraper
)


# Reshape to long format: FloodStage and Treatment stay as id columns, and the
# remaining column (Scraper) is stacked into `variable` / `value`.
melt <- melt(df, id.vars = c("FloodStage", "Treatment"))

# One row per FloodStage x Treatment x variable, holding the mean, the standard
# deviation and the standard error of the mean (sd / sqrt(n)) of `value`.
metric <- ddply(
  .data = melt,
  .variables = c("FloodStage", "Treatment", "variable"),
  .fun = summarise,
  mean = mean(value),
  sd = sd(value),
  sem = sd(value) / sqrt(length(value))
)


# Grouped bar plot of Scraper by Treatment, with bars filled and dodged by
# FloodStage, drawn directly from the raw observations in `df`.
# NOTE(review): `df` has several rows per Treatment/FloodStage combination and
# stat = "identity" plots each row as-is, so the bars for one combination are
# presumably overdrawn rather than averaged -- confirm this is intended.
ggplot(df) + 
  geom_bar(aes(x=Treatment, y=Scraper, fill = FloodStage), position = "dodge", width= 0.4, stat="identity") + 
  # NOTE(review): `sem` is a column of `metric`, not of `df` (the data this plot
  # is built on), so this layer has no `sem` to evaluate. The x and fill
  # mappings are also declared only inside geom_bar(), not in ggplot(), so they
  # are not shared with this layer.
  geom_errorbar(aes(ymin=Scraper-sem, ymax=Scraper+sem), width=.2,
                position=position_dodge(0.05)) +
  # Blank x-axis title; the legend title is removed by guides() below.
  labs(x ="", y = "% Scraper", legend.title="") +
  guides(fill=guide_legend(title=NULL)) +
  theme_classic() +
  scale_fill_brewer(palette="Set1") +
  # Fixed y range of 0-45.
  scale_y_continuous(limits = c(0, 45))

Your use of geom_bar() seems to be plotting the max value of Scraper. Is that what you intend? I used dplyr's summarize() to explicitly calculate the max and then used inner_join to line up the sem values. All I changed on your version of the plot is the y axis range so the error bars would fit.

library (ggplot2)
#> Warning: package 'ggplot2' was built under R version 3.5.3
library(plyr)
library(dplyr)
library(reshape2)

# Raw observations, one element per sample. The samples are ordered by flood
# stage: 8 "Before", then 6 "During", then 11 "After".
FloodStage <- rep(c("Before", "During", "After"), times = c(8, 6, 11))
Treatment <- c(
  # Before
  "Connected", "Connected", "Disconnected", "Disconnected",
  "Connected", "Disconnected", "Connected", "Disconnected",
  # During
  "Connected", "Connected", "Connected", "Connected",
  "Disconnected", "Disconnected",
  # After
  "Connected", "Connected", "Disconnected", "Disconnected",
  "Connected", "Connected", "Disconnected", "Disconnected",
  "Connected", "Disconnected", "Connected"
)
Scraper <- c(
  # Before
  1.438848921, 2.577319588, 6.091370558, 11.81102362,
  7.692307692, 7.792207792, 21.875, 7.462686567,
  # During
  5, 7.894736842, 2.051282051, 13.58695652, 36.80981595, 18.49710983,
  # After
  4.624277457, 5.844155844, 1.169590643, 2.34375, 12.42236025, 3.246753247,
  10.28571429, 16.32653061, 7.692307692, 8, 2.830188679
)

# Assemble the data frame with both grouping columns as factors whose levels
# are listed explicitly (Before/During/After and Connected/Disconnected)
# instead of being left in alphabetical order.
df <- data.frame(
  FloodStage = factor(FloodStage, levels = c("Before", "During", "After")),
  Treatment = factor(Treatment, levels = c("Connected", "Disconnected")),
  Scraper = Scraper
)


# Reshape to long format: FloodStage and Treatment stay as id columns, and the
# remaining column (Scraper) is stacked into `variable` / `value`.
melt <- melt(df, id.vars = c("FloodStage", "Treatment"))

# One row per FloodStage x Treatment x variable, holding the mean, the standard
# deviation and the standard error of the mean (sd / sqrt(n)) of `value`.
metric <- ddply(
  .data = melt,
  .variables = c("FloodStage", "Treatment", "variable"),
  .fun = summarise,
  mean = mean(value),
  sd = sd(value),
  sem = sd(value) / sqrt(length(value))
)


# Bar plot built directly on the raw observations in `df`, with the error-bar
# layer disabled and the y axis running from 0 to 50.
# NOTE(review): `df` has several rows per Treatment/FloodStage combination and
# stat = "identity" plots each row as-is, so the visible bar height is
# presumably the largest value in each group rather than a mean.
ggplot(df) + 
  geom_bar(aes(x=Treatment, y=Scraper, fill = FloodStage), 
           position = "dodge", width= 0.4, stat="identity") + 
  # Error bars are commented out here: `df` has no `sem` column to map.
 # geom_errorbar(aes(ymin=Scraper-sem, ymax=Scraper+sem), width=.2,
  #              position=position_dodge(0.05)) +
  # Blank x-axis title; the legend title is removed by guides() below.
  labs(x ="", y = "% Scraper", legend.title="") +
  guides(fill=guide_legend(title=NULL)) +
  theme_classic() +
  scale_fill_brewer(palette="Set1") +
  scale_y_continuous(limits = c(0, 50))


# Collapse the raw data to one row per FloodStage/Treatment pair. The bar
# height is deliberately the group maximum here, computed explicitly so that
# each bar is backed by exactly one row.
dfSummarize <- df %>%
  group_by(FloodStage, Treatment) %>%
  summarize(Scraper = max(Scraper))

# Attach the per-group statistics from `metric` (including `sem`), matching on
# both grouping columns so every summarised row gets its own sem.
dfSummarize <- inner_join(dfSummarize, metric, by = c("FloodStage", "Treatment"))

# Mappings are declared once in ggplot() so the bar and error-bar layers share
# the same x and fill.
ggplot(dfSummarize, aes(x = Treatment, y = Scraper, fill = FloodStage)) +
  geom_col(position = "dodge", width = 0.4) +
  # Same width and dodge as the bars, so the two layers are dodged alike.
  geom_errorbar(aes(ymin = Scraper - sem, ymax = Scraper + sem), width = 0.4,
                position = "dodge") +
  # `legend.title` is not an aesthetic, so passing it to labs() had no effect;
  # the legend title is removed by guides() below.
  labs(x = "", y = "% Scraper") +
  guides(fill = guide_legend(title = NULL)) +
  theme_classic() +
  scale_fill_brewer(palette = "Set1") +
  # Upper limit of 50 leaves room for the error bars above the tallest bar.
  scale_y_continuous(limits = c(0, 50))

Created on 2019-12-06 by the reprex package (v0.3.0.9000)

It was not my intention to plot the highest value; my intention was to plot the mean. I need to check some other graphs now - oops. Thanks so much for your help!

Here is the graph plotted with the mean!

Rplot03

In case you are interested, here is another approach:

library(tidyverse)

# Raw observations, one element per sample. The samples are ordered by flood
# stage: 8 "Before", then 6 "During", then 11 "After".
FloodStage <- rep(c("Before", "During", "After"), times = c(8, 6, 11))
Treatment <- c(
  # Before
  "Connected", "Connected", "Disconnected", "Disconnected",
  "Connected", "Disconnected", "Connected", "Disconnected",
  # During
  "Connected", "Connected", "Connected", "Connected",
  "Disconnected", "Disconnected",
  # After
  "Connected", "Connected", "Disconnected", "Disconnected",
  "Connected", "Connected", "Disconnected", "Disconnected",
  "Connected", "Disconnected", "Connected"
)
Scraper <- c(
  # Before
  1.438848921, 2.577319588, 6.091370558, 11.81102362,
  7.692307692, 7.792207792, 21.875, 7.462686567,
  # During
  5, 7.894736842, 2.051282051, 13.58695652, 36.80981595, 18.49710983,
  # After
  4.624277457, 5.844155844, 1.169590643, 2.34375, 12.42236025, 3.246753247,
  10.28571429, 16.32653061, 7.692307692, 8, 2.830188679
)

# Assemble the data frame with both grouping columns as factors whose levels
# are listed explicitly (Before/During/After and Connected/Disconnected)
# instead of being left in alphabetical order.
df <- data.frame(
  FloodStage = factor(FloodStage, levels = c("Before", "During", "After")),
  Treatment = factor(Treatment, levels = c("Connected", "Disconnected")),
  Scraper = Scraper
)

# Summarise the raw data to one row per FloodStage/Treatment pair (mean and
# standard error of Scraper) and pipe the result straight into a grouped bar
# plot with error bars.
df %>%
  group_by(FloodStage, Treatment) %>%
  # Order matters: `sem` must be computed from the raw Scraper values before
  # `Scraper` is overwritten with its mean, because summarise() evaluates its
  # arguments in sequence and later ones see the earlier results.
  summarise(sem = sd(Scraper) / sqrt(n()),
            Scraper = mean(Scraper)) %>%
  ggplot(aes(x = Treatment, y = Scraper, fill = FloodStage)) +
  geom_col(position = "dodge",
           width = 0.4) +
  # Same width as the bars, so both layers are dodged alike.
  geom_errorbar(aes(ymin = Scraper - sem, ymax = Scraper + sem), width = 0.4,
                position = position_dodge()) +
  # `legend.title` is not an aesthetic, so passing it to labs() had no effect;
  # the legend title is removed by guides() below.
  labs(x = "", y = "% Scraper") +
  guides(fill = guide_legend(title = NULL)) +
  theme_classic() +
  scale_fill_brewer(palette = "Set1") +
  scale_y_continuous(limits = c(0, 50))

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.