Help with geom_boxplot()!!!

How can I show the outliers without losing the graphical information of the boxplots?
These are the data and lines of code:

library(data.table)
library(ggplot2)
# Example data set: 41 observations, one row per recorded event.
#   ODUFault  - "SI"/"NO" flag; the plotting code filters on "SI".
#   LastFault - fault code ("SA", "SB" or "SC").
#   SubFlt    - single-letter group ("A", "B" or "C"); in every row it matches
#               the second character of LastFault.
#   Duration  - elapsed time as an "HH:MM:SS" character string.
dt<-data.table(
  # Keep the text columns as character vectors rather than factors.
  stringsAsFactors = FALSE,
  ODUFault = c("NO", "SI", "NO", "SI", "NO", "SI",
               "NO", "SI", "NO", "SI", "NO", "NO",
               "SI", "NO", "SI", "NO", "SI", "NO",
               "SI", "NO", "SI", "NO", "SI", "NO",
               "SI", "NO", "SI", "NO", "SI", "NO",
               "SI", "NO", "SI", "NO", "SI", "NO",
               "SI", "NO", "SI", "NO", "SI"),
  LastFault = c("SA", "SB", "SB", "SB", "SB", "SB",    
                "SB", "SB",  "SB", "SB", "SB", "SA",       
                "SB", "SB", "SB", "SB", "SB", "SB",    
                "SB",  "SB", "SB", "SB", "SB", "SB",    
                "SB", "SB", "SB", "SB",  "SB", "SB",    
                "SB",  "SB", "SC", "SC", "SB", "SB",    
                "SB", "SB", "SB", "SB", "SB" ),
  SubFlt = c("A", "B", "B", "B", "B", "B",          
             "B", "B", "B", "B", "B", "A",   
             "B", "B", "B", "B", "B", "B",          
             "B", "B",  "B", "B", "B", "B",          
             "B", "B", "B", "B", "B", "B",          
             "B", "B", "C", "C", "B","B",          
             "B", "B", "B", "B" ,"B"),
  # Most values cluster near 00:03:00 and 00:07:00; the last entry (00:15:33)
  # is far larger than the rest.
  Duration = c("00:09:40", "00:03:01", "00:06:58", "00:03:00", "00:06:59", "00:03:00", 
               "00:06:58", "00:03:01", "00:06:59", "00:02:59", "00:07:01", "00:00:12", 
               "00:03:06", "00:06:39", "00:02:59", "00:07:00", "00:03:01", "00:06:58", 
               "00:03:02", "00:06:57", "00:03:02", "00:06:57", "00:03:02", "00:06:57", 
               "00:02:57", "00:07:03", "00:03:00", "00:06:59", "00:03:00", "00:06:58", 
               "00:02:59", "00:07:29", "00:03:01", "00:06:29", "00:05:03", "00:04:56",
               "00:03:00", "00:06:59", "00:02:59", "00:07:00", "00:15:33")
)

# Convert the "HH:MM:SS" strings to data.table's ITime class, by reference.
dt[, Duration := as.ITime(Duration)]

# Box plot of Duration per SubFlt, restricted to the rows where
# ODUFault == "SI". The subset is taken first so that the filtering step is
# separate from the plot definition.
dtboxplot <- ggplot(
  dt[ODUFault == "SI"],
  aes(
    x = SubFlt,
    # scale_y_datetime() works on date-times, so the ITime column is converted
    # to POSIXct inside the mapping.
    y = as.POSIXct(Duration, origin = "1970-01-01", tz = "GMT"),
    color = SubFlt
  )
) +
  # Error-bar layer drawn from the box plot statistics (whisker end caps).
  stat_boxplot(
    geom = "errorbar",
    width = 0.15,
    linetype = 2,
    lwd = 0.5
  ) +
  # Dashed boxes; outliers are drawn as large red stars (shape 8).
  geom_boxplot(
    linetype = 2,
    lwd = 0.5,
    outlier.colour = "red",
    outlier.shape = 8,
    outlier.size = 4,
    notch = FALSE
  ) +
  # date_format() comes from the scales package, which is not attached in the
  # code shown, so it is called with an explicit namespace.
  scale_y_datetime(labels = scales::date_format("%M:%S")) +
  xlab("SubFlt") +
  ylab("Time")

dtboxplot

And this is what I have achieved graphically so far:


Figure 1 is the result of the lines of code shown.
In Figure 2, when the outliers are shown, the graphical information of the boxplots is lost.
Figure 3 is what I want to achieve, but filtering SubFlt "A".

I'm not sure you're losing information in panel B. It looks to me like the box plot is still there, but the quantiles are very narrow relative to the outliers (see the differences in the Y-axis between Figure 1 and Figure 2).

In this case, it may be hard to show the boxplot and outliers simultaneously in a single panel. You might consider this option, enabled by the facet_zoom() function from the ggforce package, which allows you to show a zoomed-in panel alongside the original scale to provide a global context:

# Keep only the rows where ODUFault == "SI" and convert Duration to POSIXct up
# front, so the plot can map the column directly onto a date-time y scale.
plot_dt <- dt[ODUFault == "SI"]
plot_dt$Duration <- as.POSIXct(
  plot_dt$Duration,
  origin = "1970-01-01",
  tz = "GMT" # was misspelled `tx`, so the time zone was never applied
)

library(ggforce)

# Y-range of the zoomed panel (02:50 to 03:10), built with the same conversion
# as the data so that the limits and the plotted values share one scale.
zoom_lim <- as.POSIXct(
  as.ITime(c("00:02:50", "00:03:10")),
  origin = "1970-01-01",
  tz = "GMT"
)

# Box plot of Duration per SubFlt with a zoomed-in companion panel.
dtboxplot <- ggplot(
  plot_dt,
  aes(
    x = SubFlt,
    y = Duration,
    color = SubFlt
  )
) +
  # Error-bar layer drawn from the box plot statistics (whisker end caps).
  stat_boxplot(
    geom = "errorbar",
    width = 0.15,
    linetype = 2,
    lwd = 0.5
  ) +
  # Dashed boxes; outliers are drawn as large red stars (shape 8).
  geom_boxplot(
    linetype = 2,
    lwd = 0.5,
    outlier.colour = "red",
    outlier.shape = 8,
    outlier.size = 4
  ) +
  # Overlay every observation as a point.
  geom_point() +
  scale_y_datetime("Time", labels = scales::date_format("%M:%S")) +
  xlab("SubFlt") +
  theme_bw() + # Makes it a little easier to see zoom region
  # Add a second panel restricted to zoom_lim on the y axis.
  facet_zoom(ylim = zoom_lim)

dtboxplot

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.