Horizontal Bar plots vs Ordering

Hello,
I want to compare two groups that share the same row names. I ordered the data frame by one of its numeric variables; however, the plot looks as if no ordering had been applied. Below is the code I used to create the simulated dataset and to draw the plot. Could I ask for your feedback on how to get an ordered bar graph?
Thank you.

# Simulated pathway table: 11 pathways, each listed once per group
# (rows 1-11 are "WT", rows 12-22 are "KO").
data <- data.frame(
  pathway = c(
    "cGMP-PKG", "Human infection", "Apelinhway", "Ribosome", "HIF-1",
    "Carbon metabolism", "Purine metabolism", "Cholinergic synapse",
    "ErbB", "Cysteine", "Valine",
    "Human infection", "Apelinhway", "cGMP-PKG", "Purine metabolism",
    "Ribosome", "Carbon metabolism", "Cholinergic synapse", "ErbB",
    "HIF-1", "Valine", "Cysteine"
  ),
  logFC = runif(22, min = -1, max = 1),
  pvalue = runif(22, min = 0.001, max = 0.05),
  count = c(
    7, 12, 19, 12, 34, 8, 5, 4, 7, 9, 20,
    32, 4, 7, 9, 10, 22, 3, 5, 8, 9, 10
  ),
  group = rep(c("WT", "KO"), each = 11)
)

# Sort the rows by gene count, largest first.
data_order <- data[order(data$count, decreasing = TRUE), ]

library(ggplot2)
library(dplyr)
 # Horizontal, dodged bars of gene count per pathway, one bar per group.
 data_order %>%
  ggplot(aes(x = pathway, y = count, fill = group)) +
  geom_col(position = position_dodge()) +
  coord_flip() +
  theme(
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 10),
    title = element_text(size = 8, face = "bold")
  ) +
  labs(x = "Pathway Names", y = "Number of genes", title = "Regulated")

Hi, you need to turn it into a factor to order your variable in ggplot2:

# Simulated pathway table: 11 pathways, each listed once per group
# (rows 1-11 are "WT", rows 12-22 are "KO").
data <- data.frame(
  pathway = c(
    "cGMP-PKG", "Human infection", "Apelinhway", "Ribosome", "HIF-1",
    "Carbon metabolism", "Purine metabolism", "Cholinergic synapse",
    "ErbB", "Cysteine", "Valine",
    "Human infection", "Apelinhway", "cGMP-PKG", "Purine metabolism",
    "Ribosome", "Carbon metabolism", "Cholinergic synapse", "ErbB",
    "HIF-1", "Valine", "Cysteine"
  ),
  logFC = runif(22, min = -1, max = 1),
  pvalue = runif(22, min = 0.001, max = 0.05),
  count = c(
    7, 12, 19, 12, 34, 8, 5, 4, 7, 9, 20,
    32, 4, 7, 9, 10, 22, 3, 5, 8, 9, 10
  ),
  group = rep(c("WT", "KO"), each = 11)
)

library(ggplot2)
library(dplyr)
#> 
#> Attache Paket: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(forcats)
# Reorder the pathway factor levels by count before plotting, so the bars
# follow that order instead of the alphabetical default.
data %>%
  mutate(pathway = forcats::fct_reorder(pathway, count)) %>%
  ggplot(aes(x = pathway, y = count, fill = group)) +
  geom_col(position = position_dodge()) +
  coord_flip() +
  theme(
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 10),
    title = element_text(size = 8, face = "bold")
  ) +
  labs(x = "Pathway Names", y = "Number of genes", title = "Regulated")

Created on 2020-07-10 by the reprex package (v0.3.0)

Since your data is already summarized, there are only two points we can draw per pathway. In case you have the raw data, just replace it and it should work as expected.

# Simulated pathway table: 11 pathways, each listed once per group
# (rows 1-11 are "WT", rows 12-22 are "KO").
# The object must be named `data`: the pipeline below starts from `data`, and
# without this assignment that name resolves to the base function `data()`.
data <- data.frame(
  pathway = c(
    "cGMP-PKG", "Human infection", "Apelinhway", "Ribosome", "HIF-1",
    "Carbon metabolism", "Purine metabolism", "Cholinergic synapse",
    "ErbB", "Cysteine", "Valine",
    "Human infection", "Apelinhway", "cGMP-PKG", "Purine metabolism",
    "Ribosome", "Carbon metabolism", "Cholinergic synapse", "ErbB",
    "HIF-1", "Valine", "Cysteine"
  ),
  logFC = runif(22, min = -1, max = 1),
  pvalue = runif(22, min = 0.001, max = 0.05),
  count = c(
    7, 12, 19, 12, 34, 8, 5, 4, 7, 9, 20,
    32, 4, 7, 9, 10, 22, 3, 5, 8, 9, 10
  ),
  group = rep(c("WT", "KO"), each = 11)
)

library(ggplot2)
library(dplyr)
#> 
#> Attache Paket: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(forcats)

# One point per row (pathway x group) at its logFC, coloured by group;
# pathways are ordered along the y axis by logFC.
data %>%
  mutate(pathway = forcats::fct_reorder(pathway, logFC)) %>%
  ggplot(aes(x = logFC, y = pathway, color = group)) +
  geom_point() +
  geom_vline(xintercept = 0) + # reference line at logFC = 0
  theme(
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 10),
    title = element_text(size = 8, face = "bold")
  ) +
  ylab("Pathway ") + xlab("logFC") +
  ggtitle("Regulated ")

Created on 2020-07-10 by the reprex package (v0.3.0)

Or do you mean plotting e.g. 7 dots at the given logFC value in case count == 7? But they would anyway all show as one due to the same placement?

You're welcome! If each dot refers to the same logFC value, this is not how one should do it, since your x axis is numeric. This would map different logFC values to each dot, which is not supported by the data. I cannot come up with another solution right now.

Edit: You could also map the count variable to the size or area of the points.

# Simulated pathway table: 11 pathways, each listed once per group
# (rows 1-11 are "WT", rows 12-22 are "KO").
data <- data.frame(
  pathway = c(
    "cGMP-PKG", "Human infection", "Apelinhway", "Ribosome", "HIF-1",
    "Carbon metabolism", "Purine metabolism", "Cholinergic synapse",
    "ErbB", "Cysteine", "Valine",
    "Human infection", "Apelinhway", "cGMP-PKG", "Purine metabolism",
    "Ribosome", "Carbon metabolism", "Cholinergic synapse", "ErbB",
    "HIF-1", "Valine", "Cysteine"
  ),
  logFC = runif(22, min = -1, max = 1),
  pvalue = runif(22, min = 0.001, max = 0.05),
  count = c(
    7, 12, 19, 12, 34, 8, 5, 4, 7, 9, 20,
    32, 4, 7, 9, 10, 22, 3, 5, 8, 9, 10
  ),
  group = rep(c("WT", "KO"), each = 11)
)

library(tidyverse)
# Dot plot of logFC per pathway: colour encodes group, point size encodes
# count, and the two groups are dodged so their points do not overlap.
data %>%
  mutate(pathway = forcats::fct_reorder(pathway, logFC)) %>%
  ggplot(aes(x = logFC, y = pathway, color = group, size = count)) +
  geom_vline(xintercept = 0) +
  geom_point(position = position_dodge(width = .5)) +
  theme(
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 10),
    title = element_text(size = 8, face = "bold")
  ) +
  labs(x = "logFC", y = "Pathway ", title = "Regulated ")

Created on 2020-07-10 by the reprex package (v0.3.0)

Thanks a lot! I think this is fine !

You're welcome! Please mark this topic as solved :slight_smile:

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

@Z3tt Thank you so much. Maybe if I could ask one more thing here.

In the same data set, I would like to show the relationship between pathway and logFC with points. I really need to see the number of dots (points) in each group, taken from the count variable or any other. I could not find a place to put the count variable so that it controls the number of dots.
Here is the code I used
# Dot plot of logFC per pathway, pathways ordered by logFC.
# group is mapped to `color` rather than `fill`: the default point shape of
# geom_point() has no fill, so a fill mapping would not be visible.
data %>%
  mutate(pathway = forcats::fct_reorder(pathway, logFC)) %>%
  ggplot(aes(x = logFC, y = pathway, color = group)) +
  geom_point() +
  geom_vline(xintercept = 0) + # reference line at logFC = 0
  theme(
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 10),
    title = element_text(size = 8, face = "bold")
  ) +
  ylab("Pathway ") + xlab("logFC") +
  ggtitle("Regulated ")

Just I would like to make a plot like the uploaded picture below.

Capture

Thank you for your help

Thanks again.
The numbers in the count variable represent the number of genes in my case. For example,
I want to plot 7 dots for the cGMP-PKG pathway in the WT group and 34 dots in the KO group at a given logFC, and so on for the other pathways.

But that's the problem...? Here is a way to do it as you said (if I got you right), and two suggested ways to deal with the overplotting issue as a starting point:

# Simulated pathway table: 11 pathways, each listed once per group
# (rows 1-11 are "WT", rows 12-22 are "KO").
data <- data.frame(
  pathway = c(
    "cGMP-PKG", "Human infection", "Apelinhway", "Ribosome", "HIF-1",
    "Carbon metabolism", "Purine metabolism", "Cholinergic synapse",
    "ErbB", "Cysteine", "Valine",
    "Human infection", "Apelinhway", "cGMP-PKG", "Purine metabolism",
    "Ribosome", "Carbon metabolism", "Cholinergic synapse", "ErbB",
    "HIF-1", "Valine", "Cysteine"
  ),
  logFC = runif(22, min = -1, max = 1),
  pvalue = runif(22, min = 0.001, max = 0.05),
  count = c(
    7, 12, 19, 12, 34, 8, 5, 4, 7, 9, 20,
    32, 4, 7, 9, 10, 22, 3, 5, 8, 9, 10
  ),
  group = rep(c("WT", "KO"), each = 11)
)

library(tidyverse)
# Variant 1: uncount(count) repeats every row `count` times, so each gene
# gets its own point. All copies share one logFC and are drawn on top of
# each other.
data %>%
  mutate(pathway = forcats::fct_reorder(pathway, logFC)) %>%
  uncount(count) %>%
  ggplot(aes(x = logFC, y = pathway, color = group)) +
  geom_point() +
  geom_vline(xintercept = 0) +
  theme(
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 10),
    title = element_text(size = 8, face = "bold")
  ) +
  labs(x = "logFC", y = "Pathway ", title = "Regulated ")


# Variant 2: same expanded data, but the points are jittered and drawn as
# semi-transparent open circles to reduce overplotting.
data %>%
  mutate(pathway = forcats::fct_reorder(pathway, logFC)) %>%
  uncount(count) %>%
  ggplot(aes(x = logFC, y = pathway, color = group)) +
  geom_jitter(alpha = .4, shape = 1) +
  geom_vline(xintercept = 0) +
  theme(
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 10),
    title = element_text(size = 8, face = "bold")
  ) +
  labs(x = "logFC", y = "Pathway ", title = "Regulated ")


# Variant 3: same expanded data (one row per gene via uncount()), with the
# points arranged as a beeswarm so that every dot stays visible.
data %>%
  mutate(pathway = forcats::fct_reorder(pathway, logFC)) %>%
  uncount(count) %>%
  ggplot(aes(x = logFC, y = pathway, color = group)) +
  # groupOnX = FALSE: the grouping variable (pathway) is on the y axis.
  # Spelled out as FALSE because `F` is an ordinary, reassignable variable.
  ggbeeswarm::geom_beeswarm(size = .2, groupOnX = FALSE, cex = .7) +
  geom_vline(xintercept = 0) + # reference line at logFC = 0
  scale_y_discrete(expand = c(.1, .1)) +
  coord_cartesian(clip = "off") + # do not clip points at the panel edge
  theme(
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 10),
    title = element_text(size = 8, face = "bold")
  ) +
  ylab("Pathway ") + xlab("logFC") +
  ggtitle("Regulated ")

Created on 2020-07-10 by the reprex package (v0.3.0)

Thank you so much!
Yes this is exactly what I was looking for.
Do you think you can show the dots horizontally?