Getting rid of spaces between bars and y scale on a horizontal bar chart

Hello, I am having some trouble getting rid of the empty space between my bars and the y-axis. I have tried setting `expand = c(0, 0)` on the scale, but it is showing this error: Error in get_labels():
! breaks and labels are different lengths

Any help would be greatly appreciated. This is my code as it stands:

#install.packages("readxl")
#install.packages("tidyverse")
#install.packages("ggplot2")

library(readxl)
library(tidyverse)
library(ggplot2)

# Location of the supplementary workbook that holds the figure's source data.
url <-'https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-022-05402-9/MediaObjects/41586_2022_5402_MOESM11_ESM.xlsx'
#Create a dataframe from the Excel data
# The workbook is downloaded in binary mode ('wb') to a temporary file and the
# "Figure 1i" sheet is then read from that file.
temp <-tempfile()
download.file(url, temp, mode='wb')
myData <- read_excel(path = temp, sheet = "Figure 1i")

# Single pipeline: trim and rename the data, then build the bar chart.
myData %>%
#Select the columns and rows that are needed, columns: 2, 4 and 6 and rows 1 to 32.
select(c(2,4,6)) %>%
slice(1:32) %>%

#Rename columns
rename(GSEA_HRC_Marker_Genes = NAME,
Mouse= sign) %>%

#Change column 3 to display as.factor
mutate(across(c(3), as.factor)) %>%
# Reorder the gene-set factor levels by NES so the bars appear sorted.
mutate(GSEA_HRC_Marker_Genes = fct_reorder(GSEA_HRC_Marker_Genes, NES)) %>%

# NES goes on the x axis and the gene sets on the y axis (horizontal bars),
# with the fill colour taken from the Mouse factor.
ggplot(aes(x=NES, y=GSEA_HRC_Marker_Genes, fill = Mouse)) +
# stat='identity' draws each bar at its NES value; bars sharing a gene set are
# dodged side by side.
geom_bar(stat='identity', width = 0.6, position = position_dodge(width = 0.7)) +

ggtitle("NES Scores of GSEA HRC-Marker Genes") +

xlab("NES")+
ylab("GSEA HRC-Marker Genes") +

# NOTE(review): `labels` below is a named vector keyed by gene-set names, which
# are mapped to the y axis, yet it is passed to the continuous x scale. This is
# the likely source of "breaks and labels are different lengths"; relabelling
# the categories presumably belongs on the discrete y scale or in the data.
scale_x_continuous(position = "top", expand = c(0,0),
labels = c("HALLMARK_HYPOXIA" = "Hypoxia",
"HALLMARK_APICAL_JUNCTION" = "Apical junction",
"GO0016337 CELL-CELL ADHESION" = "Cell-cell adhesion",
"GO0005911 CELL-CELL JUNCTION" = "Cell-cell junction",
"04510 FOCAL ADHESION" = "Focal adhesion",
"GO0031012 EXTRACELLULAR MATRIX" = "ECM",
"GO0005578 PROTEINACEOUS EXTRACELLULAR MATRIX" = "Proteinaceous ECM",
"GO0007156 HOMOPHILIC CELL ADHESION" = "Homophilic cell adhesion",
"04810 REGULATION OF ACTIN CYTOSKELETON" = "Regulation of actin cytoskeleton",
"GO0005604 BASEMENT MEMBRANE" = "Basement membrane",
"GO0030198 EXTRACELLULAR MATRIX ORGANIZATION" = "ECM organization",
"GO0030334 REGULATION OF CELL MIGRATION" = "Regulation of cell migration",
"GO2000145 REGULATION OF CELL MOTILITY" = "Regulation of cell motility",
"GO0045095 KERATIN FILAMENT" = "Keratin Filament",
"04512 ECM-RECEPTOR INTERACTION" = "ECM-receptor interaction",
"GO0046578 REGULATION OF RAS PROTEIN SIGNAL TRANSDUCTION" = "Regulation of RAS signal transduction")) +

# Legend text for the fill levels, applied in factor-level order.
# NOTE(review): the second and third labels are identical - confirm intended.
scale_fill_discrete(labels=c('Human (FDR < 0.1)', 'Mouse (FDR > 0.1)','Mouse (FDR > 0.1)')) +

# Horizontal legend below the plot, centred title, and a global text size of 3.
theme(legend.direction = "horizontal", legend.position = "bottom") +
theme(plot.title = element_text(hjust = 0.5)) +
theme(text = element_text(size = 3))

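I think your issue is coming from the use of "labels" in scale_x_continuous(). Those labels are for the gene-set categories on the y-axis, but they are being given to the continuous x scale, where the number of labels has to match the number of axis breaks; that mismatch is what triggers the error. When you sort out your labels before the plot (in a mutate() step), the issue resolves itself.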

library(readxl)
library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 4.2.2
#> Warning: package 'purrr' was built under R version 4.2.2
library(ggplot2)

# Fetch the supplementary workbook into a temporary file (binary mode, so the
# .xlsx is not altered in transit) and load its "Figure 1i" sheet as `myData`.
url <- 'https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-022-05402-9/MediaObjects/41586_2022_5402_MOESM11_ESM.xlsx'
temp <- tempfile()
download.file(url = url, destfile = temp, mode = 'wb')
myData <- read_excel(temp, sheet = "Figure 1i")

# Short display labels for the gene sets, keyed by the exact names found in the
# spreadsheet's NAME column. Keeping the mapping in one named vector makes it
# easy to audit and extend.
gene_set_labels <- c(
  "HALLMARK_HYPOXIA" = "Hypoxia",
  "HALLMARK_APICAL_JUNCTION" = "Apical junction",
  "GO0016337 CELL-CELL ADHESION" = "Cell-cell adhesion",
  "GO0005911 CELL-CELL JUNCTION" = "Cell-cell junction",
  "04510 FOCAL ADHESION" = "Focal adhesion",
  "GO0031012 EXTRACELLULAR MATRIX" = "ECM",
  "GO0005578 PROTEINACEOUS EXTRACELLULAR MATRIX" = "Proteinaceous ECM",
  "GO0007156 HOMOPHILIC CELL ADHESION" = "Homophilic cell adhesion",
  "04810 REGULATION OF ACTIN CYTOSKELETON" = "Regulation of actin cytoskeleton",
  "GO0005604 BASEMENT MEMBRANE" = "Basement membrane",
  "GO0030198 EXTRACELLULAR MATRIX ORGANIZATION" = "ECM organization",
  "GO0030334 REGULATION OF CELL MIGRATION" = "Regulation of cell migration",
  "GO2000145 REGULATION OF CELL MOTILITY" = "Regulation of cell motility",
  "GO0045095 KERATIN FILAMENT" = "Keratin Filament",
  "04512 ECM-RECEPTOR INTERACTION" = "ECM-receptor interaction",
  "GO0046578 REGULATION OF RAS PROTEIN SIGNAL TRANSDUCTION" = "Regulation of RAS signal transduction"
)

# Plot-ready data: the three columns and 32 rows needed for the figure, with
# readable gene-set labels ordered by NES.
plot_dat <-
  myData %>%
  # Select the columns and rows that are needed: columns 2, 4 and 6, rows 1 to 32.
  select(c(2, 4, 6)) %>%
  slice(1:32) %>%
  # Rename columns
  rename(GSEA_HRC_Marker_Genes = NAME,
         Mouse = sign) %>%
  mutate(
    # Convert the third column to a factor so it can drive the fill colour.
    across(c(3), as.factor),
    # Swap each gene-set name for its short label. Names missing from the map
    # keep their original value instead of silently becoming NA, which is what
    # a case_when() without a fallback branch would produce.
    GSEA_HRC_Marker_Genes = coalesce(
      unname(gene_set_labels[as.character(GSEA_HRC_Marker_Genes)]),
      as.character(GSEA_HRC_Marker_Genes)
    ),
    # Order the labels by NES so the bars are drawn sorted.
    GSEA_HRC_Marker_Genes = fct_reorder(GSEA_HRC_Marker_Genes, NES)
  )

# Horizontal dodged bar chart: NES on the x axis (drawn at the top), one row of
# bars per gene set, fill colour taken from the Mouse factor.
ggplot(
  data = plot_dat,
  mapping = aes(x = NES, y = GSEA_HRC_Marker_Genes, fill = Mouse)
) +
  geom_bar(
    stat = "identity",
    width = 0.6,
    position = position_dodge(width = 0.7)
  ) +
  ggtitle("NES Scores of GSEA HRC-Marker Genes") +
  xlab("NES") +
  ylab("GSEA HRC-Marker Genes") +
  # No padding at the low end of the x scale, 10% padding at the high end.
  scale_x_continuous(
    position = "top",
    expand = expansion(mult = c(0, .1))
  ) +
  # Legend text for the fill levels, applied in factor-level order.
  scale_fill_discrete(
    labels = c('Human (FDR < 0.1)', 'Mouse (FDR > 0.1)', 'Mouse (FDR > 0.1)')
  ) +
  # Horizontal legend below the plot; centred title.
  theme(legend.direction = "horizontal", legend.position = "bottom") +
  theme(plot.title = element_text(hjust = 0.5))

Created on 2022-11-22 with reprex v2.0.2

This topic was automatically closed 42 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.