Hi everyone!!

I am trying to make a histogram using two different databases. What I want to see is the difference in bacterial communities between the two databases, showing in the graph only the communities that account for more than 0.01% of the total. The following image shows an example of what I would like to obtain.

These are my two databases that I want to compare:

# SILVA counts per sample (`index`) and phylum, as posted in the question.
# NOTE(review): the tribble is passed as the value of data.frame()'s
# `stringsAsFactors` argument rather than as data, so this call does not
# build the intended table; the reply below reports that it produces an error.
bpdataSILVA <-data.frame (stringsAsFactors = tibble::tribble(
                                           ~index, ~Acidobacteria, ~Actinobacteria, ~Armatimonadetes, ~BRC1, ~Bacteroidetes, ~Chlamydiae, ~Chloroflexi,
                                             "1A",              0,             375,                0,     0,           5948,           0,            0,
                                             "1B",              0,              31,                0,     0,           4948,           0,            0,
                                             "1C",              0,              31,                0,     0,           1036,           0,            0,
                                             "1D",              0,             788,                0,     0,           1690,           3,            0,
                                             "1E",              0,            3778,                0,     0,           5390,          67,           22,
                                             "1F",              9,            9326,                0,     0,          11923,         150,          129,
                                             "1G",              0,             875,                0,     0,           3356,           0,            0,
                                             "1H",              4,             338,                0,     0,          12836,           5,           76,
                                             "2A",              0,            4456,                0,     0,          15445,          61,            0,
                                             "2B",              0,            1802,                0,     0,          10218,         208,          243,
                                             "2C",              0,            2093,                0,     0,          14982,          67,           37,
                                             "2D",              0,            1168,                0,     0,           3602,          60,          119,
                                             "2E",              0,            1999,                0,     0,           6141,           0,           25
                                           )
)

# RDP counts per sample (`index`) and phylum, as posted in the question.
# NOTE(review): the tribble is passed as the value of data.frame()'s
# `stringsAsFactors` argument rather than as data, so this call does not
# build the intended table; the reply below reports that it produces an error.
bpdataRDP <-data.frame (stringsAsFactors = tibble::tribble(
                                                ~index, ~Acidobacteria, ~Actinobacteria, ~Armatimonadetes, ~BRC1, ~Bacteroidetes, ~Saccharibacteria, ~Chlamydiae,
                                                  "1A",              0,             389,                0,     0,           5936,                 0,           0,
                                                  "1B",              0,              33,                0,     0,           4927,                 0,           0,
                                                  "1C",              0,              31,                0,     0,           1035,                 0,           0,
                                                  "1D",              0,             797,                0,     0,           1690,                 0,           3,
                                                  "1E",              0,            3864,                0,     0,           5390,                 4,          67,
                                                  "1F",              9,            9570,                0,     0,          10966,                 0,         148,
                                                  "1G",              0,             878,                0,     0,           3329,                 0,           0,
                                                  "1H",              4,             338,                0,     0,          12756,                 3,           5,
                                                  "2A",              0,            4450,                0,     0,          15453,                 0,          61,
                                                  "2B",              0,            1819,                0,     0,          10255,                 5,         208,
                                                  "2C",              0,            2093,                0,     0,          15262,                 0,          67,
                                                  "2D",              0,            1163,                0,     0,           3485,                 0,          60,
                                                  "2E",              0,            1988,                0,     0,           6123,                 0,           0
                                                )
 )

Created on 2020-02-03 by the reprex package (v0.3.0)

Thank you so much!!

Osiris

Please test your code before posting it. You should not have added the data.frame() part, because it produces an error.

I'm not sure I understand your question, because you haven't defined how "abundance" should be calculated, but this should at least get you very close.
BTW, what you are showing is a bar plot, not a histogram.

library(tidyverse)

# SILVA counts: one row per sample (`index`), one numeric column per phylum.
# Written column-wise; the values match the row-wise table in the question.
bpdataSILVA <- tibble::tibble(
  index = c(
    "1A", "1B", "1C", "1D", "1E", "1F", "1G", "1H",
    "2A", "2B", "2C", "2D", "2E"
  ),
  Acidobacteria = c(0, 0, 0, 0, 0, 9, 0, 4, 0, 0, 0, 0, 0),
  Actinobacteria = c(
    375, 31, 31, 788, 3778, 9326, 875, 338,
    4456, 1802, 2093, 1168, 1999
  ),
  Armatimonadetes = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  BRC1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  Bacteroidetes = c(
    5948, 4948, 1036, 1690, 5390, 11923, 3356, 12836,
    15445, 10218, 14982, 3602, 6141
  ),
  Chlamydiae = c(0, 0, 0, 3, 67, 150, 0, 5, 61, 208, 67, 60, 0),
  Chloroflexi = c(0, 0, 0, 0, 22, 129, 0, 76, 0, 243, 37, 119, 25)
)

# RDP counts: one row per sample (`index`), one numeric column per phylum.
# Written column-wise; the values match the row-wise table in the question.
bpdataRDP <- tibble::tibble(
  index = c(
    "1A", "1B", "1C", "1D", "1E", "1F", "1G", "1H",
    "2A", "2B", "2C", "2D", "2E"
  ),
  Acidobacteria = c(0, 0, 0, 0, 0, 9, 0, 4, 0, 0, 0, 0, 0),
  Actinobacteria = c(
    389, 33, 31, 797, 3864, 9570, 878, 338,
    4450, 1819, 2093, 1163, 1988
  ),
  Armatimonadetes = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  BRC1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  Bacteroidetes = c(
    5936, 4927, 1035, 1690, 5390, 10966, 3329, 12756,
    15453, 10255, 15262, 3485, 6123
  ),
  Saccharibacteria = c(0, 0, 0, 0, 4, 0, 0, 3, 0, 5, 0, 0, 0),
  Chlamydiae = c(0, 0, 0, 3, 67, 148, 0, 5, 61, 208, 67, 60, 0)
)

# Stacked bar chart comparing the share of each bacterial group in the RDP
# and SILVA tables.
#
# Steps: stack both tables (tagging each row with its source in `bpdata`),
# reshape to one row per sample/group, total the counts per source and
# group, convert the totals to a proportion of each source's overall count,
# and keep only groups that reach 0.01% (0.0001) of that total.
bind_rows(list("RDP" = bpdataRDP, "SILVA" = bpdataSILVA), .id = "bpdata") %>%
  # pivot_longer() replaces the superseded gather().
  pivot_longer(
    cols = -c(bpdata, index),
    names_to = "Bacteria",
    values_to = "Count"
  ) %>%
  group_by(bpdata, Bacteria) %>%
  # The two tables do not share all columns, so bind_rows() fills the
  # missing ones with NA; na.rm = TRUE makes those groups total 0.
  summarise(Count = sum(Count, na.rm = TRUE)) %>%
  group_by(bpdata) %>%
  mutate(Proportion = Count / sum(Count)) %>%
  # Drop the grouping so no grouped data frame leaks into the plot.
  ungroup() %>%
  filter(Proportion >= 0.0001) %>%
  ggplot(aes(x = bpdata, y = Proportion, fill = Bacteria)) +
  geom_col() +
  scale_y_continuous(labels = scales::percent_format())

Created on 2020-02-04 by the reprex package (v0.3.0.9001)

1 Like

Hi Andres, thanks for all your help! Just one more question: is there any way to make the percentage axis increase in steps of 10 (10%, 20%, 30%, ...)?

You can specify the break points with the `breaks` argument of `scale_y_continuous()`:

# Stacked bar chart comparing the share of each bacterial group in the RDP
# and SILVA tables, with y-axis breaks every 10%.
#
# Steps: stack both tables (tagging each row with its source in `bpdata`),
# reshape to one row per sample/group, total the counts per source and
# group, convert the totals to a proportion of each source's overall count,
# and keep only groups that reach 0.01% (0.0001) of that total.
bind_rows(list("RDP" = bpdataRDP, "SILVA" = bpdataSILVA), .id = "bpdata") %>%
  # pivot_longer() replaces the superseded gather().
  pivot_longer(
    cols = -c(bpdata, index),
    names_to = "Bacteria",
    values_to = "Count"
  ) %>%
  group_by(bpdata, Bacteria) %>%
  # The two tables do not share all columns, so bind_rows() fills the
  # missing ones with NA; na.rm = TRUE makes those groups total 0.
  summarise(Count = sum(Count, na.rm = TRUE)) %>%
  group_by(bpdata) %>%
  mutate(Proportion = Count / sum(Count)) %>%
  # Drop the grouping so no grouped data frame leaks into the plot.
  ungroup() %>%
  filter(Proportion >= 0.0001) %>%
  ggplot(aes(x = bpdata, y = Proportion, fill = Bacteria)) +
  geom_col() +
  # Breaks at 0%, 10%, ..., 100%; accuracy = 1 prints whole percentages.
  scale_y_continuous(
    breaks = seq(0, 1, 0.1),
    labels = scales::percent_format(accuracy = 1)
  )

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.