Rarefaction curve samples grouped by Months

Hello everyone :smiley:

I'm having a couple of problems with my rarefaction curve. I would like my samples (1A, 1B, 1C ..) to be classified into 3 three treatments, in my case by months: July, August and September. As you can see the result throws me the 13 samples (13 lines). I would also like to know how to design the lines of the graph of the image that I attach to the end of the script, also in that image you will see that the samples were classified by treatments.

Thank you a lot, I hope everybody is okay.

Greetings

Osris.

library(phyloseq)
library(tidyverse)
library(vegan)
#> Loading required package: permute
#> Loading required package: lattice
#> This is vegan 2.5-6
library(readxl)
clase <- tibble::tribble(
  ~OTU,  ~`1A`, ~`1B`,  ~`1C`,  ~`1D`,  ~`1E`,  ~`1F`, ~`1G`,  ~`1H`,  ~`2A`,  ~`2B`,  ~`2C`, ~`2D`,  ~`2E`,
  "Acidobacteria",     0L,    0L,     0L,     0L,     0L,     9L,    0L,     4L,     0L,     0L,     0L,    0L,     0L,
  "Actinobacteria",   389L,   33L,    31L,   797L,  3864L,  9570L,  878L,   338L,  4450L,  1819L,  2093L, 1163L,  1988L,
  "Armatimonadetes",     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,
  "BRC1",     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,
  "Bacteroidetes",  5936L, 4927L,  1035L,  1690L,  5390L, 10966L, 3329L, 12756L, 15453L, 10255L, 15262L, 3485L,  6123L,
  "Saccharibacteria",     0L,    0L,     0L,     0L,     4L,     0L,    0L,     3L,     0L,     5L,     0L,    0L,     0L,
  "Chlamydiae",     0L,    0L,     0L,     3L,    67L,   148L,    0L,     5L,    61L,   208L,    67L,   60L,     0L,
  "Chloroflexi",     0L,    0L,     0L,     0L,    22L,   129L,    0L,    76L,     0L,   243L,    37L,  119L,    25L,
  "Cloacimonetes",     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,
  "Cyanobacteria",     0L,    0L,     0L,     0L,     0L,    37L,    2L,   761L,    22L,   102L,     0L,   12L,   163L,
  "Elusimicrobia",     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,
  "Firmicutes",    39L,  371L,   467L,   918L,   550L,    79L,  182L,  1582L,  3115L,  3272L,  1058L, 2009L,   551L,
  "Fusobacteria",     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,
  "Gemmatimonadetes",     0L,    0L,     0L,     0L,     0L,    80L,    0L,     0L,    12L,     4L,     0L,   10L,     0L,
  "Hydrogenedentes",     0L,    0L,     0L,     0L,     7L,     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,
  "Ignavibacteriae",     0L,    0L,     0L,     0L,     0L,     0L,    0L,    34L,     0L,     0L,     0L,    0L,     0L,
  "Lentisphaerae",     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,
  "Microgenomates",     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,     0L,     0L,     5L,    0L,     0L,
  "Parcubacteria",     0L,    0L,     0L,     0L,     5L,   218L,    0L,    26L,    11L,    34L,     0L,    0L,     0L,
  "Planctomycetes",    51L,    0L,     3L,    34L,   700L,  1297L,   77L,   142L,   359L,   402L,   477L,  273L,   229L,
  "Proteobacteria", 20290L, 6966L, 11753L, 15992L, 12089L,  9828L, 1503L, 12685L, 12424L,  7023L,  4933L, 7472L, 24925L,
  "SR1",     0L,    0L,     0L,     0L,     0L,    20L,    0L,     2L,    21L,     8L,     0L,    0L,     0L,
  "Spirochaetes",     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,
  "Synergistetes",     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,     0L,     0L,     0L,    0L,     0L,
  "Tenericutes",     0L,    0L,     0L,     0L,     0L,    46L,   26L,     9L,    19L,   309L,     0L,    0L,     0L,
  "Verrucomicrobia",   108L,    0L,    50L,     0L,   949L,  2781L,  318L,   397L,   953L,   651L,   811L,  566L,   117L,
  "Un Bacteria",    24L,   54L,    11L,     0L,   334L,  1223L,   60L,   127L,   718L,   496L,   130L,  193L,    76L
)
metadata <-tibble::tribble(
  ~SampleID, ~SamplingPoint,      ~Month, ~SampleorReplica,
  "1A",         "CEA1",      "July",         "Sample",
  "1B",         "CEA2",      "July",         "Sample",
  "1C",         "CEA4",      "July",         "Sample",
  "1D",         "CEA1",    "August",         "Sample",
  "1E",         "CEA3",    "August",         "Sample",
  "1F",         "CEA5",    "August",         "Sample",
  "1G",         "CEA2", "September",         "Sample",
  "1H",         "CEA4", "September",         "Sample",
  "2A",         "CEA1",      "July",         "Sample",
  "2B",         "CEA2",      "July",         "Sample",
  "2C",         "CEA4",      "July",         "Sample",
  "2D",         "CEA1",    "August",         "Sample",
  "2E",         "CEA3",    "August",         "Sample"
)


data <- clase
data # data is the name of your matrix
#> # A tibble: 27 x 14
#>    OTU    `1A`  `1B`  `1C`  `1D`  `1E`  `1F`  `1G`  `1H`  `2A`  `2B`  `2C`  `2D`
#>    <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
#>  1 Acid~     0     0     0     0     0     9     0     4     0     0     0     0
#>  2 Acti~   389    33    31   797  3864  9570   878   338  4450  1819  2093  1163
#>  3 Arma~     0     0     0     0     0     0     0     0     0     0     0     0
#>  4 BRC1      0     0     0     0     0     0     0     0     0     0     0     0
#>  5 Bact~  5936  4927  1035  1690  5390 10966  3329 12756 15453 10255 15262  3485
#>  6 Sacc~     0     0     0     0     4     0     0     3     0     5     0     0
#>  7 Chla~     0     0     0     3    67   148     0     5    61   208    67    60
#>  8 Chlo~     0     0     0     0    22   129     0    76     0   243    37   119
#>  9 Cloa~     0     0     0     0     0     0     0     0     0     0     0     0
#> 10 Cyan~     0     0     0     0     0    37     2   761    22   102     0    12
#> # ... with 17 more rows, and 1 more variable: `2E` <int>

attach(clase)
rwnames <- OTU
data <- data.matrix(data[,-1])
rownames(data) <- rwnames
data <- t(data)
S <- specnumber(data)

raremax <- min(rowSums(data))
Srare <- rarefy(data, raremax)

#Plot rarefaction results
par(mfrow = c(1,2))
plot(S, Srare, xlab = "Observed No. of Species", 
     ylab = "Rarefied No. of Species",
     main = " plot(rarefy(data, raremax))")
abline(0, 1)
rarecurve(data, step = 20, 
          sample = raremax, 
          col = "blue", 
          cex = 0.6,
          main = "rarecurve()")


rarecurve(data[1:13,], step = 20, sample = raremax, col = "blue", cex =      0.6,
          main = "rarecurve() on subset of data")


out <- rarecurve(data, step = 20, sample = raremax, label = T)


#Clean the list up a bit:

rare <- lapply(out, function(x){b <- as.data.frame(x) 
b <- data.frame(clase = b[,1], raw.read = rownames(b)) 
b$raw.read <- as.numeric(gsub("N", "",  b$raw.read)) 
return(b)})



#convert to data frame:

rare <- map_dfr(rare, function(x){
  z <- data.frame(x) 
  return(z)
}, .id = "Sample")
library(ggplot2)
write.csv(rare, file = "~/RSTUDIO/Bacteria-total/bacteria-clase-total-otu-rarefaction.csv")
#agregar manual el tratamiento
rarefaction <- read.csv("~/RSTUDIO/Bacteria-total/bacteria-clase-total-otu-rarefaction.csv", row.names=1)
#plot
#generar una escala de colores 
colourCount = length(table(rarefaction$Sample))
getPalette = colorRampPalette(c("yellowgreen","darkturquoise", "coral"))

rarcu <- ggplot(rarefaction, aes(x=raw.read, y=clase, colour = factor (Sample)))+
  geom_line(size=0.5)+
  geom_point(size=0.5)

rarcu + theme_classic()+
  theme(legend.title = element_blank())+
  scale_color_manual(values = c("yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise", "yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise"))+
  ylab("Observed Phylum Number")+
  xlab("Sequences Number")+
  scale_x_continuous(limits = c(0, 20000), breaks = c(0, 5000, 10000, 15000, 20000))
#> Warning: Removed 3896 row(s) containing missing values (geom_path).
#> Warning: Removed 3896 rows containing missing values (geom_point).

Created on 2020-05-11 by the reprex package (v0.3.0)

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.