Hello everyone
I'm having a couple of problems with my rarefaction curve. I would like my samples (1A, 1B, 1C ..) to be classified into 3 three treatments, in my case by months: July, August and September. As you can see the result throws me the 13 samples (13 lines). I would also like to know how to design the lines of the graph of the image that I attach to the end of the script, also in that image you will see that the samples were classified by treatments.
Thank you a lot, I hope everybody is okay.
Greetings
Osris.
library(phyloseq)
library(tidyverse)
library(vegan)
#> Loading required package: permute
#> Loading required package: lattice
#> This is vegan 2.5-6
library(readxl)
clase <- tibble::tribble(
~OTU, ~`1A`, ~`1B`, ~`1C`, ~`1D`, ~`1E`, ~`1F`, ~`1G`, ~`1H`, ~`2A`, ~`2B`, ~`2C`, ~`2D`, ~`2E`,
"Acidobacteria", 0L, 0L, 0L, 0L, 0L, 9L, 0L, 4L, 0L, 0L, 0L, 0L, 0L,
"Actinobacteria", 389L, 33L, 31L, 797L, 3864L, 9570L, 878L, 338L, 4450L, 1819L, 2093L, 1163L, 1988L,
"Armatimonadetes", 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
"BRC1", 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
"Bacteroidetes", 5936L, 4927L, 1035L, 1690L, 5390L, 10966L, 3329L, 12756L, 15453L, 10255L, 15262L, 3485L, 6123L,
"Saccharibacteria", 0L, 0L, 0L, 0L, 4L, 0L, 0L, 3L, 0L, 5L, 0L, 0L, 0L,
"Chlamydiae", 0L, 0L, 0L, 3L, 67L, 148L, 0L, 5L, 61L, 208L, 67L, 60L, 0L,
"Chloroflexi", 0L, 0L, 0L, 0L, 22L, 129L, 0L, 76L, 0L, 243L, 37L, 119L, 25L,
"Cloacimonetes", 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
"Cyanobacteria", 0L, 0L, 0L, 0L, 0L, 37L, 2L, 761L, 22L, 102L, 0L, 12L, 163L,
"Elusimicrobia", 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
"Firmicutes", 39L, 371L, 467L, 918L, 550L, 79L, 182L, 1582L, 3115L, 3272L, 1058L, 2009L, 551L,
"Fusobacteria", 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
"Gemmatimonadetes", 0L, 0L, 0L, 0L, 0L, 80L, 0L, 0L, 12L, 4L, 0L, 10L, 0L,
"Hydrogenedentes", 0L, 0L, 0L, 0L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
"Ignavibacteriae", 0L, 0L, 0L, 0L, 0L, 0L, 0L, 34L, 0L, 0L, 0L, 0L, 0L,
"Lentisphaerae", 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
"Microgenomates", 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 5L, 0L, 0L,
"Parcubacteria", 0L, 0L, 0L, 0L, 5L, 218L, 0L, 26L, 11L, 34L, 0L, 0L, 0L,
"Planctomycetes", 51L, 0L, 3L, 34L, 700L, 1297L, 77L, 142L, 359L, 402L, 477L, 273L, 229L,
"Proteobacteria", 20290L, 6966L, 11753L, 15992L, 12089L, 9828L, 1503L, 12685L, 12424L, 7023L, 4933L, 7472L, 24925L,
"SR1", 0L, 0L, 0L, 0L, 0L, 20L, 0L, 2L, 21L, 8L, 0L, 0L, 0L,
"Spirochaetes", 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
"Synergistetes", 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
"Tenericutes", 0L, 0L, 0L, 0L, 0L, 46L, 26L, 9L, 19L, 309L, 0L, 0L, 0L,
"Verrucomicrobia", 108L, 0L, 50L, 0L, 949L, 2781L, 318L, 397L, 953L, 651L, 811L, 566L, 117L,
"Un Bacteria", 24L, 54L, 11L, 0L, 334L, 1223L, 60L, 127L, 718L, 496L, 130L, 193L, 76L
)
metadata <-tibble::tribble(
~SampleID, ~SamplingPoint, ~Month, ~SampleorReplica,
"1A", "CEA1", "July", "Sample",
"1B", "CEA2", "July", "Sample",
"1C", "CEA4", "July", "Sample",
"1D", "CEA1", "August", "Sample",
"1E", "CEA3", "August", "Sample",
"1F", "CEA5", "August", "Sample",
"1G", "CEA2", "September", "Sample",
"1H", "CEA4", "September", "Sample",
"2A", "CEA1", "July", "Sample",
"2B", "CEA2", "July", "Sample",
"2C", "CEA4", "July", "Sample",
"2D", "CEA1", "August", "Sample",
"2E", "CEA3", "August", "Sample"
)
data <- clase
data # data is the name of your matrix
#> # A tibble: 27 x 14
#> OTU `1A` `1B` `1C` `1D` `1E` `1F` `1G` `1H` `2A` `2B` `2C` `2D`
#> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
#> 1 Acid~ 0 0 0 0 0 9 0 4 0 0 0 0
#> 2 Acti~ 389 33 31 797 3864 9570 878 338 4450 1819 2093 1163
#> 3 Arma~ 0 0 0 0 0 0 0 0 0 0 0 0
#> 4 BRC1 0 0 0 0 0 0 0 0 0 0 0 0
#> 5 Bact~ 5936 4927 1035 1690 5390 10966 3329 12756 15453 10255 15262 3485
#> 6 Sacc~ 0 0 0 0 4 0 0 3 0 5 0 0
#> 7 Chla~ 0 0 0 3 67 148 0 5 61 208 67 60
#> 8 Chlo~ 0 0 0 0 22 129 0 76 0 243 37 119
#> 9 Cloa~ 0 0 0 0 0 0 0 0 0 0 0 0
#> 10 Cyan~ 0 0 0 0 0 37 2 761 22 102 0 12
#> # ... with 17 more rows, and 1 more variable: `2E` <int>
attach(clase)
rwnames <- OTU
data <- data.matrix(data[,-1])
rownames(data) <- rwnames
data <- t(data)
S <- specnumber(data)
raremax <- min(rowSums(data))
Srare <- rarefy(data, raremax)
#Plot rarefaction results
par(mfrow = c(1,2))
plot(S, Srare, xlab = "Observed No. of Species",
ylab = "Rarefied No. of Species",
main = " plot(rarefy(data, raremax))")
abline(0, 1)
rarecurve(data, step = 20,
sample = raremax,
col = "blue",
cex = 0.6,
main = "rarecurve()")
rarecurve(data[1:13,], step = 20, sample = raremax, col = "blue", cex = 0.6,
main = "rarecurve() on subset of data")
out <- rarecurve(data, step = 20, sample = raremax, label = T)
#Clean the list up a bit:
rare <- lapply(out, function(x){b <- as.data.frame(x)
b <- data.frame(clase = b[,1], raw.read = rownames(b))
b$raw.read <- as.numeric(gsub("N", "", b$raw.read))
return(b)})
#convert to data frame:
rare <- map_dfr(rare, function(x){
z <- data.frame(x)
return(z)
}, .id = "Sample")
library(ggplot2)
write.csv(rare, file = "~/RSTUDIO/Bacteria-total/bacteria-clase-total-otu-rarefaction.csv")
#agregar manual el tratamiento
rarefaction <- read.csv("~/RSTUDIO/Bacteria-total/bacteria-clase-total-otu-rarefaction.csv", row.names=1)
#plot
#generar una escala de colores
colourCount = length(table(rarefaction$Sample))
getPalette = colorRampPalette(c("yellowgreen","darkturquoise", "coral"))
rarcu <- ggplot(rarefaction, aes(x=raw.read, y=clase, colour = factor (Sample)))+
geom_line(size=0.5)+
geom_point(size=0.5)
rarcu + theme_classic()+
theme(legend.title = element_blank())+
scale_color_manual(values = c("yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise", "yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","yellowgreen","coral","coral","coral","darkturquoise","darkturquoise","darkturquoise","darkturquoise"))+
ylab("Observed Phylum Number")+
xlab("Sequences Number")+
scale_x_continuous(limits = c(0, 20000), breaks = c(0, 5000, 10000, 15000, 20000))
#> Warning: Removed 3896 row(s) containing missing values (geom_path).
#> Warning: Removed 3896 rows containing missing values (geom_point).
Created on 2020-05-11 by the reprex package (v0.3.0)