Problem Change a name of the variables of a column

Hello! This is my code,but i have a problem.

setwd("/Volumes/eHDD40_10TB/PROJECTS/Ariane_RNA seq/Raw data_RNA seq_Ariane/Data/BL6")
#setwd("D:/PROJECTS/Resistin_High Fat diet_miR155/DEseq Tanycytes")
.libPaths(c( .libPaths(), "D;/R/R-4.0.2/library") )
install.packages("pheatmap")
install.packages("reshape2")
install.packages("gplots")
install.packages("RColorBrewer")
install.packages("ggplot2")
install.packages("reshape")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")

BiocManager::install("apeglm")
BiocManager::install("DESeq2")
DESeq_tan_input <-"/Volumes/eHDD40_10TB/PROJECTS/Ariane_RNA seq/Raw data_RNA seq_Ariane/Data/BL6/input"
DESeq_tan_output <-"/Volumes/eHDD40_10TB/PROJECTS/Ariane_RNA seq/Raw data_RNA seq_Ariane/Data/BL6/output"
library(DESeq2)
library(ggplot2)
Proj_count_data = read.table (file = "/Volumes/eHDD40_10TB/PROJECTS/Ariane_RNA seq/Raw data_RNA seq_Ariane/Data/BL6/count_matrix.txt", header = T, row.names = 1, sep = '\t')

Proj_col_data = read.table(file = "/Volumes/eHDD40_10TB/PROJECTS/Ariane_RNA seq/Raw data_RNA seq_Ariane/Data/BL6/BL6Metadata.csv", header = T, sep = ';')

head(Proj_count_data,8)
boxplot(Proj_count_data)
hist(Proj_count_data [,1])
pseudoCount = log2(Proj_count_data + 1)
boxplot(pseudoCount)
hist(pseudoCount[,1])

library(DESeq2)
library(ggplot2)
library(reshape)
pseudoCount = as.data.frame(pseudoCount)
df = melt(pseudoCount, variable.name = "value", value.name = "X") # reshape the matrix
print(length())
write.table(df, file ="/Volumes/eHDD40_10TB/PROJECTS/Ariane_RNA seq/Raw data_RNA seq_Ariane/Data/BL6/count_matrixnamechanged.txt")
df_new = read.table (file = "/Volumes/eHDD40_10TB/PROJECTS/Ariane_RNA seq/Raw data_RNA seq_Ariane/Data/BL6/count_matrixnamechanged.txt", header = T, row.names = 1, sep = '\t')
df_new
df_new = melt(pseudoCount, variable.names = "value", value.name = "X")

df_new = data.frame(df_new, Condition = substr (df$value, 1,18))
ggplot(df_new, aes(x = variable, y = value , fill = value)) + geom_boxplot() + xlab("") +
ylab(expression(log[2](count + 1)))

we're testing for the different condidtions

dds = DESeqDataSetFromMatrix(countData = Proj_count_data, colData = Proj_col_data, design =~ Condition)
dds

df_new should have the same variable name of Proj_col_data,but it does not. How can i do to resolve the problem?

This defines df_new as having two variables. Condition could be replaced with the desired name in that expression or could be changed by

colnames(df_new)[2] <- "desired"

Thank you,i resolved!
But now I have an error in the DESeq: “all variables in design formula must be columns in colData”

setwd("/Volumes/eHDD40_10TB/PROJECTS/Ariane_RNA seq/Raw data_RNA seq_Ariane/Data/BL6")
#setwd("D:/PROJECTS/Resistin_High Fat diet_miR155/DEseq Tanycytes")
.libPaths(c( .libPaths(), "D;/R/R-4.0.2/library") )
install.packages("pheatmap")
install.packages("reshape2")
install.packages("gplots")
install.packages("RColorBrewer")
install.packages("ggplot2")
install.packages("reshape")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")

BiocManager::install("apeglm")
BiocManager::install("DESeq2")
DESeq_tan_input <-"/Volumes/eHDD40_10TB/PROJECTS/Ariane_RNA seq/Raw data_RNA seq_Ariane/Data/BL6/input"
DESeq_tan_output <-"/Volumes/eHDD40_10TB/PROJECTS/Ariane_RNA seq/Raw data_RNA seq_Ariane/Data/BL6/output"
library(DESeq2)
library(ggplot2)
Proj_count_data = read.table (file = "/Volumes/eHDD40_10TB/PROJECTS/Ariane_RNA seq/Raw data_RNA seq_Ariane/Data/BL6/count_matrix.txt", header = T, row.names = 1, sep = '\t')

Proj_col_data = read.table(file = "/Volumes/eHDD40_10TB/PROJECTS/Ariane_RNA seq/Raw data_RNA seq_Ariane/Data/BL6/BL6Metadata.csv", header = T, sep = ';')

head(Proj_count_data,8)
boxplot(Proj_count_data)
hist(Proj_count_data [,1])
pseudoCount = log2(Proj_count_data + 1)
boxplot(pseudoCount)
hist(pseudoCount[,1])

library(DESeq2)
library(ggplot2)
library(reshape)
pseudoCount = as.data.frame(pseudoCount)
df = melt(pseudoCount, variable.name = "variable", value.name = "value") # reshape the matrix
write.table(df, file ="/Volumes/eHDD40_10TB/PROJECTS/Ariane_RNA seq/Raw data_RNA seq_Ariane/Data/BL6/count_matrixnamechanged.txt")
df_new = read.table (file = "/Volumes/eHDD40_10TB/PROJECTS/Ariane_RNA seq/Raw data_RNA seq_Ariane/Data/BL6/Copy of count_matrixnamechanged.txt", header = T, row.names = 1, sep = '\t')
df_new
df_new = data.frame(df_new, Condition = substr (df$value, 1,18))
ggplot(df_new, aes(x = value, y = X , fill = X)) + geom_boxplot() + xlab("") +
ylab(expression(log[2](count + 1)))

we're testing for the different condidtions

dds = DESeqDataSetFromMatrix(countData = Proj_count_data, colData = Proj_col_data, design =~ Condition)
dds

Check the help page for DESeqDataSetFromMatrix. Sounds like it's looking for a variable in Proj_col_data that's missing.

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.