Dear all,
Hope my question is not too trivial but I've really searched everywhere for an answer...
I'm trying to get the flatten table (with the name of variable tested, the correlation and the p value) but I unfortunately can't get the column names, they are all noted "NA", and the result looks like this:
row column cor p
Coul NA 0.1700 2.0e-01
Coul NA 0.1600 2.2e-01
Min_N NA 0.9800 2.6e-40
Coul NA 0.0810 5.4e-01
etc...
With this message (translated from french...) "only the first element of the 'length.out' argument is used$r"
Here is the rquery function I'm using:
#+++++++++++++++++++++++++
# Computing of correlation matrix
#+++++++++++++++++++++++++
# x : matrix
# type: possible values are "lower" (default), "upper", "full" or "flatten";
#display lower or upper triangular of the matrix, full or flatten matrix.
# graph : if TRUE, a correlogram or heatmap is plotted
# graphType : possible values are "correlogram" or "heatmap"
# col: colors to use for the correlogram
# ... : Further arguments to be passed to cor or cor.test function
# Result is a list including the following components :
# r : correlation matrix, p : p-values
# sym : Symbolic number coding of the correlation matrix
rquery.cormat<-function(x,
type=c('lower', 'upper', 'full', 'flatten'),
graph=TRUE,
graphType=c("correlogram", "heatmap"),
col=NULL, cor.method = "pearson", ...)
{
library(corrplot)
# Helper functions
#+++++++++++++++++
# Compute the matrix of correlation p-values
cor.pmat <- function(x, ...) {
mat <- as.matrix(x)
n <- ncol(mat)
p.mat<- matrix(NA, n, n)
diag(p.mat) <- 0
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
tmp <- cor.test(mat[, i], mat[, j], ...)
p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
}
}
colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
p.mat
}
# Get lower triangle of the matrix
getLower.tri<-function(mat){
upper<-mat
upper[upper.tri(mat)]<-""
mat<-as.data.frame(upper)
mat
}
# Get upper triangle of the matrix
getUpper.tri<-function(mat){
lt<-mat
lt[lower.tri(mat)]<-""
mat<-as.data.frame(lt)
mat
}
# Get flatten matrix
flattenCorrMatrix <- function(cormat, pmat) {
ut <- upper.tri(cormat)
data.frame(
row = rownames(cormat)[row(cormat)[ut]],
column = rownames(cormat)[col(cormat)[ut]],
cor =(cormat)[ut],
p = pmat[ut]
)
}
# Define color
if (is.null(col)) {
col <- colorRampPalette(
c("#67001F", "#B2182B", "#D6604D", "#F4A582",
"#FDDBC7", "#FFFFFF", "#D1E5F0", "#92C5DE",
"#4393C3", "#2166AC", "#053061"))(200)
col<-rev(col)
}
# Correlation matrix
cormat<-signif(cor(x, use = "complete.obs", ...),2)
pmat<-signif(cor.pmat(x, ...),2)
# Reorder correlation matrix
ord<-corrMatOrder(cormat, order="hclust")
cormat<-cormat[ord, ord]
pmat<-pmat[ord, ord]
# Replace correlation coeff by symbols
sym<-symnum(cormat, abbr.colnames=FALSE)
# Correlogram
if(graph & graphType[1]=="correlogram"){
corrplot(cormat, type=ifelse(type[1]=="flatten", "lower", type[1]),
tl.col="black", tl.srt=45,col=col,...)
}
else if(graphType[1]=="heatmap")
heatmap(cormat, col=col, symm=TRUE)
# Get lower/upper triangle
if(type[1]=="lower"){
cormat<-getLower.tri(cormat)
pmat<-getLower.tri(pmat)
}
else if(type[1]=="upper"){
cormat<-getUpper.tri(cormat)
pmat<-getUpper.tri(pmat)
sym=t(sym)
}
else if(type[1]=="flatten"){
cormat<-flattenCorrMatrix(cormat, pmat)
pmat=NULL
sym=NULL
}
list(r=cormat, p=pmat, sym=sym)
}
And how I "call" it:
Nb: don is my dataframe, I want every variable to be tested with the other:
don2 <- as.matrix(don)
rquery.cormat(don, type="flatten", graph=FALSE, cor.method = "pearson")
I also tried this with the same result:
flattenCorrMatrix <- function(DF) {
DF <- DF %>% as.matrix() %>% Hmisc::rcorr()
ut <- upper.tri(DF$r)
flat <- data.frame(row = rownames(DF$r)[row(DF$r)[ut]], column = rownames(DF$r)[col(DF$r)[ut]],
cor = (DF$r)[ut], p = DF$P[ut], n = DF$n[ut])
return(flat)
}
#using the function and filtering out the y variable and correlations higher than abs(0.7)
flattenCorrMatrix(don) %>%
filter(!grepl("y", row)) %>%
filter(cor > abs(0.7)&p<0.05)
I already shortened the variable names, just in case ...
Thanks ,
Best regards,
Françoise