Correlation matrix within a data.table

Hello, it will be possible to simplify, improve the following lines of code:

library(data.table)
library(ellipse)
set.seed(955)
vvar <- 1:20 + rnorm(20,sd=3)
wvar <- 1:20 + rnorm(20,sd=5)
xvar <- 20:1 + rnorm(20,sd=3)
yvar <- (1:20)/2 + rnorm(20, sd=10)
zvar <- rnorm(20, sd=6)

# A data frame with multiple variables
data <- data.table(vvar, wvar, xvar, yvar, zvar)
print(data,topn = 3)
# Make the correlation table
ctab <- data[,cor(.SD)]#data[,x := .(list(cor(.SD))),]
#ctab <-ctab[,x[1:20]]
#without the desired result !!!!!
#!!!ctab<-ctab[,lapply(.SD, round, digits = 2),by=.SD[1,],.SDcols = 1:5]
#!!!ctab<-ctab[,lapply(.SD, round, digits = 2),by=.SD[1,],.SDcols = 1:5]
#!!!ctab[,.SD[1,], round(.SD[, 1:5], 2)]
#without the desired result !!!!!
print(ctab,topn = 3)
plotcorr(cor(data), mar = c(0.1, 0.1, 0.1, 0.1))
A<-as.matrix(ctab, rownames = TRUE)
A<-round(A,2)
B<-as.data.table(A,keep.rownames=TRUE)
plotcorr(A, mar = c(0.1, 0.1, 0.1, 0.1))
B[,plotcorr(as.matrix(.SD[2:6],rownames = TRUE), mar = c(0.1, 0.1, 0.1, 0.1)),]
# Do the same, but with colors corresponding to value
colorfun <- colorRamp(c("#CC0000","white","#3366CC"), space="Lab")
B[,plotcorr(as.matrix(.SD[2:6],rownames = TRUE), 
            col=rgb(colorfun((ctab+1)/2), maxColorValue=255),
            mar = c(0.1, 0.1, 0.1, 0.1)),]

a) Especially step A <-as.matrix (ctab, rownames = TRUE) and then perform the inversion B <-as.data.table (A, keep.rownames = TRUE)
to be able to manipulate the plotcorr function inside the data.table
b) how to achieve the round on the data.table ctab immediately after the ctab <- data [, cor (.SD)]

Thanks for the complete code. It would be slightly more useful as a reprex. Please see the FAQ: What's a reproducible example (`reprex`) and how do I do one? Using a reprex, complete with representative data will attract quicker and more answers.

library(data.table)
library(ellipse)
#> 
#> Attaching package: 'ellipse'
#> The following object is masked from 'package:graphics':
#> 
#>     pairs
library(magrittr) # for pipe
set.seed(955)
vvar <- 1:20 + rnorm(20,sd=3)
wvar <- 1:20 + rnorm(20,sd=5)
xvar <- 20:1 + rnorm(20,sd=3)
yvar <- (1:20)/2 + rnorm(20, sd=10)
zvar <- rnorm(20, sd=6)

## A data frame with multiple variables
data <- data.table(vvar, wvar, xvar, yvar, zvar)
# print(data,topn = 3)
## Make the correlation table
ctab <- data[,cor(.SD)] #data[,x := .(list(cor(.SD))),]
# print(ctab,topn = 3)
# plotcorr(cor(data), mar = c(0.1, 0.1, 0.1, 0.1))
# getting rounding
A <- as.matrix(ctab, rownames = TRUE) %>% round(.,2) %>% as.data.table(.,keep.rownames = TRUE) -> B
# HOWEVER
A == B
#>        rn vvar wvar xvar yvar zvar
#> [1,] TRUE TRUE TRUE TRUE TRUE TRUE
#> [2,] TRUE TRUE TRUE TRUE TRUE TRUE
#> [3,] TRUE TRUE TRUE TRUE TRUE TRUE
#> [4,] TRUE TRUE TRUE TRUE TRUE TRUE
#> [5,] TRUE TRUE TRUE TRUE TRUE TRUE
# Doesn't seem as though there is anything to be gained by the round trip to B
A <- as.matrix(ctab, rownames = TRUE) %>% round(.,2) -> A

# A <- round(A,2)
# B <-as.data.table(A,keep.rownames=TRUE)
# plotcorr(A, mar = c(0.1, 0.1, 0.1, 0.1))
# B[,plotcorr(as.matrix(.SD[2:6],rownames = TRUE), mar = c(0.1, 0.1, 0.1, 0.1)),]
# Do the same, but with colors corresponding to value
# colorfun <- colorRamp(c("#CC0000","white","#3366CC"), space="Lab")
# B[,plotcorr(as.matrix(.SD[2:6],rownames = TRUE), 
#           col=rgb(colorfun((ctab+1)/2), maxColorValue=255),
#            mar = c(0.1, 0.1, 0.1, 0.1)),]

Created on 2020-03-29 by the reprex package (v0.3.0)

Taking some of your ideas, make some progress:

library(data.table)
library(ellipse)

library(magrittr) # for pipe
set.seed(955)
vvar <- 1:20 + rnorm(20,sd=3)
wvar <- 1:20 + rnorm(20,sd=5)
xvar <- 20:1 + rnorm(20,sd=3)
yvar <- (1:20)/2 + rnorm(20, sd=10)
zvar <- rnorm(20, sd=6)
data <- data.table(vvar, wvar, xvar, yvar, zvar)
print(data,topn = 3)
## Make the correlation table
data[,mtx:=.(list(cor(.SD)))]
ctab<-data[1,mtx]

ctab<-as.data.table(ctab,keep.rownames=TRUE)
ctab
str(ctab)
as.matrix(ctab, rownames = TRUE)
ctab[,round(.SD[,2:6],2)]
ctab[,plotcorr(as.matrix(.SD, rownames = TRUE), mar = c(0.1, 0.1, 0.1, 0.1))]
colorfun <- colorRamp(c("#CC0000","white","#3366CC"), space="Lab")
ctab[,plotcorr(as.matrix(.SD, rownames = TRUE), 
               col=rgb(colorfun((as.matrix(.SD, rownames = TRUE)+1)/2), maxColorValue=255),
               mar = c(0.1, 0.1, 0.1, 0.1)),]

I think it could make it even more compact and elegant

Looks like the wheels start coming off the wagon here

as.matrix(ctab, rownames = TRUE)

which doesn't actually do anything before going on to the next two statements that don't, either. :grin:

You can either overwrite ctab or assign to a new object

as.matrix(ctab, rownames = TRUE) -> ctab
#https://forum.posit.co/t/correlation-matrix-within-a-data-table/58688/4
library(data.table)
library(ellipse)
set.seed(955)
vvar <- 1:20 + rnorm(20,sd=3)
wvar <- 1:20 + rnorm(20,sd=5)
xvar <- 20:1 + rnorm(20,sd=3)
yvar <- (1:20)/2 + rnorm(20, sd=10)
zvar <- rnorm(20, sd=6)
data <- data.table(vvar, wvar, xvar, yvar, zvar)
print(data,topn = 3)
## Make the correlation table
ctab<-data[,1:5, by = .I][,cor(.SD),]# class matrix
class(ctab)
ctab<-data.table(ctab, keep.rownames = TRUE)# class data.table
class(ctab)
cols<-names(ctab)[2:6]
ctab[, (cols) := lapply(.SD, round,2), .SDcols = cols]
ctab[]
ctab[,plotcorr(as.matrix(.SD, rownames = TRUE), mar = c(0.1, 0.1, 0.1, 0.1))]
colorfun <- colorRamp(c("#CC0000","white","#3366CC"), space="Lab")
ctab[,plotcorr(as.matrix(.SD, rownames = TRUE), 
              col=rgb(colorfun((0:14)/15),maxColorValue=255),
               mar = c(0.1, 0.1, 0.1, 0.1)),]
1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.