Hello fellow R users,
Here you can see a sample of my data with some of my observations.
I have 10 variable values for each of my observations.
Each observation is assigned to a condition called "clusters" (1, 2 or 3).
I want a correlation matrix that only shows the correlations of the first 4 variables with all the others (not the whole matrix). I have 2 issues:
-
When I try to add crosses on the coefficients that are not significant at the chosen significance level, crosses also appear seemingly at random all around my plot. Is it because my correlation matrix is cut?
-
I would like to have a separate correlation matrix for each condition (cluster). I tried splitting my data frame into 3, one per condition, and then re-running my code to generate the matrix. Is there a line I could add when I build the matrix to do that automatically?
Thank you in advance for your kind answers !
# LIBRARIES
library("dplyr")
library("ggcorrplot")
library("corrplot")
# DATAFRAME
# Sample data: 25 observations (rows) x 12 columns.
#   SP, PAR1-PAR3 : double-valued measurements
#   PAR4-PAR10    : integer-valued variables (written with the L suffix)
#   clusters      : integer condition label of each observation
# NOTE: only cluster values 2 and 3 occur in this sample, so splitting by
# `clusters` yields two groups, named "2" and "3".
datacorr <- tibble::tribble(
~SP, ~PAR1, ~PAR2, ~PAR3, ~PAR4, ~PAR5, ~PAR6, ~PAR7, ~PAR8, ~PAR9, ~PAR10, ~clusters,
0.8222222222, 1.591708569, 0.461244729, 1.777250762, 4L, 3L, 3L, 4L, 3L, 5L, 2L, 2L,
0.5568428043, 1.958865971, 0.543320189, 1.036227231, 2L, 3L, 1L, 2L, 2L, 2L, 3L, 3L,
0.5929769601, 1.990803599, 0.521928246, 0.982365297, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L,
0.6342714572, 1.990218758, 0.524461771, 1.24661541, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
0.8058100072, 1.637449971, 0.512539274, 1.883289713, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
0.8060415043, 0.900799193, 0.50656114, 1.104014063, 4L, 4L, 1L, 4L, 2L, 4L, 4L, 2L,
0.6187287986, 1.969451519, 0.542057262, 1.163499412, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 3L,
0.6771451456, 1.766014623, 0.498124717, 1.106559631, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 3L,
0.8058604216, 1.232431277, 0.525738889, 1.247517103, 4L, 4L, 4L, 2L, 2L, 4L, 4L, 2L,
0.7631409152, 1.867398846, 0.514537918, 1.77366006, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
0.5011997039, 1.932008974, 0.539240032, 0.971144082, 2L, 2L, 2L, 3L, 2L, 4L, 2L, 3L,
0.6515970494, 1.908695542, 0.508596723, 1.175178711, 4L, 5L, 5L, 3L, 5L, 4L, 5L, 3L,
0.7200863651, 1.444316115, 0.498819845, 1.188263069, 4L, 2L, 2L, 2L, 4L, 4L, 2L, 2L,
0.7757407509, 1.934798043, 0.506440704, 1.992863891, 4L, 2L, 4L, 2L, 1L, 5L, 1L, 3L,
0.7630974523, 1.787207177, 0.503453869, 1.483378124, 2L, 2L, 1L, 1L, 1L, 3L, 4L, 3L,
0.7281860587, 1.557906724, 0.519093897, 1.221386699, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
0.5614598528, 0.952831146, 0.516862798, 0.69749457, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
0.6993030525, 1.185515667, 0.504472913, 1.070678283, 3L, 3L, 2L, 3L, 3L, 3L, 4L, 2L,
0.9280756797, 1.179279874, 0.512671429, 1.945963563, 5L, 5L, 5L, 4L, 5L, 5L, 4L, 2L,
0.7300343289, 1.934375487, 0.567305168, 1.490715804, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L,
0.8408527947, 1.39682442, 0.566571286, 1.549509004, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
0.7997062171, 1.495689361, 0.525368548, 1.335113558, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
0.7028558026, 1.986365065, 0.519533601, 1.407212738, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
0.8820948698, 1.364926122, 0.552036758, 1.984006396, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
0.6696436468, 1.788972558, 0.525300454, 1.173941218, 1L, 1L, 1L, 1L, 4L, 4L, 2L, 3L
)
# CODE
# Pairwise correlations between all columns of the raw observations.
mcor <- cor(datacorr)
# p-values must be computed from the raw data, NOT from the correlation
# matrix: cor_pmat() runs a correlation test on every pair of columns of its
# input, so feeding it `mcor` would test the correlations of correlations.
p_mat <- cor_pmat(datacorr)
# Rows to display: the first 4 variables against every variable.
shown_rows <- 1:4
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(mcor[shown_rows, , drop = FALSE],
  method = "color", col = col(200),
  type = "upper",
  addCoef.col = "black", number.cex = 0.65, # Add coefficient of correlation
  tl.col = "darkblue", tl.srt = 45, tl.cex = 0.7, # Text label color and rotation
  # Combine with significance level. The p-value matrix has to be cut exactly
  # like the correlation matrix; passing the full square matrix alongside a
  # 4-row correlation matrix puts the "not significant" crosses on the wrong
  # cells.
  p.mat = p_mat[shown_rows, , drop = FALSE], sig.level = 0.01,
  # hide correlation coefficient on the principal diagonal
  diag = FALSE
)
# TRIED SOLUTIONS
# One correlation plot per cluster, generated automatically.
# split() names each piece after its cluster value, so the pieces are iterated
# by name instead of being picked by position (a cluster value that is absent
# from the data simply has no piece).
splitdata <- split(datacorr, datacorr$clusters)

# Draw the correlation plot (first `shown_rows` variables vs. all variables)
# for the observations of a single cluster.
#   cluster_data : data frame holding the rows of one cluster
#   cluster_id   : label of the cluster, used in the plot title
#   shown_rows   : rows of the correlation matrix to display
#   sig_level    : p-values above this level are crossed out
# Returns the full correlation matrix of the cluster invisibly, or NULL when
# the cluster is too small to test.
plot_cluster_corr <- function(cluster_data, cluster_id,
                              shown_rows = 1:4, sig_level = 0.01) {
  # `clusters` is constant inside a cluster, so its correlation with anything
  # is undefined; leave it out.
  vars <- cluster_data[setdiff(names(cluster_data), "clusters")]
  # A correlation test needs at least 3 observations.
  if (nrow(vars) < 3L) {
    warning("Cluster ", cluster_id, " has fewer than 3 observations; skipped.",
      call. = FALSE
    )
    return(invisible(NULL))
  }
  cluster_cor <- cor(vars)
  # p-values come from the raw observations of this cluster.
  cluster_p <- cor_pmat(vars)
  palette_fun <- colorRampPalette(
    c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA")
  )
  corrplot(cluster_cor[shown_rows, , drop = FALSE],
    method = "color", col = palette_fun(200),
    type = "upper",
    addCoef.col = "black", number.cex = 0.65,
    tl.col = "darkblue", tl.srt = 45, tl.cex = 0.7,
    # The p-value matrix is cut exactly like the correlation matrix so the
    # crosses line up with the displayed cells.
    p.mat = cluster_p[shown_rows, , drop = FALSE], sig.level = sig_level,
    diag = FALSE,
    title = paste("Cluster", cluster_id), mar = c(0, 0, 2, 0)
  )
  invisible(cluster_cor)
}

# Named list (one entry per cluster) of the per-cluster correlation matrices;
# each call also draws the corresponding plot.
cluster_cors <- Map(plot_cluster_corr, splitdata, names(splitdata))
Created on 2021-07-01 by the reprex package (v2.0.0)