It can be done with patience (I estimate it would take around 3-4 years on my machine). Calculating the test statistics pairwise on 9,000 variables involves applying cor.test
40,495,500 times: g_1,g_2 \dots g_1,g_{9000} then g_2, g_3 \dots g_2,g_{9000} \dots g_{8999},g_{9000}.
And then there's all that review to be done.
# fake some data for two-gene case
fakes <- matrix(sample(19:45,10000,replace = TRUE),nrow = 100, ncol = 2)
#> Warning in matrix(sample(19:45, 10000, replace = TRUE), nrow = 100, ncol = 2):
#> data length differs from size of matrix: [10000 != 100 x 2]
head(fakes)
#> [,1] [,2]
#> [1,] 40 42
#> [2,] 30 37
#> [3,] 21 29
#> [4,] 32 24
#> [5,] 24 23
#> [6,] 34 28
# an individual record won't work
cor.test(fakes[1,1],fakes[1,2])
#> Error in cor.test.default(fakes[1, 1], fakes[1, 2]): not enough finite observations
# test is designed to operate this way
cor.test(fakes[,1],fakes[,2])
#>
#> Pearson's product-moment correlation
#>
#> data: fakes[, 1] and fakes[, 2]
#> t = -0.54353, df = 98, p-value = 0.588
#> alternative hypothesis: true correlation is not equal to 0
#> 95 percent confidence interval:
#> -0.2485638 0.1431372
#> sample estimates:
#> cor
#> -0.05482221
# or subsets
cor.test(fakes[1:1000,1],fakes[1:1000,2])
#> Error in fakes[1:1000, 1]: subscript out of bounds
# rerun with fakes of dim 100, 9000
fakes <- matrix(sample(19:45,100000,replace = TRUE),nrow = 100, ncol = 9000)
cor.test(fakes[,1],fakes[,2])
#>
#> Pearson's product-moment correlation
#>
#> data: fakes[, 1] and fakes[, 2]
#> t = -0.39724, df = 98, p-value = 0.6921
#> alternative hypothesis: true correlation is not equal to 0
#> 95 percent confidence interval:
#> -0.2346647 0.1575643
#> sample estimates:
#> cor
#> -0.04009468
cor.test(fakes[,1],fakes[,3]) # etc
#>
#> Pearson's product-moment correlation
#>
#> data: fakes[, 1] and fakes[, 3]
#> t = 1.193, df = 98, p-value = 0.2357
#> alternative hypothesis: true correlation is not equal to 0
#> 95 percent confidence interval:
#> -0.07861747 0.30880901
#> sample estimates:
#> cor
#> 0.1196482
# cor.test on all genes against each other gene
# pairwise
pairs <- combn(9000,2)
dim(pairs)
#> [1] 2 40495500
pairs[1:2,1:3]
#> [,1] [,2] [,3]
#> [1,] 1 1 1
#> [2,] 2 3 4
# it can be done, this way
mass_cor_test <- function(x) cor.test(fakes[,pairs[1,x]],fakes[,pairs[2,x]])
result <- microbenchmark::microbenchmark(
l <- sapply(pairs[1:4],mass_cor_test), times = 10
)
#> Warning in microbenchmark::microbenchmark(l <- sapply(pairs[1:4],
#> mass_cor_test), : less accurate nanosecond times to avoid potential integer
#> overflows
l
#> [,1]
#> statistic -0.3972365
#> parameter 98
#> p.value 0.6920569
#> estimate -0.04009468
#> null.value 0
#> alternative "two.sided"
#> method "Pearson's product-moment correlation"
#> data.name "fakes[, pairs[1, x]] and fakes[, pairs[2, x]]"
#> conf.int numeric,2
#> [,2]
#> statistic 1.193027
#> parameter 98
#> p.value 0.235739
#> estimate 0.1196482
#> null.value 0
#> alternative "two.sided"
#> method "Pearson's product-moment correlation"
#> data.name "fakes[, pairs[1, x]] and fakes[, pairs[2, x]]"
#> conf.int numeric,2
#> [,3]
#> statistic -0.3972365
#> parameter 98
#> p.value 0.6920569
#> estimate -0.04009468
#> null.value 0
#> alternative "two.sided"
#> method "Pearson's product-moment correlation"
#> data.name "fakes[, pairs[1, x]] and fakes[, pairs[2, x]]"
#> conf.int numeric,2
#> [,4]
#> statistic -1.143017
#> parameter 98
#> p.value 0.255816
#> estimate -0.1147002
#> null.value 0
#> alternative "two.sided"
#> method "Pearson's product-moment correlation"
#> data.name "fakes[, pairs[1, x]] and fakes[, pairs[2, x]]"
#> conf.int numeric,2
result
#> Unit: microseconds
#> expr min lq mean median
#> l <- sapply(pairs[1:4], mass_cor_test) 209.797 213.897 1236.396 216.1725
#> uq max neval
#> 231.158 10390.96 10
Created on 2023-02-07 with reprex v2.0.2