How to create a loop in R to perform repeated comparisons

Hi,

I have a question about creating a loop to perform repeated calculations in R. I have an (8 x 11) data matrix of observations and would like to calculate the p-value for each column (Gene_A, Gene_B, Gene_C, etc.) compared to a reference column (Reference). Is there a way to loop over the columns and write the output (the p-values) to a separate column, or save it separately? I have a large matrix of dimensions (10000 x 100), and performing the calculations manually would be tedious. Please assist me with this.

# Example data (8 samples x 11 columns), in the form produced by dput(Dat_mat).
# The structure() call is assigned to Dat_mat so that this snippet runs on its
# own; the t-tests further down read their columns from Dat_mat.
Dat_mat <- structure(
  list(
    Gene_A = c(16.26875, 21.374, 35.7917, 21.01615,
               22.5471, 53.78655, 8.2572, 11.24755),
    Gene_B = c(16.26875, 21.374, 35.7917, 21.01615,
               22.5471, 53.78655, 8.2572, 11.24755),
    Gene_C = c(30.8274, 112.1106, 126.3887, 104.7907,
               122.3795, 90.0362, 50.6777, 100.9305),
    Gene_D = c(111.2367, 252.9354, 215.32245, 112.6046,
               203.8146, 283.54685, 153.041325, 176.373225),
    Gene_E = c(111.2367, 252.9354, 215.32245, 112.6046,
               203.8146, 283.54685, 153.041325, 176.373225),
    Gene_F = c(111.2367, 252.9354, 215.32245, 112.6046,
               203.8146, 283.54685, 153.041325, 176.373225),
    Gene_G = c(35.3882, 53.4914, 88.0871, 56.443,
               63.7323, 49.4972, 37.0928, 53.0731),
    Gene_H = c(41.27, 24.0781, 86.3814, 41.8651,
               65.2544, 145.7944, 77.7312, 36.7819),
    Gene_I = c(31.7941, 51.24006667, 40.5937, 27.23903333,
               33.96723333, 63.5719, 31.3466, 49.88686667),
    Gene_J = c(31.7941, 51.24006667, 40.5937, 27.23903333,
               33.96723333, 63.5719, 31.3466, 49.88686667),
    Reference = c(33.59115, 52.365733335, 87.23425, 49.15405,
                  64.49335, 76.80405, 43.88525, 51.479983335)
  ),
  class = "data.frame",
  row.names = c("Sample_1", "Sample_2", "Sample_3", "Sample_4",
                "Sample_5", "Sample_6", "Sample_7", "Sample_8")
)
# Print the data frame.
Dat_mat
#>            Gene_A   Gene_B   Gene_C   Gene_D   Gene_E   Gene_F  Gene_G   Gene_H
#> Sample_1 16.26875 16.26875  30.8274 111.2367 111.2367 111.2367 35.3882  41.2700
#> Sample_2 21.37400 21.37400 112.1106 252.9354 252.9354 252.9354 53.4914  24.0781
#> Sample_3 35.79170 35.79170 126.3887 215.3225 215.3225 215.3225 88.0871  86.3814
#> Sample_4 21.01615 21.01615 104.7907 112.6046 112.6046 112.6046 56.4430  41.8651
#> Sample_5 22.54710 22.54710 122.3795 203.8146 203.8146 203.8146 63.7323  65.2544
#> Sample_6 53.78655 53.78655  90.0362 283.5469 283.5469 283.5469 49.4972 145.7944
#> Sample_7  8.25720  8.25720  50.6777 153.0413 153.0413 153.0413 37.0928  77.7312
#> Sample_8 11.24755 11.24755 100.9305 176.3732 176.3732 176.3732 53.0731  36.7819
#>            Gene_I   Gene_J Reference
#> Sample_1 31.79410 31.79410  33.59115
#> Sample_2 51.24007 51.24007  52.36573
#> Sample_3 40.59370 40.59370  87.23425
#> Sample_4 27.23903 27.23903  49.15405
#> Sample_5 33.96723 33.96723  64.49335
#> Sample_6 63.57190 63.57190  76.80405
#> Sample_7 31.34660 31.34660  43.88525
#> Sample_8 49.88687 49.88687  51.47998


### Obtain p-values

# Manual approach: one t.test() call per gene column against the Reference
# column, keeping only the p-value. Console output is shown as #> comments so
# that the snippet can be pasted and run as-is (requires Dat_mat).
results <- t.test(Dat_mat$Gene_A, Dat_mat$Reference)
results$p.value
#> [1] 0.001095952
results <- t.test(Dat_mat$Gene_B, Dat_mat$Reference)
results$p.value
#> [1] 0.001095952
results <- t.test(Dat_mat$Gene_C, Dat_mat$Reference)
results$p.value
#> [1] 0.02739666
results <- t.test(Dat_mat$Gene_D, Dat_mat$Reference)
results$p.value
#> [1] 0.0004201088
results <- t.test(Dat_mat$Gene_E, Dat_mat$Reference)
results$p.value
#> [1] 0.0004201088
results <- t.test(Dat_mat$Gene_F, Dat_mat$Reference)
results$p.value
#> [1] 0.0004201088
results <- t.test(Dat_mat$Gene_G, Dat_mat$Reference)
results$p.value
#> [1] 0.7507277
results <- t.test(Dat_mat$Gene_H, Dat_mat$Reference)
results$p.value
#> [1] 0.6314491
results <- t.test(Dat_mat$Gene_I, Dat_mat$Reference)
results$p.value
#> [1] 0.05589539
# NOTE: Gene_J is not tested in this manual run; the last call compares the
# Reference column with itself.
results <- t.test(Dat_mat$Reference, Dat_mat$Reference)
results$p.value
#> [1] 1

Created on 2021-11-16 by the reprex package (v2.0.1)

Here is one method using lapply().

# Example data: 8 samples (rows) by 10 gene columns plus a Reference column.
Dat_mat <- data.frame(
  Gene_A = c(16.26875, 21.374, 35.7917, 21.01615,
             22.5471, 53.78655, 8.2572, 11.24755),
  Gene_B = c(16.26875, 21.374, 35.7917, 21.01615,
             22.5471, 53.78655, 8.2572, 11.24755),
  Gene_C = c(30.8274, 112.1106, 126.3887, 104.7907,
             122.3795, 90.0362, 50.6777, 100.9305),
  Gene_D = c(111.2367, 252.9354, 215.32245, 112.6046,
             203.8146, 283.54685, 153.041325, 176.373225),
  Gene_E = c(111.2367, 252.9354, 215.32245, 112.6046,
             203.8146, 283.54685, 153.041325, 176.373225),
  Gene_F = c(111.2367, 252.9354, 215.32245, 112.6046,
             203.8146, 283.54685, 153.041325, 176.373225),
  Gene_G = c(35.3882, 53.4914, 88.0871, 56.443,
             63.7323, 49.4972, 37.0928, 53.0731),
  Gene_H = c(41.27, 24.0781, 86.3814, 41.8651,
             65.2544, 145.7944, 77.7312, 36.7819),
  Gene_I = c(31.7941, 51.24006667, 40.5937, 27.23903333,
             33.96723333, 63.5719, 31.3466, 49.88686667),
  Gene_J = c(31.7941, 51.24006667, 40.5937, 27.23903333,
             33.96723333, 63.5719, 31.3466, 49.88686667),
  Reference = c(33.59115, 52.365733335, 87.23425, 49.15405,
                64.49335, 76.80405, 43.88525, 51.479983335),
  row.names = c("Sample_1", "Sample_2", "Sample_3", "Sample_4",
                "Sample_5", "Sample_6", "Sample_7", "Sample_8")
)
#' P-value of a two-sample t-test between a column and a reference vector
#'
#' @param Col Numeric vector of observations (e.g. one gene column).
#' @param Ref Numeric vector to compare `Col` against. Defaults to the
#'   `Reference` column of the global `Dat_mat`, so `PvalFunc(Col)` behaves
#'   exactly as before; pass another vector to reuse the function on other data.
#' @return The `p.value` element of `t.test(Col, Ref)`.
PvalFunc <- function(Col, Ref = Dat_mat$Reference) {
  # The default for Ref is evaluated lazily, so Dat_mat is only looked up
  # when the caller does not supply Ref.
  t.test(Col, Ref)$p.value
}
# Run PvalFunc on every column of Dat_mat; the result is a list with one
# p-value per column, named after the columns.
Result <- lapply(X = Dat_mat, FUN = function(GeneCol) PvalFunc(GeneCol))
print(Result)
#> $Gene_A
#> [1] 0.001095952
#> 
#> $Gene_B
#> [1] 0.001095952
#> 
#> $Gene_C
#> [1] 0.02739666
#> 
#> $Gene_D
#> [1] 0.0004201088
#> 
#> $Gene_E
#> [1] 0.0004201088
#> 
#> $Gene_F
#> [1] 0.0004201088
#> 
#> $Gene_G
#> [1] 0.7507277
#> 
#> $Gene_H
#> [1] 0.6314491
#> 
#> $Gene_I
#> [1] 0.05589539
#> 
#> $Gene_J
#> [1] 0.05589539
#> 
#> $Reference
#> [1] 1

Created on 2021-11-16 by the reprex package (v2.0.1)

@FJCC , thank you very much for the inputs. This is very helpful, and solved my query.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.