Processes are slower when I multiply matrices by 10, and faster when I divide by 10

wumaan · September 23, 2020, 12:13am

In R, I am setting up some mutation simulations on a codon sequence. I define a matrix R which is a discrete time Markov chain from which mutation probabilities are defined (r_{ij} is the probability of codon j mutation to codon i given a mutation will happen). The entries of this matrix tell us what the weightings are for a probability distribution so we know what mutation will happen at a site in a particular state. If the entries of the appropriate column R do not add to 1, this is fine as the sample function can normalise the weightings.

The issue I am having is when I multiply R by 10, the process is much slower even though each number in the matrix has the same number of significant figures. Conversely, when I multiply R by 0.1, the process is much faster. I have tried this on two computers and have had the same result.

Thanks in advance for your help!

Here is all my code (sorry it isn't written very nicely). And I know the rate and transition matrices aren't particularly well defined at this stage but I doubt that would fix the problem.

'''
r
library(expm)
library(profvis)

cn <- function(st) {
ans <- strsplit(st,split="")[[1]]
ans <- match(ans,statesD)
i <- ans[1]
j <- ans[2]
k <- ans[3]
sum(c(4^2,4,1)*(c(i,j,k)-1))+1
}

codonDiags <- function(codonsInSeq,Rmatrix){
rateVector <- c()
for (i in 1:length(codonsInSeq)){
rate <- Rmatrix[codonsInSeq[i],codonsInSeq[i]]
rateVector <- append(rateVector,rate)
}
return(rateVector)
}

sampleCodon <- function(probDist){
try(if (is.vector(probDist)==FALSE) stop('input for sampleCodon function should be a vector'))
sampleVector <- c()
for (i in 1:length(probDist)){
sampleVector <- append(sampleVector,i)
}
x <- sample(sampleVector,1,replace=TRUE,prob = probDist)
return(x)
}

mutateCodon <- function(codonsInSeq,sampleCodon,Rmatrix,codonList){
oldCodon <- codonsInSeq[sampleCodon]
newCodon <- sample(codonList,1,replace=TRUE, prob = Rmatrix[,oldCodon])
return(newCodon)
}

mutateSequenceSingle <- function(codonsInSeq,codonList,Rmatrix){
sampleCodon <- sampleCodon(codonDiags(codonsInSeq,Rmatrix))
newCodon <- mutateCodon(codonsInSeq,sampleCodon,Rmatrix,codonList)
codonsInSeq[sampleCodon] <- newCodon
newSequence <- paste(codonsInSeq,collapse='')
return(newSequence)
}

mutateSequence2 <- function(Rmatrix,endTime,codonsInSeq,codonList=codonList){
time=0
#numberOfEvents = 0
while (time < endTime){
rateOfMutations <- sum(codonDiags(codonsInSeq,Rmatrix))
timeElap <- rexp(1,rateOfMutations)
time <- time + timeElap
#numberOfEvents <- numberOfEvents +1
sequence <- mutateSequenceSingle(codonsInSeq,codonList,Rmatrix)
}
return(sequence)
}

codonLociInG <- function(groupList,codonList){
listOfLoci <- integer(length(codonList))
for (i in 1:length(codonList)){
for (j in 1:length(groupList)){
if (i %in% g[[j]]){
listOfLoci[i] <- j
break
}
}
}
return(listOfLoci)
}

sMatGen <- function(sVector,polarityV,groupList,aaList,codonList){
sMat <- matrix(,nrow=length(codonList),ncol = length(codonList))
codLocInG <- codonLociInG(groupList,codonList)
for (i in 1:(length(codonList)-1)){
for (j in (i+1):length(codonList)){
if (codLocInG[i] == codLocInG[j]){
if (polarityV[codLocInG[i]] == 'p'){
sMat[j,i] <-sVector[5]
}
else if (polarityV[codLocInG[j]]=='h') {
sMat[j,i] <- sVector[6]
}
else if (polarityV[codLocInG[j]]=='STOP'){
sMat[j,i] <- 0
}
else{
print('error as the codon isnt coming up as p or h')
}
}
else {
if (polarityV[codLocInG[i]]=='p'){
if(polarityV[codLocInG[j]]=='p'){
sMat[j,i] <- sVector[1]
}
else if (polarityV[codLocInG[j]]=='h'){
sMat[j,i] <- sVector[2]
}
else if (polarityV[codLocInG[j]]=='STOP'){
sMat[j,i] <- 0
}
else {
print('error in the for loops')
}
}
else if (polarityV[codLocInG[i]]=='h'){
if (polarityV[codLocInG[j]]=='p'){
sMat[j,i] <- sVector[3]
}
else if (polarityV[codLocInG[j]]=='h') {
sMat[j,i] <- sVector[4]
}
else if (polarityV[codLocInG[j]]=='STOP'){
sMat[j,i] <- 0
}
else {
print('error in the for loops in the other place')
}
}
else if (polarityV[codLocInG[i]] =='STOP'){
sMat[j,i] <- 0
}
else {
print('third place for an error in the for loops for this function.')
}
}
}
}
return(sMat)
}

codonsFromSeq <- c("GAU", "CAG", "ACC", "GGC", "CAA", "AGG", "GGC" ,"ACA", "AAA", "GAA")
codonList <- c("AAA", "AAG", "AAC", "AAU", "AGA", "AGG", "AGC", "AGU", "ACA", "ACG", "ACC", "ACU", "AUA", "AUG", "AUC", "AUU", "GAA", "GAG", "GAC", "GAU", "GGA", "GGG", "GGC", "GGU" ,"GCA", "GCG", "GCC", "GCU", "GUA", "GUG", "GUC", "GUU", "CAA", "CAG", "CAC", "CAU", "CGA", "CGG","CGC", "CGU", "CCA", "CCG", "CCC", "CCU", "CUA", "CUG", "CUC", "CUU", "UAA", "UAG" ,"UAC", "UAU", "UGA", "UGG", "UGC", "UGU", "UCA","UCG", "UCC", "UCU", "UUA", "UUG", "UUC" ,"UUU")

statesD <- c('A','G','C','U')

g <- list()
g[[1]] <- c(cn("GCU"),cn("GCC"),cn("GCA"),cn("GCG"))
g[[2]] <- c(cn("CGU"),cn("CGC"),cn("CGA"),cn("CGG"),cn("AGA"),cn("AGG"))
g[[3]] <- c(cn("AAU"),cn("AAC"))
g[[4]] <- c(cn("GAU"),cn("GAC"))
g[[5]] <- c(cn("UGU"),cn("UGC"))
g[[6]] <- c(cn("CAA"),cn("CAG"))
g[[7]] <- c(cn("GAA"),cn("GAG"))
g[[8]] <- c(cn("GGU"),cn("GGC"),cn("GGA"),cn("GGG"))
g[[9]] <- c(cn("CAU"),cn("CAC"))
g[[10]] <- c(cn("AUU"),cn("AUC"),cn("AUA"))
g[[11]] <- c(cn("UUA"),cn("UUG"),cn("CUU"),cn("CUC"),cn("CUA"),cn("CUG"))
g[[12]] <- c(cn("AAA"),cn("AAG"))
g[[13]] <- c(cn("AUG"))
g[[14]] <- c(cn("UUU"),cn("UUC"))
g[[15]] <- c(cn("CCU"),cn("CCC"),cn("CCA"),cn("CCG"))
g[[16]] <- c(cn("UCU"),cn("UCC"),cn("UCA"),cn("UCG"),cn("AGU"),cn("AGC"))
g[[17]] <- c(cn("ACU"),cn("ACC"),cn("ACA"),cn("ACG"))
g[[18]] <- c(cn("UGG"))
g[[19]] <- c(cn("UAU"),cn("UAC"))
g[[20]] <- c(cn("GUU"),cn("GUC"),cn("GUA"),cn("GUG"))
#STOP codons
g[[21]] <- c(cn("UAA"),cn("UAG"),cn("UGA"))

fixDiagonalPos <- function(Q) {
n <- dim(Q)[1]
if (dim(Q)[2]!=n) {
stop("Expecting a square matrix")
}
diag(Q)<-0
for (i in 1:n) Q[i,i] <- sum(Q[,i])
return(Q)
}

rateM <-matrix(c(-1,1,1,1,1,-1,1,1,1,1,-1,1,1,1,1,-1),ncol=4,nrow=4,byrow=TRUE)

polarityVdna <- c('h','p','p','p','h','p','p','h','p','h','h','p','h','h','h','p','p','h','p','h','STOP')

codonRate <- kronecker(kronecker(rateM,diag(4)),diag(4))+ kronecker(kronecker(diag(4),rateM),diag(4))+ kronecker(kronecker(diag(4),diag(4)),rateM)
t <- 0.5
aminoacidsStop = c('A','R','N','D','C','Q','E','G','H','I','L','K','M','F','P','S','T','W','Y','V','STOP')

R2 <- codonRate * sMatGen(runif(6,0.9,1.2),polarityVdna,g,aminoacidsStop,codonList)
R2[is.na(R2)] <- 0

R2 <- R2 + t(R2)
R2 <- fixDiagonalPos(R2)

rownames(R2) <- codonList
colnames(R2) <- codonList

R4 <- 0.1R2
R5 <- 0.01R2
R6 <- 0.001*R2

profvis(replicate(n=10000,mutatedSequence2 <-mutateSequence2(R2,t,codonsFromSeq,codonList)))
profvis(replicate(n=10000,mutatedSequence2 <-mutateSequence2(R4,t,codonsFromSeq,codonList)))
profvis(replicate(n=10000,mutatedSequence2 <-mutateSequence2(R5,t,codonsFromSeq,codonList)))

'''

technocrat · September 23, 2020, 12:39am

Code this long cannot be readily evaluated without a minimal working example, preferably in the form of a reprex. This has the benefit of scrubbing for situations such as

> codonsInSeq <- c()
> condonInSeq
Error: object 'condonInSeq' not found

wumaan · September 23, 2020, 2:26am

Hi Technocrat,

Sorry, I'm a bit of a noob at this, thanks for the advice!

I've cut it back as much as I can (without manually defining a 64*64 matrix) and have checked to make sure it actually works.

system · October 14, 2020, 2:27am

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.