# If anyone can help solve all the errors, or provide an easier way to
# estimate the parameters, that would be appreciated.
library(deSolve) # using the "ode" function
# Initial values — bound as globals (later code references S, E, I, R, D
# directly) and collected into a *named* vector, because deSolve::ode()
# needs names on the state vector to label its output columns.
# (The original used `<-` inside c(), which produced an unnamed vector.)
S <- 327200000 # initial susceptible — NOTE(review): comment mentioned 66575226 (England population); confirm which N is intended
E <- 10        # initial exposed at time = 0
I <- 1         # initial infected at time = 0
R <- 0         # initial recovered at time = 0
D <- 0         # initial deaths at time = 0
initial_values <- c(S = S, E = E, I = I, R = R, D = D)
parameters_values <- c(
  alpha = 1/997500000, # exposure rate (/person/day)
  beta = 1/1000,       # infectious contact rate (/person/day)
  gamma = 1/14,        # recovery rate (/day)
  eta = 1/280          # death rate (/day)
)
time_values <- seq(0, 113) # days
# Discrete-time SEIRD model iterated for n1 daily steps.
#
# Compartments: S (susceptible), E (exposed), I (infectious),
# R (recovered), D (deceased).
# Rates as used here: alpha = exposure rate (/person/day),
# gamma = E -> I progression, beta = I -> R recovery, eta = I -> D death.
# NOTE(review): the script's parameter comments label beta as the contact
# rate and gamma as the recovery rate, which does not match these update
# equations — confirm the intended roles of beta and gamma.
#
# Returns a data frame with one row per step and columns S, E, I, R, D.
SEIRD <- function(S, E, I, R, D,
                  alpha, beta, gamma, eta,
                  n1 = 200) {
  Out1 <- matrix(0, ncol = 5, nrow = n1)
  for (i in seq_len(n1)) {
    # snapshot the current state so every update uses the same time point
    Sn <- S
    En <- E
    In <- I
    Rn <- R
    Dn <- D
    S <- max(0, Sn - alpha * En * Sn)              # exposures leave S
    E <- max(0, En + alpha * En * Sn - gamma * En) # exposed group
    I <- max(0, In + gamma * En - beta * In - eta * In)
    # FIX: recoveries enter R via the beta * In outflow from I.  The
    # original added eta * In here, which duplicated the deaths in R and
    # lost the recovered individuals entirely.
    R <- max(0, Rn + beta * In)
    D <- max(0, Dn + eta * In)
    Out1[i, ] <- c(S, E, I, R, D)
  }
  data.frame(S = Out1[, 1],
             E = Out1[, 2],
             I = Out1[, 3],
             R = Out1[, 4],
             D = Out1[, 5])
}
# Run the discrete SEIRD model and plot all five compartments.
# FIX: use `<-` for assignment and pass the initial state explicitly instead
# of relying on globals created as a side effect of building initial_values.
alpha <- 1/997500000
beta <- 1/1000 # infectious contact rate (/person/day)
gamma <- 1/14  # recovery rate (/day)
eta <- 1/280
Out1 <- SEIRD(S = 327200000, E = 10, I = 1, R = 0, D = 0,
              alpha, beta, gamma, eta,
              n1 = 200)
plot(1:200, Out1$S,
     type = "l",
     ylim = c(0, 997500000),
     col = "purple",
     xlab = "day", ylab = "people")
lines(1:200, Out1$E, col = "orange")
lines(1:200, Out1$I, col = "red")
lines(1:200, Out1$R, col = "seagreen")
lines(1:200, Out1$D, col = "black")
# adding a legend (FIX: capitalised "Recovered" for consistency):
legend("right", c("Susceptible", "Exposed", "Infected", "Recovered", "Deceased"),
       col = c("purple", "orange", "red", "seagreen", "black"), lty = 1, bty = "n")
# Derivatives of the SEIRD system, in the form deSolve::ode() expects:
# func(time, state, parameters) returning list(c(dS, dE, dI, dR, dD)).
# FIX: the original returned max()-clamped next-step states (a discrete
# update) and read stale copies via Sn/En/... captured from globals before
# with() — ode() needs the instantaneous rates of change instead.
sir_equations <- function(time, variables, parameters) {
  with(as.list(c(variables, parameters)), {
    dS <- -alpha * E * S
    dE <- alpha * E * S - gamma * E # exposed group
    dI <- gamma * E - beta * I - eta * I
    dR <- beta * I # recoveries leave I at rate beta
    dD <- eta * I  # deaths leave I at rate eta
    list(c(dS, dE, dI, dR, dD))
  })
}
# Integrate the SEIRD system over the study period with deSolve
# (positional arguments follow ode()'s y, times, func, parms order).
sir_values_1 <- ode(initial_values, time_values,
                    sir_equations, parameters_values)
sir_values_1
# Convert the deSolve output matrix into a data frame for easier handling.
sir_values_1 <- as.data.frame(sir_values_1)
sir_values_1
# Rough basic-reproduction-number style calculation: (N + 1) * beta / gamma
(66575226 + 1) * parameters_values["beta"] / parameters_values["gamma"]
# Solve the continuous-time SEIRD model and return the solution data frame.
#
# FIX: the original closed the function with `}}` immediately after defining
# the inner equations function, so the parameter/initial-value setup, the
# ode() call and the return value were all left OUTSIDE sir_1 and the
# function returned nothing useful.  It also hard-coded time_values rather
# than using the `times` argument, and built an unnamed initial state.
#
# Args: alpha/beta/gamma/eta — model rates; S0..D0 — initial compartment
# sizes; times — vector of time points to solve at.
# Returns: data frame with columns time, S, E, I, R, D.
sir_1 <- function(alpha, beta, gamma, eta, S0, E0, I0, R0, D0, times) {
  # the differential equations:
  sir_equations <- function(time, variables, parameters) {
    with(as.list(c(variables, parameters)), {
      dS <- -alpha * E * S
      dE <- alpha * E * S - gamma * E # exposed group
      dI <- gamma * E - beta * I - eta * I
      dR <- beta * I
      dD <- eta * I
      list(c(dS, dE, dI, dR, dD))
    })
  }
  # the parameters values:
  parameters_values <- c(alpha = alpha, beta = beta, gamma = gamma, eta = eta)
  # the initial values of variables (named so ode() labels its columns):
  initial_values <- c(S = S0, E = E0, I = I0, R = R0, D = D0)
  # solving over the requested time points:
  out <- ode(initial_values, times, sir_equations, parameters_values)
  # returning the output:
  as.data.frame(out)
}
# Load the observed COVID data; downstream code expects Date and Cases columns.
flu <- read.csv("COVIDdataset1.csv", header = TRUE)
# Plot the observed prevalence.
# NOTE(review): ylim = c(0, 1) will hide any counts above 1 — confirm the
# intended y-axis scale for this dataset.
with(flu, plot(Date, Cases, pch = 19, col = "red", ylim = c(0, 1)))
# Overlay the model's predicted infectious curve at the guessed parameters.
predictions <- sir_1(alpha = 1/997500000, beta = 1/1000,
                     gamma = 1/14, eta = 1/280,
                     S0 = 1000, E0 = 10, I0 = 1, R0 = 0, D0 = 0,
                     times = flu$Date)
with(predictions, lines(time, I, col = "red"))
# Plot the observed prevalence and overlay the model-predicted curve.
#
# FIX: gamma, eta, E0 and D0 were free global references (undefined at the
# call site) — they are now explicit parameters with defaults, which also
# lets the L129-style call pass gamma/eta by name instead of leaking them
# into `...` (and on into plot()).  Also fixed: `D = D0` relied on partial
# argument matching, and `times = time_values` ignored the data's own
# time points.
model_fit <- function(alpha, beta, data, N = 327200000,
                      gamma = 1/14, eta = 1/280, E0 = 10, D0 = 0, ...) {
  I0 <- data$Cases[1] # initial number of infected (from data)
  times <- data$Date  # time points (from data)
  # model's predictions over the observed time points:
  predictions <- sir_1(alpha = alpha, beta = beta, gamma = gamma, eta = eta,
                       S0 = N - I0, E0 = E0, I0 = I0, R0 = 0, D0 = D0,
                       times = times)
  # plotting the observed prevalences:
  with(data, plot(Date, Cases, ...))
  # adding the model-predicted prevalence:
  with(predictions, lines(time, I, col = "red"))
}
# Overlay the model fit on the data, then quantify the fit quality.
model_fit(alpha = 1/997500000, beta = 1/1000, gamma = 1/14, eta = 1/280,
          flu, pch = 19, col = "red", ylim = c(0, 100000))
predictions <- sir_1(alpha = 1/997500000, beta = 1/1000,
                     gamma = 1/14, eta = 1/280,
                     S0 = 1000, E0 = 10, I0 = 1, R0 = 0, D0 = 0,
                     times = flu$Date)
predictions
# Sum of squared residuals between predicted and observed prevalence:
sum((predictions$I - flu$Cases)^2)
# the observed prevalences:
with(flu, plot(Date, Cases, pch = 19, col = "red", ylim = c(0, 1000)))
# the model-predicted prevalences:
with(predictions, lines(time, I, col = "red", type = "o"))
# the "errors", drawn as segments between each data point and its prediction:
segments(flu$Date, flu$Cases, predictions$time, predictions$I)
# Sum of squared differences between model-predicted and observed prevalence.
#
# FIX: gamma, eta, E0 and D0 were free global references — now explicit
# parameters with defaults, so the L147-style call passing gamma/eta by name
# no longer raises "unused arguments".  Also fixed: `D = D0` relied on
# partial argument matching, and `times = time_values` ignored the data's
# own time points.
ss <- function(alpha, beta, data = flu, N = 763,
               gamma = 1/14, eta = 1/280, E0 = 10, D0 = 0) {
  I0 <- data$Cases[1]
  times <- data$Date
  predictions <- sir_1(alpha = alpha, beta = beta, gamma = gamma, eta = eta,
                       S0 = N - I0, E0 = E0, I0 = I0, R0 = 0, D0 = D0,
                       times = times)
  # first point is pinned to the data, so compare from the second point on:
  sum((predictions$I[-1] - data$Cases[-1])^2)
}
ss(alpha = 1/997500000, beta = 1/1000, gamma = 1/14, eta = 1/280)
# One-dimensional profile of the sum of squares over alpha (beta fixed at 0.5):
alpha_val <- seq(from = 0.0016, to = 1/1000, length.out = 100)
ss_val <- vapply(alpha_val, ss, numeric(1), beta = 0.5)
min_ss_val <- min(ss_val)
min_ss_val
# alpha value(s) achieving the minimum:
alpha_hat <- alpha_val[ss_val == min_ss_val]
alpha_hat
plot(alpha_val, ss_val, type = "l", lwd = 2,
     xlab = expression(paste("infectious contact rate ", alpha)),
     ylab = "sum of squares")
# adding the minimal value of the sum of squares:
abline(h = min_ss_val, lty = 2, col = "grey")
# adding the estimate of alpha:
abline(v = alpha_hat, lty = 2, col = "grey")
# Same profiling, now over beta with alpha held at its estimate:
beta_val <- seq(from = 0.4, to = 0.575, length.out = 100)
ss_val <- vapply(beta_val, function(b) ss(alpha_hat, b), numeric(1))
(min_ss_val <- min(ss_val))
(beta_hat <- beta_val[ss_val == min_ss_val])
plot(beta_val, ss_val, type = "l", lwd = 2,
     xlab = expression(paste("recovery rate ", beta)),
     ylab = "sum of squares")
abline(h = min_ss_val, lty = 2, col = "grey")
abline(v = beta_hat, lty = 2, col = "grey")
# Coarse joint (alpha, beta) grid for a perspective view of -SS.
n <- 10 # number of parameter values to try in each dimension
alpha_val <- seq(from = 0.002, to = 0.0035, length.out = n)
beta_val <- seq(from = 0.3, to = 0.65, length.out = n)
# all (alpha, beta) combinations:
param_val <- expand.grid(alpha_val, beta_val)
# sum of squares for every combination:
ss_val <- with(param_val, Map(ss, Var1, Var2))
ss_val <- matrix(unlist(ss_val), n)
# FIX: original had `zlab = = "..."` (doubled `=`), a syntax error.
persp(alpha_val, beta_val, -ss_val, theta = 40, phi = 30,
      xlab = "alpha", ylab = "beta", zlab = "-sum of squares")
# Finer 30 x 30 joint grid search over (alpha, beta).
n <- 30
alpha_val <- seq(from = 0.002, to = 0.0035, length.out = n)
beta_val <- seq(from = 0.3, to = 0.65, length.out = n)
# calculating the sum of squares for every parameter pair:
param_val <- expand.grid(alpha_val, beta_val)
ss_val <- unlist(with(param_val, Map(ss, Var1, Var2)))
# minimum sum of squares and the parameter values achieving it:
(ss_min <- min(ss_val))
ind <- ss_val == ss_min
(alpha_hat <- param_val$Var1[ind])
(beta_hat <- param_val$Var2[ind])
# visualizing the sum-of-squares surface:
ss_val <- matrix(ss_val, n)
image(alpha_val, beta_val, ss_val,
      xlab = expression(paste("infectious contact rate ", alpha, " (/person/Date)")),
      ylab = expression(paste("recovery rate ", beta, " (/Date)")))
contour(alpha_val, beta_val, ss_val, add = TRUE)
points(alpha_hat, beta_hat, pch = 3)
box(bty = "o")
ss(alpha = 1/997500000, beta = 1/1000, gamma = 1/14, eta = 1/280)
# Wrapper so optim() can minimise over a single parameter vector
# x = (alpha, beta, gamma, eta).
ss2 <- function(x) {
  ss(alpha = x[1], beta = x[2], gamma = x[3], eta = x[4])
}
# FIX: the original used eta = 1/128 in the next two lines, inconsistent
# with eta = 1/280 everywhere else in the script — assumed to be a
# transposition typo.
ss2(c(1/997500000, 1/1000, 1/14, 1/280))
starting_param_val <- c(1/997500000, 1/1000, 1/14, 1/280)
# Nelder-Mead minimisation of the sum of squares:
ss_optim <- optim(starting_param_val, ss2)
ss_optim
ss_optim$value
ss_optim$par
# Minus log-likelihood under a Gaussian observation-error model.
# All rate parameters arrive on the log scale (see the mle2 start list) and
# are exponentiated so the optimiser can search the whole real line.
#
# FIX: gamma and eta are now formal parameters (the mle2 start list passes
# them, which previously raised "unused arguments"); E0/D0 gained defaults
# (they were undefined globals); the sir_1 call had a missing comma between
# `E0 = E0` and `I0 = I0` (syntax error) and used partial matching `D = D0`.
mLL <- function(alpha, beta, gamma, eta, sigma, Date, Cases, N = 763,
                E0 = 10, D0 = 0) {
  alpha <- exp(alpha) # to make sure that the parameters are positive
  beta <- exp(beta)
  gamma <- exp(gamma)
  eta <- exp(eta)
  sigma <- exp(sigma)
  I0 <- Cases[1]            # initial number of infectious
  observations <- Cases[-1] # the fit is done on the other data points
  predictions <- sir_1(alpha = alpha, beta = beta, gamma = gamma, eta = eta,
                       S0 = N - I0, E0 = E0, I0 = I0, R0 = 0, D0 = D0,
                       times = Date)
  predictions <- predictions$I[-1] # removing the first point too
  # returning minus log-likelihood:
  -sum(dnorm(x = observations, mean = predictions, sd = sigma, log = TRUE))
}
library(bbmle) # mle2, coef, confint, vcov, logLik, profile, summary, plot.profile.mle2
# Start the search from the fixed-parameter guesses; log-transformed because
# mLL exponentiates its rate parameters.
starting_param_val <- list(alpha = 1/997500000, beta = 1/1000,
                           gamma = 1/14, eta = 1/280, sigma = 1)
estimates <- mle2(
  minuslogl = mLL,
  start = lapply(starting_param_val, log),
  method = "Nelder-Mead",
  data = c(flu, N = 763)
)
summary(estimates)
exp(coef(estimates))    # back-transform estimates to the natural scale
exp(confint(estimates)) # back-transform the confidence intervals too
vcov(estimates)
-logLik(estimates)
# Likelihood profiles for each parameter:
prof <- profile(estimates)
plot(prof, main = NA)
# Best predictions and a Gaussian confidence band around them.
N <- 10000 # total population size used for the prediction curve
time_points <- seq(min(flu$Date), max(flu$Date), length.out = 100)
I0 <- flu$Cases[1]                # initial number of infected
param_hat <- exp(coef(estimates)) # parameter estimates (natural scale)
# model's best predictions
# FIX: original had a missing comma between `E0 = E0` and `I0 = I0`
# (syntax error); E0/D0 were undefined globals, replaced with the script's
# initial values (E = 10, D = 0); `times` is now passed by name.
best_predictions <- sir_1(alpha = param_hat["alpha"], beta = param_hat["beta"],
                          gamma = param_hat["gamma"], eta = param_hat["eta"],
                          S0 = N - I0, E0 = 10, I0 = I0, R0 = 0, D0 = 0,
                          times = time_points)$I
# confidence interval of the best predictions:
cl <- 0.95 # confidence level
cl <- (1 - cl) / 2
lwr <- qnorm(p = cl, mean = best_predictions, sd = param_hat["sigma"])
upr <- qnorm(p = 1 - cl, mean = best_predictions, sd = param_hat["sigma"])
# layout of the plot:
plot(time_points, time_points, ylim = c(0, max(upr)), type = "n",
     xlab = "time (Dates)", ylab = "prevalence")
# adding the predictions' confidence interval:
sel <- time_points >= 1 # predictions start from the second data point
polygon(c(time_points[sel], rev(time_points[sel])), c(upr[sel], rev(lwr[sel])),
        border = NA, col = adjustcolor("red", alpha.f = 0.1))
# adding the model's best predictions:
lines(time_points, best_predictions, col = "red")
# adding the observed data:
with(flu, points(Date, Cases, pch = 19, col = "red"))
# Poisson distribution of errors: only alpha is estimated; the other rates
# are held at their fixed values.
# FIX: the sir_1 call had a missing comma between `E0 = E0` and `I0 = I0`
# (syntax error), E0/D0 were undefined globals (now defaulted parameters),
# and `D = D0` relied on partial argument matching.
mLL_pois <- function(alpha, Date, Cases, N = 1000, E0 = 10, D0 = 0) {
  alpha <- exp(alpha) # parameter arrives on the log scale; keep it positive
  I0 <- Cases[1]            # initial number of infectious
  observations <- Cases[-1] # the fit is done on the other data points
  predictions <- sir_1(alpha = alpha, beta = 1/1000, gamma = 1/14, eta = 1/280,
                       S0 = N - I0, E0 = E0, I0 = I0, R0 = 0, D0 = D0,
                       times = Date)
  predictions <- predictions$I[-1] # removing the first point too
  if (any(predictions < 0)) return(NA) # safety: dpois needs lambda >= 0
  # returning minus log-likelihood:
  -sum(dpois(x = observations, lambda = predictions, log = TRUE))
}
starting_param_val <- list(alpha = 1/997500000)
# FIX: mle2's data argument must carry the Date and Cases columns by name;
# c(flu$Cases, N = 1000) collapsed everything into one unnamed numeric
# vector, so mLL_pois could never see its Date/Cases arguments.
estimates_pois <- mle2(minuslogl = mLL_pois,
                       start = lapply(starting_param_val, log),
                       data = c(flu, N = 1000))
# Compare the Gaussian and Poisson fits on the natural scale:
exp(coef(estimates))
exp(coef(estimates_pois))
exp(confint(estimates))
exp(confint(estimates_pois))
vcov(estimates_pois)
# point estimates:
param_hat <- exp(coef(estimates_pois))
# model's best predictions
# FIX: only alpha was estimated under the Poisson model, so
# param_hat["beta"] was NA — beta/gamma/eta stay at their fixed values
# (matching mLL_pois).  Also fixed the missing comma between `E0 = E0` and
# `I0 = I0` (syntax error) and the undefined E0/D0 globals.
best_predictions <- sir_1(alpha = param_hat["alpha"], beta = 1/1000,
                          gamma = 1/14, eta = 1/280,
                          S0 = N - I0, E0 = 10, I0 = I0, R0 = 0, D0 = 0,
                          times = time_points)$I
# confidence interval of the best predictions (Poisson quantiles):
cl <- 0.95 # confidence level
cl <- (1 - cl) / 2
lwr <- qpois(p = cl, lambda = best_predictions)
upr <- qpois(p = 1 - cl, lambda = best_predictions)
# layout of the plot:
plot(time_points, time_points, ylim = c(0, max(upr)), type = "n",
     xlab = "time (Dates)", ylab = "prevalence")
# adding the predictions' confidence interval:
sel <- time_points >= 1 # predictions start from the second data point
polygon(c(time_points[sel], rev(time_points[sel])), c(upr[sel], rev(lwr[sel])),
        border = NA, col = adjustcolor("red", alpha.f = 0.1))
# adding the model's best predictions:
lines(time_points, best_predictions, col = "red")
# adding the observed data:
with(flu, points(Date, Cases, pch = 19, col = "red"))
# Compare the two models' minus log-likelihoods:
-logLik(estimates)
-logLik(estimates_pois)