I have used the mixed_model function from GLMMadaptive to run a model with fixed effects and a random effect to account for clustering. I have extracted the ICC using icc() from performance. I want to estimate the ICC confidence intervals.
I know that other functions are available to do this that are compatible with lme4 models, but they don't seem to apply to GLMMadaptive models. (note that I have been advised to use GLMMadaptive rather than lme4 due to the differences in approximation methods.)
In the meantime, I have used simulate() to try to estimate ICC. This involved simulating new data from my model, re-fitting a new model to simulated data, and extracting parameters of interest. It looks like my code to update the model (newfit = update(m, newy ~ .) is not working and it produces warnings. When I explore the model output, the model output/estimates are the same as the original model and the ICC confidence interval output are just the ICC value.
In summary, my questions are:
- Is simulate() the right approach to estimate ICC confidence intervals from my model (or is there another specific function that will estimate the parameters)?
- If simulate() is the right approach, how can I fix my code to produce meaningful/correct estimates?
packages
library(magrittr)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(GLMMadaptive)
library(performance)
library(reprex)
#Create dataset
set.seed(432)
dummy_vec <- rbinom(400, 1, prob = 0.3) # Create dummy vector
head(dummy_vec) # Print dummy vector
#> [1] 0 0 1 0 1 1
dummy_mat <- matrix(dummy_vec, ncol = 4) # Convert vector to matrix
data <- as.data.frame(dummy_mat) # Convert matrix to data frame
cols<-c("2","3","4")
data %<>%
mutate_each_(funs(factor(.)),cols) # Convert columns to factors
#> Warning: mutate_each_()
was deprecated in dplyr 0.7.0.
#> Please use across()
instead.
#> This warning is displayed once every 8 hours.
#> Call lifecycle::last_lifecycle_warnings()
to see where this warning was generated.
#> Warning: funs()
was deprecated in dplyr 0.8.0.
#> Please use a list of either functions or lambdas:
#>
#> # Simple named list:
#> list(mean = mean, median = median)
#>
#> # Auto named with tibble::lst()
:
#> tibble::lst(mean, median)
#>
#> # Using lambdas
#> list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
#> This warning is displayed once every 8 hours.
#> Call lifecycle::last_lifecycle_warnings()
to see where this warning was generated.
str(data)
#> 'data.frame': 100 obs. of 4 variables:
#> V1: int 0 0 1 0 1 1 1 0 0 1 ...
#> V2: Factor w/ 2 levels "0","1": 1 2 2 2 1 1 1 2 2 1 ...
#> V3: Factor w/ 2 levels "0","1": 1 2 1 2 2 1 2 2 1 2 ...
#> V4: Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 2 1 ...
names(data)[1] <- "distress" # Rename column names
names(data)[2] <- "mhhx"
names(data)[3] <- "gender"
names(data)[4] <- "exposure"
data$school<-sample(100, size = nrow(data), replace = TRUE) #Add new column
head(data) # Visualise data
#> distress mhhx gender exposure school
#> 1 0 0 0 0 4
#> 2 0 1 1 0 20
#> 3 1 1 0 0 21
#> 4 0 1 1 1 49
#> 5 1 0 1 0 94
#> 6 1 0 0 0 18
str(data)
#> 'data.frame': 100 obs. of 5 variables:
#> distress: int 0 0 1 0 1 1 1 0 0 1 ...
#> mhhx : Factor w/ 2 levels "0","1": 1 2 2 2 1 1 1 2 2 1 ...
#> gender : Factor w/ 2 levels "0","1": 1 2 1 2 2 1 2 2 1 2 ...
#> exposure: Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 2 1 ...
#> $ school : int 4 20 21 49 94 18 45 23 38 53 ...
#Run mixed model
m<-mixed_model(fixed=distress~mhhx+gender+exposure,random=~1|school,data=data, family="binomial")
summary(m)
#>
#> Call:
#> mixed_model(fixed = distress ~ mhhx + gender + exposure, random = ~1 |
#> school, data = data, family = "binomial")
#>
#> Data Descriptives:
#> Number of Observations: 100
#> Number of Groups: 67
#>
#> Model:
#> family: binomial
#> link: logit
#>
#> Fit statistics:
#> log.Lik AIC BIC
#> -62.2027 134.4054 145.4289
#>
#> Random effects covariance matrix:
#> StdDev
#> (Intercept) 0.1314838
#>
#> Fixed effects:
#> Estimate Std.Err z-value p-value
#> (Intercept) -0.3308 0.3136 -1.0547 0.291551
#> mhhx1 -0.9077 0.5221 -1.7387 0.082086
#> gender1 -0.3253 0.4960 -0.6559 0.511905
#> exposure1 -0.0532 0.4749 -0.1121 0.910769
#>
#> Integration:
#> method: adaptive Gauss-Hermite quadrature rule
#> quadrature points: 11
#>
#> Optimization:
#> method: hybrid EM and quasi-Newton
#> converged: TRUE
#Extract icc
icc(m)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.005
#> Conditional ICC: 0.005
#Simulate new data from model and fit new model to simulated data
nsim = 10
results = rep(NA, nsim)
for (isim in 1:nsim) {
newy=GLMMadaptive::simulate(m)
newfit = update(m, newy ~ .)
results[isim] = icc(newfit)[[1]]
}
#> Warning in mixed_model(fixed = distress ~ mhhx + gender + exposure, random = ~1
#> | : unknown names in control: formula
head(newy) # Visualise data
#> [,1]
#> [1,] 0
#> [2,] 1
#> [3,] 0
#> [4,] 0
#> [5,] 0
#> [6,] 0
str(newy)
#> num [1:100, 1] 0 1 0 0 0 0 0 0 1 0 ...
all(newy == m$data$distress) # Checking whether simulated data different to original data
#> [1] FALSE
summary(newfit) # Checking model (estimates are the same as m)
#>
#> Call:
#> mixed_model(fixed = distress ~ mhhx + gender + exposure, random = ~1 |
#> school, data = data, family = "binomial", formula = newy ~
#> mhhx + gender + exposure)
#>
#> Data Descriptives:
#> Number of Observations: 100
#> Number of Groups: 67
#>
#> Model:
#> family: binomial
#> link: logit
#>
#> Fit statistics:
#> log.Lik AIC BIC
#> -62.2027 134.4054 145.4289
#>
#> Random effects covariance matrix:
#> StdDev
#> (Intercept) 0.1314838
#>
#> Fixed effects:
#> Estimate Std.Err z-value p-value
#> (Intercept) -0.3308 0.3136 -1.0547 0.291551
#> mhhx1 -0.9077 0.5221 -1.7387 0.082086
#> gender1 -0.3253 0.4960 -0.6559 0.511905
#> exposure1 -0.0532 0.4749 -0.1121 0.910769
#>
#> Integration:
#> method: adaptive Gauss-Hermite quadrature rule
#> quadrature points: 11
#>
#> Optimization:
#> method: hybrid EM and quasi-Newton
#> converged: TRUE
#Extract paramaters from refitted model
icc(newfit)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.005
#> Conditional ICC: 0.005
quantile(results, c(0.025, 0.975))
#> 2.5% 97.5%
#> 0.005227451 0.005227451