Multiple Imputation for Regression in R
Arndt Regorz, Dipl. Kfm. & M.Sc. Psychology, 11/20/2022
Here is the R code for the YouTube tutorial about multiple imputation for regression analysis in R.
# install.packages("mice")
library(mice)
# Example data: the nhanes2 dataset that ships with the mice package
help("nhanes2")
summary(nhanes2)
# Check the column types: categorical variables must be stored as factors!
str(nhanes2)
# Overview of the missing-data patterns
md.pattern(nhanes2)
# Baseline: ordinary regression, incomplete cases are excluded listwise
reg.fit <- lm(formula = chl ~ bmi + hyp, data = nhanes2)
summary(reg.fit)
# Multiple Imputation with the mice package
#=========================================
# 1. Imputation
# Creates m = 50 imputed datasets with maxit = 10 iterations each; the fixed
# seed makes the run reproducible. The argument that silences the progress
# log is spelled out in full (printFlag) instead of relying on partial
# argument matching of `print`.
imp.data <- mice(
  data = nhanes2, m = 50, maxit = 10, seed = 12345, printFlag = FALSE
)
# Which methods were used?
imp.data
#====================================================
# Optional: looking at the imputed datasets
# (in general, I don't do this)
# "long" stacks all imputed datasets on top of each other
imp.datasets <- complete(imp.data, "long")
imp.datasets
# Optional: changing the imputation method
# Show the imputation methods that are available
methods(mice)
# One entry per column of nhanes2, in column order (presumably age, bmi,
# hyp, chl - check with names(nhanes2)); "" means the column is not imputed
my.method <- c("", "pmm", "logreg", "norm")
# printFlag is written out in full instead of the partially matched `print`
imp.data2 <- mice(
  data = nhanes2, m = 50, maxit = 10, seed = 12345,
  method = my.method, printFlag = FALSE
)
imp.data2
# But we use the default values from above for the rest of the tutorial
#====================================================
# Checking if convergence was achieved
# (plot method for the imputation object created by mice())
plot(imp.data)
# Checking if imputed data has plausible values
stripplot(imp.data)
# The same check for the changed estimation method; imp.data2 only exists
# if the optional code that changes the imputation method was run
stripplot(imp.data2)
# 2. Fit the same regression model separately in every imputed dataset
reg.fit.mi <- with(imp.data, lm(chl ~ bmi + hyp))
reg.fit.mi
# Look at a single fit, e.g. the one based on the 3rd imputation
summary(reg.fit.mi[["analyses"]][[3]])
# 3. Combine the per-dataset fits into one pooled result
pool.fit <- pool(reg.fit.mi)
summary(pool.fit)
# Technical details of the pooled object (see the help page for mipo)
pool.fit
help("mipo")
# Pooled R squared
pool.r2 <- pool.r.squared(reg.fit.mi)
pool.r2
# For comparison: the regression with listwise exclusion once more
summary(reg.fit)