# Define function
sim_covars <- function(n = 20, diff = 0, sd_y = 1,
rho_covars = c(0.7, 0.3, 0)) {
# Parameters
# n <- 20 # observations per group
# diff <- 0 # difference between groups
# sd_y <- 3 # within-group sd of outcome (population-level)
# rho_covars <- c(0.7, 0.3, 0) # correlation of control variables with outcome
# in control group; add more coefficients to
# include more covariates
# In this simulation, the control variables are *not* intercorrelated.
# Construct variance-covariance matrix
k_covars <- length(rho_covars)
sds <- c(sd_y, rep(1, k_covars))
cor_matrix <- matrix(data = 0, nrow = 1 + k_covars, ncol = 1 + k_covars)
diag(cor_matrix) <- 1
cor_matrix[1, ] <- c(1, rho_covars)
cor_matrix[, 1] <- c(1, rho_covars)
cov_matrix <- cor_matrix * tcrossprod(sds)
# Generate data
d <- MASS::mvrnorm(n = 2*n, mu = rep(0, 1 + k_covars),
Sigma = cov_matrix)
d <- as.data.frame(d)
d$V1[1:n] <- d$V1[1:n] + diff
d$Condition <- rep(c("experimental", "control"), each = n)
# Fit model with all covariates
m1 <- lm(V1 ~ ., data = d)
# Fit model without covariates
m2 <- lm(V1 ~ Condition, data = d)
# Results
results <- list(
estimate_covar = unname(coef(m1)[2 + k_covars]),
se_covar = summary(m1)$coefficients[2 + k_covars, 2],
ci_lo_covar = confint(m1)[2 + k_covars, 1],
ci_hi_covar = confint(m1)[2 + k_covars, 2],
estimate_nocovar = unname(coef(m2)[2]),
se_nocovar = summary(m2)$coefficients[2, 2],
ci_lo_nocovar = confint(m2)[2, 1],
ci_hi_nocovar = confint(m2)[2, 2]
)
results
}
# Run function 10k times
sim_results <- matrix(ncol = 8, nrow = 1e4)
for (i in 1:1e4) {
sim_results[i, ] <- unlist(sim_covars(n = 8, rho_covars = c(0.5, 0.2, 0)))
}
# Let's compare the widths of the 95% CIs
ci_width_covar <- sim_results[, 4] - sim_results[, 3]
ci_width_nocovar <- sim_results[, 8] - sim_results[, 7]
difference_width <- ci_width_nocovar - ci_width_covar
hist(difference_width)

summary(difference_width)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4.24478 -0.21508 0.03602 0.04967 0.31315 2.16452
# On average, slightly wider CI width when ignoring covariates,
# so not harmful.
# Now with two useless covariates and one with hardly any use
for (i in 1:1e4) {
sim_results[i, ] <- unlist(sim_covars(n = 8, rho_covars = c(0.1, 0, 0)))
}
ci_width_covar <- sim_results[, 4] - sim_results[, 3]
ci_width_nocovar <- sim_results[, 8] - sim_results[, 7]
difference_width <- ci_width_nocovar - ci_width_covar
hist(difference_width)

summary(difference_width)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.8266 -0.4990 -0.2935 -0.3184 -0.1013 1.0229
# Now narrower CIs when ignoring covariates, so useless covariates are harmful.
# Upping n a bit.
for (i in 1:1e4) {
sim_results[i, ] <- unlist(sim_covars(n = 15, rho_covars = c(0.1, 0, 0)))
}
ci_width_covar <- sim_results[, 4] - sim_results[, 3]
ci_width_nocovar <- sim_results[, 8] - sim_results[, 7]
difference_width <- ci_width_nocovar - ci_width_covar
hist(difference_width)

summary(difference_width)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.80265 -0.14614 -0.08467 -0.08732 -0.02564 0.43218
# Same tendency, if less pronounced.
# Upping rho a bit for one variable.
for (i in 1:1e4) {
sim_results[i, ] <- unlist(sim_covars(n = 15, rho_covars = c(0.35, 0, 0)))
}
ci_width_covar <- sim_results[, 4] - sim_results[, 3]
ci_width_nocovar <- sim_results[, 8] - sim_results[, 7]
difference_width <- ci_width_nocovar - ci_width_covar
hist(difference_width)

summary(difference_width)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.713681 -0.079998 -0.002944 0.008030 0.090197 0.774731
# Now pretty much no harm, no benefit either.