Bootstrap Mediation Analyses

Below is the R code for conducting mediation analyses using the bootstrap method detailed in Preacher and Hayes (2004). Please note that my function uses the bias corrected and accelerated (BCa) confidence intervals, whereas their SPSS and SAS macros use the bias corrected (BC) confidence intervals. However, they should yield similar results.

Note

This function is redundant and remains posted for archival purposes only.

Readers should refer to Andrew Hayes’ R version of the PROCESS macro. Alternatively, I recommend Keon-Woong Moon’s processR package, which although requires specification of models using lavaan, you can use its Shiny app to run your analyses and get automatically generated statistical diagrams and formatted tables.

The med() function takes the following arguments:

iv = independent variable (X). Use: iv = "iv“
dv = dependent variable (Y). Use: dv = "dv"
mv = mediating variable (M). Use: mv = "mv"
cv = covariate(s) if any. Use: cv = "cv.1" for single covariate or cv = c("cv.1", "cv.2", ..., "cv.n") for multiple covariates.
dec = number of decimal places (default = 4)
rep = number of bootstrap replications (default = 5000)

Function

# Load required libraries ----
if (!require(pacman)) install.packages("pacman")

pacman::p_load(boot, car, MASS, QuantPsych)

#---- Bootstrap Mediation Function ----

boot.med <- function(data, formula.m, formula.y, iv, mv, i) {
  data.resamp <- data[i, ]
  reg.m <- lm(formula.m, data = data.resamp)
  reg.y <- lm(formula.y, data = data.resamp)
  
  # Make sure that you pull the correct coefficients from each regression equation
  # With interactions, quadratics, etc, R may reorder the equation so you need to check it.
  
  # The a path coefficient
  a <- coefficients(reg.m)[iv] 
  
  # The b path coefficient
  b <- coefficients(reg.y)[mv] 
  
  # The indirect effect (product of a*b)
  ind <- a * b 
  
  #  The direct effect (c')
  dir <- coefficients(reg.y)[iv]
  
  # Total effect
  total <- ind + dir 
  
  # Pass to bootstrap function
  boot.med <- return(c(ind, dir, total))
}

#---- Mediation Analyses ----
#
# data = data frame
# iv = independent variable (X)
# dv = dependent variable (Y)
# mv = mediating variable (M)
# cv = covariate(s) if any. Use: cv = c("var.1", "var.2", "var.3", ..., "var.n")
# reps = number of bootstrap replications (default = 1000)
#

med <- function(data, iv, dv, mv, cv, dec = 4, reps = 5000) {
  
  # No covariates specified
  if (missing(cv)) {
      
    iv.formula <- as.formula(paste(mv, paste("~"), paste(iv)))
    mv.formula <- as.formula(paste(dv, paste("~"), paste(mv)))
      dv.formula <- as.formula(paste(dv, paste("~"), paste(iv)))
      dvmv.formula <- as.formula(
        paste(dv, paste("~"), paste(iv), paste("+"), paste(mv))
      )
  
    } else {
    
      iv.formula <- as.formula(
      paste(mv, paste("~"), paste(iv), paste("+"), paste(cv, collapse = "+"))
    )
    
    mv.formula <- as.formula(
      paste(dv, paste("~"), paste(mv), paste("+"), paste(cv, collapse = "+"))
    )
    
    dv.formula <- as.formula(
      paste(dv, paste("~"), paste(iv), paste("+"), paste(cv, collapse = "+"))
    )
    
    dvmv.formula <- as.formula(
      paste(dv, paste("~"), paste(iv), paste("+"), paste(mv), paste("+"), paste(cv, collapse = "+"))
    )
  }
  
  ## Regressions ----

  # IV on MV
  iv.out <- lm(iv.formula, data = data)
  
  # Coefficients
  iv.coef <- coef(iv.out)
  
  # Standard Errors
  iv.se <- summary(iv.out)$coefficients[, 2]
  
  # p-values
  iv.pr <- 2 * pt(
    abs(iv.coef / iv.se), 
    df = nrow(iv.out$model) - 2, 
    lower.tail = FALSE
  )
  
  # Confidence Intervals
  iv.ll <- iv.coef - 1.96 * iv.se
  iv.ul <- iv.coef + 1.96 * iv.se
  
  # Standardised Betas
  iv.beta <- lm.beta(iv.out)
  iv.beta <- cbind(NA, iv.beta)
  iv.beta <- rbind(NA, iv.beta)[, 2]
  
  # R-squared
  iv.rsquared <- summary(iv.out)$r.squared
  
  # p-value for R-squared
  iv.rsquared.pr <- pf(
    summary(iv.out)$fstatistic[1], 
    summary(iv.out)$fstatistic[2],
    summary(iv.out)$fstatistic[3],
    lower.tail = F
  )
  
  # Output Table
  iv.table <- cbind(
    "Estimate" = round(iv.coef, dec),
    "SE" = round(iv.se, dec),
    "Pr(>|t|)" = round(
      2 * pt(
            abs(iv.coef / iv.se),
            df = nrow(iv.out$model) - 2,
            lower.tail = FALSE
          ), 
      dec
      ),
    "LL" = round(iv.coef - 1.96 * iv.se, dec),
    "UL" = round(iv.coef + 1.96 * iv.se, dec),
    "Beta" = round(iv.beta, dec)
  )

  ### MV on DV ----
  mv.out <- lm(mv.formula, data = data)
  
  # Coefficients
  mv.coef <- coef(mv.out)
  
  # Standard Errors
  mv.se <- summary(mv.out)$coefficients[, 2]
  
  # p-values
  mv.pr <- 2 * pt(
    abs(mv.coef / mv.se), 
    df = nrow(mv.out$model) - 2, 
    lower.tail = FALSE
  )
  
  # Confidence intervals
  mv.ll <- mv.coef - 1.96 * mv.se
  mv.ul <- mv.coef + 1.96 * mv.se
  
  # Standardised Betas
  mv.beta <- lm.beta(mv.out)
  mv.beta <- cbind(NA, mv.beta)
  mv.beta <- rbind(NA, mv.beta)[, 2]
  
  # R-squared
  mv.rsquared <- summary(mv.out)$r.squared
  
  # p-value for R-squared
  mv.rsquared.pr <- pf(
    summary(mv.out)$fstatistic[1], 
    summary(mv.out)$fstatistic[2],
    summary(mv.out)$fstatistic[3],
    lower.tail = F
  )
  
  # Output Table
  mv.table <- cbind(
    "Estimate" = round(mv.coef, dec),
    "SE" = round(mv.se, dec),
    "Pr(>|t|)" = round(2 * pt(abs(mv.coef / mv.se),
      df = nrow(mv.out$model) - 2,
      lower.tail = FALSE
    ), dec),
    "LL" = round(mv.coef - 1.96 * mv.se, dec),
    "UL" = round(mv.coef + 1.96 * mv.se, dec),
    "Beta" = round(mv.beta, dec)
  )

  ### IV on DV ----
  dv.out <- lm(dv.formula, data = data)
  
  # Coefficients
  dv.coef <- coef(dv.out)
  
  # Standard Errors
  dv.se <- summary(dv.out)$coefficients[, 2] # SEs
  dv.pr <- 2 * pt(abs(dv.coef / dv.se), df = nrow(dv.out$model) - 2, lower.tail = FALSE) # p-values
  
  # Confidence Intervals
  dv.ll <- dv.coef - 1.96 * dv.se
  dv.ul <- dv.coef + 1.96 * dv.se
  
  # Standardised Betas
  dv.beta <- lm.beta(dv.out)
  dv.beta <- cbind(NA, dv.beta)
  dv.beta <- rbind(NA, dv.beta)[, 2]
  
  # R-squared
  dv.rsquared <- summary(dv.out)$r.squared # R2
  
  # p-value for R-squared
  dv.rsquared.pr <- pf(
    summary(dv.out)$fstatistic[1], 
    summary(dv.out)$fstatistic[2],
    summary(dv.out)$fstatistic[3],
    lower.tail = F
  )
  
  # Output Table
  dv.table <- cbind(
    "Estimate" = round(dv.coef, dec),
    "SE" = round(dv.se, dec),
    "Pr(>|t|)" = round(2 * pt(abs(dv.coef / dv.se),
      df = nrow(dv.out$model) - 2,
      lower.tail = FALSE
    ), dec),
    "LL" = round(dv.coef - 1.96 * dv.se, dec),
    "UL" = round(dv.coef + 1.96 * dv.se, dec),
    "Beta" = round(dv.beta, dec)
  )

  ### IV and MV on DV ----
  
  # Coefficients
  dvmv.out <- lm(dvmv.formula, data = data)
  dvmv.coef <- coef(dvmv.out)
  
  # Standard Errors
  dvmv.se <- summary(dvmv.out)$coefficients[, 2]
  dvmv.pr <- 2 * pt(
    abs(dvmv.coef / dvmv.se), 
    df = nrow(dvmv.out$model) - 2, 
    lower.tail = FALSE)
  
  # Confidence Intervales
  dvmv.ll <- dvmv.coef - 1.96 * dvmv.se
  dvmv.ul <- dvmv.coef + 1.96 * dvmv.se
  
  # Standardised Betas
  dvmv.beta <- lm.beta(dvmv.out)
  dvmv.beta <- cbind(NA, dvmv.beta)
  dvmv.beta <- rbind(NA, dvmv.beta)[, 2]
  
  # R-squared
  dvmv.rsquared <- summary(dvmv.out)$r.squared
  
  # p-value for R-squared
  dvmv.rsquared.pr <- pf(
    summary(dvmv.out)$fstatistic[1], 
    summary(dvmv.out)$fstatistic[2],
    summary(dvmv.out)$fstatistic[3],
    lower.tail = F
  )
  
  # Output Table
  dvmv.table <- cbind(
    "Estimate" = round(dvmv.coef, dec),
    "SE" = round(dvmv.se, dec),
    "Pr(>|t|)" = round(
      2 * pt(
        abs(dvmv.coef / dvmv.se),
        df = nrow(data) - 2,
        lower.tail = FALSE
        ), 
    dec),
    "LL" = round(dvmv.coef - 1.96 * dvmv.se, dec),
    "UL" = round(dvmv.coef + 1.96 * dvmv.se, dec),
    "Beta" = round(dvmv.beta, dec)
  )
  
  ## Bootstrap function ----
  
  medboot.out <- boot(
    statistic = boot.med,
    data = data,
    formula.m = iv.formula,
    formula.y = dvmv.formula,
    iv = iv,
    mv = mv,
    R = reps,
    parallel = "multicore"
  )
  
  # Indirect effect
  indirect.b <- round(summary(medboot.out)$original[1], dec) # Indirect B
  indirect.se <- round(summary(medboot.out)$bootSE[1], dec) # Indirect SE
  indirect.bca <- boot.ci(medboot.out, type = "bca", index = 1) # Indirect BCA
  indirect.bca.l <- round(indirect.bca$bca[4], dec) # Extract lower BCa CI
  indirect.bca.u <- round(indirect.bca$bca[5], dec) # Extract upper BCa CI
  
  # Direct effect
  direct.b <- round(summary(medboot.out)$original[2], dec) # Direct B
  direct.se <- round(summary(medboot.out)$bootSE[2], dec) # Direct SE
  direct.bca <- boot.ci(medboot.out, type = "bca", index = 2) # indirect
  direct.bca.l <- round(direct.bca$bca[4], dec) # Extract lower BCa CI
  direct.bca.u <- round(direct.bca$bca[5], dec) # Extract upper BCa CI
  
  # Total effects
  total.b <- round(summary(medboot.out)$original[3], dec) # Direct B
  total.se <- round(summary(medboot.out)$bootSE[3], dec) # Direct SE
  total.bca <- boot.ci(medboot.out, type = "bca", index = 3) # Total Effect
  total.bca.l <- round(total.bca$bca[4], dec) # Extract lower BCa CI
  total.bca.u <- round(total.bca$bca[5], dec) # Extract upper BCa CI
  
  
  cat(
    paste(
      "\n",
      "Independent Variable: ", iv,
      "\n", "Dependent Variable: ", dv,
      "\n", "Mediator Variable: ", mv,
      "\n",
      "\n",
      "IV (and CVs) on MV: Path a",
      "\n",
      sep = ""
    )
  )

  print(iv.table)

  cat(
    paste(
      "\n",
      "R-Squared: ", round(iv.rsquared, dec), ", p-value: ", round(iv.rsquared.pr, dec),
      "\n",
      sep = ""
    )
  )

  cat(paste("\n",
    "\n",
    "MV (and CVs) on DV: Path b",
    "\n",
    sep = ""
  ))
  
  print(mv.table)

  cat(
    paste(
      "\n",
      "R-Squared: ", round(mv.rsquared, dec), ", p-value: ", round(mv.rsquared.pr, dec),
      "\n",
      sep = ""
    )
  )

  cat(
    paste(
      "\n",
      "\n",
      "IV (and CVs) on DV: Path c",
      "\n",
      sep = ""
    )
  )
  
  print(dv.table)
  
  cat(
    paste(
      "\n",
      "R-Squared: ", round(dv.rsquared, dec), ", p-value: ", round(dv.rsquared.pr, dec),
      sep = ""
    )
  )

  cat(
    paste(
      "\n",
      "\n",
      "IV and MV (and CVs) on DV",
      "\n",
      sep = ""
    )
  )
  
  print(dvmv.table)
  
  cat(
    paste(
      "\n",
      "R-Squared: ", round(dvmv.rsquared, dec), ", p-value: ", round(dvmv.rsquared.pr, dec),
      "\n",
      sep = ""
    )
  )
  
  cat(
    "\n",
    "============================================", "\n",
    "Bootstrap Mediation Effects with 95% BCa CIs", "\n",
    "============================================", "\n",
    "\n",
    "Indirect effect", "\n",
    "---------------", "\n",
    paste("B = ", indirect.b, ", SE = ", indirect.se, ", 95% CI [", indirect.bca.l, ", ", indirect.bca.u, "]", sep = ""),
    "\n", 
    "\n",
    "Direct effect", "\n",
    "-------------", "\n",
    paste("B = ", direct.b, ", SE = ", direct.se, ", 95% CI [", direct.bca.l, ", ", direct.bca.u, "]", sep = ""),
    "\n", 
    "\n",
    "Total effect", "\n",
    "-------------", "\n",
    paste("B = ", total.b, ", SE = ", total.se, ", 95% CI [", total.bca.l, ", ", total.bca.u, "]", sep = ""),
    "\n",
    sep = ""
  )
}

Example

Assessing whether body image inflexibility (biaaq.total) mediates the relationship between negative body appearance evaluation (mbsrq.ae) and avoidance behaviours (bicsi.avoid) while controlling for age (age), gender (gender), and body image investment (asir.comp).

med(
	data = body_image, 
	dv = "bicsi.avoid", 
	iv = "mbsrq.ae", 
	mv = "biaaq.total", 
	cv = c("gender", "asir.comp"), 
	dec = 3, 
	reps = 5000
)

Independent Variable: mbsrq.ae
Dependent Variable: bicsi.avoid
Mediator Variable: biaaq.total

IV (and CVs) on MV
            Estimate    SE Pr(>|t|)      LL      UL   Beta
(Intercept)   79.286 6.112        0  67.306  91.265     NA
mbsrq.ae       6.926 0.890        0   5.181   8.671  0.394
genderMale    -6.301 1.774        0  -9.779  -2.824 -0.171
asir.comp    -12.881 1.277        0 -15.384 -10.378 -0.518

R-Squared: 0.547, p-value: 0

MV (and CVs) on DV
            Estimate    SE Pr(>|t|)     LL     UL   Beta
(Intercept)    2.761 0.268    0.000  2.236  3.286     NA
biaaq.total   -0.021 0.002    0.000 -0.025 -0.017 -0.737
genderMale    -0.272 0.059    0.000 -0.388 -0.157 -0.256
asir.comp     -0.173 0.051    0.001 -0.273 -0.073 -0.241

R-Squared: 0.417, p-value: 0

IV (and CVs) on DV
            Estimate    SE Pr(>|t|)     LL     UL   Beta
(Intercept)    1.230 0.233    0.000  0.773  1.688     NA
mbsrq.ae      -0.181 0.034    0.000 -0.248 -0.114 -0.357
genderMale    -0.133 0.068    0.051 -0.266  0.000 -0.125
asir.comp      0.084 0.049    0.086 -0.011  0.180  0.117

R-Squared: 0.208, p-value: 0

IV and MV (and CVs) on DV
            Estimate    SE Pr(>|t|)     LL     UL   Beta
(Intercept)    2.806 0.269    0.000  2.278  3.334     NA
mbsrq.ae      -0.043 0.033    0.191 -0.108  0.022 -0.086
biaaq.total   -0.020 0.002    0.000 -0.024 -0.015 -0.688
genderMale    -0.258 0.060    0.000 -0.375 -0.141 -0.243
asir.comp     -0.172 0.051    0.001 -0.272 -0.072 -0.239

R-Squared: 0.422, p-value: 0

============================================
Bootstrap Mediation Effects with 95% BCa CIs
============================================

Indirect effect
---------------
B = -0.138, SE = 0.024, 95% CI [-0.194, -0.098]

Direct effect
-------------
B = -0.043, SE = 0.038, 95% CI [-0.126, 0.025]

Total effect
-------------
B = -0.181, SE = 0.038, 95% CI [-0.259, -0.108]

Write up of results in APA format

Controlling for the gender, age, and body image investment, the mediation analysis showed that appearance evaluation had a significant total effect on appearance-fixing (B = -0.18, 95% CI [-0.26, -0.11], p < .05), a non-significant direct effect (B = -0.04, 95% CI [-0.13, 0.03], p = n.s.), and a significant indirect effect (B = -0.14, 95% CI [-0.19, -0.10], p < .05). This pattern of results suggests that body image inflexibility fully mediates the relationship between appearance evaluation and avoidance behaviours.

Reference

Preacher, K.J., & Hayes, A.F. (2004). SPSS and SAS procedures for estimating indirect effects in simple Mediation models. Behavior Research Methods, Instruments, & Computers, 36, 717-731.