This is an update to my previous post and runs the PROCESS Model 1 without needing to use structural equation modelling.

Function

Below is a function for which you specify the Y, X, and M variables (and any covariates) together with a contrast matrix; the function will then calculate the interaction terms.

# Load required packages --------------------------------------------------
# requireNamespace(quietly = TRUE) checks availability without attaching
# the package or emitting a warning (unlike require() in an if-condition)
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(boot, lmtest, sandwich)

# PROCESS Model 1 ---------------------------------------------------------

#' Fit Hayes' PROCESS Model 1 (simple moderation with a multicategorical
#' moderator) using OLS regression.
#'
#' @param Y Character scalar: name of the outcome column in `data`.
#' @param X Character scalar: name of the focal predictor column.
#' @param M Character scalar: name of the factor moderator column.
#' @param contrMat Numeric k x (k - 1) contrast matrix; row i gives the
#'   codes assigned to cases at the i-th level of `M` (indicator,
#'   Helmert, effect coding, ...).
#' @param X.center Mean-center X before forming the interaction terms?
#' @param boot If TRUE, suppress all printed output and return only the
#'   conditional effects (for use inside a bootstrap statistic function).
#' @param cv Optional character vector of covariate column names.
#' @param data A data.frame containing all of the above columns.
#' @param rob.est Use HC3 heteroskedasticity-consistent standard errors?
#' @return Named numeric vector of conditional effects of X at each level
#'   of M: returned visibly when `boot = TRUE`, invisibly otherwise.
processMod1 <- function(Y, X, M,
                        contrMat,
                        X.center = FALSE,
                        boot = FALSE,
                        cv = NULL,
                        data,
                        rob.est = FALSE) {

  # Work on a complete-case copy of only the columns we need
  dataTemp <- na.omit(data[, c(Y, X, M, cv)])

  # Either way the working predictor column is named "<X>.c"
  if (isTRUE(X.center)) {
    dataTemp[, paste0(X, ".c")] <- dataTemp[, X] - mean(dataTemp[, X])
  } else {
    dataTemp[, paste0(X, ".c")] <- dataTemp[, X]
  }

  # Code the k - 1 moderator dummies from contrMat: row i of contrMat
  # supplies the codes for cases at the i-th level of M
  for (i in seq_len(nrow(contrMat))) {
    for (j in seq_len(ncol(contrMat))) {
      dataTemp[dataTemp[, M] == levels(dataTemp[, M])[i],
               paste0(M, ".", j)] <- contrMat[i, j]

      # Interaction of (possibly centered) X with each dummy; recomputed
      # each pass so it is complete once all levels have been coded
      dataTemp[, paste0("int.", j)] <-
        dataTemp[, paste0(X, ".c")] * dataTemp[, paste0(M, ".", j)]
    }
  }

  MNames <- paste0(M, ".", seq_len(ncol(contrMat)))
  intNames <- paste0("int.", seq_len(ncol(contrMat)))

  # Model 1: main effects (plus covariates) only
  formula1 <- paste0(Y, "~", paste0(X, ".c"), "+",
                     paste0(c(MNames, cv), collapse = "+"))
  fitLM1 <- do.call("lm", list(formula = as.formula(formula1),
                               data = dataTemp))

  # Model 2: Model 1 plus the X-by-M interaction terms
  fitLM2 <- do.call("update", list(object = fitLM1,
                                   formula = paste("~ . +",
                                                   paste0(intNames, collapse = "+"))))

  # Summary statistics for the two models
  sumLM1 <- summary(fitLM1)
  sumLM2 <- summary(fitLM2)

  # Show output unless we are inside a bootstrap replication
  if (!boot) {

    cat("\n",
        "\n", "Regression output:",
        "\n", sep = "")

    # R^2 change due to the interaction (R^2 is unaffected by robust SEs)
    r2change <- sumLM2$r.squared - sumLM1$r.squared

    # Heteroskedasticity-consistent standard errors if requested
    est <- if (rob.est) "HC3" else "const"

    # Regression table for the full model
    print(coeftest(fitLM2, vcov. = vcovHC(fitLM2, type = est)))

    # Wald/ANOVA test of the model with interactions (vs. intercept-only)
    waldTab <- data.frame(waldtest(fitLM2, vcov = vcovHC(fitLM2, type = est)))

    # Numerator df is reported as a negative change in df, hence abs()
    Fdf1 <- abs(waldTab[2, 2])
    Fdf2 <- waldTab[2, 1]
    Fval <- waldTab[2, 3]
    Fpval <- waldTab[2, 4]

    # Compare models 1 and 2 (significance of the R^2 change)
    compLM12 <- waldtest(fitLM1, fitLM2, vcov = function(x) {vcovHC(x, type = est)})

    # Nicely formatted output
    cat("ANOVA of Model with interaction term: ",
        "F(", round(Fdf1, 1), ", ",
        round(Fdf2, 1),") = ",
        round(Fval, 2)," p = ",
        Fpval,
        "\n",
        "\n", "R-squared change due to interaction: ", round(r2change, 4),
        ", F(", compLM12$Df[2], ", ", compLM12$Res.Df[2], ") = ", round(compLM12$F[2], 2), " p = " , compLM12$`Pr(>F)`[2],
        sep = "")
  }

  # Coefficients of the full model
  coeffs <- coef(fitLM2)

  # b1 is the coefficient on (centered) X
  bValues <- vector()
  bValues[1] <- as.numeric(coeffs[which(names(coeffs) == paste0(X, ".c"))])
  names(bValues)[1] <- "b1"

  # Interaction coefficients carry Hayes-style labels b(k+1)..b(2k-1),
  # i.e. they start right after the k - 1 dummy coefficients
  bLabel <- ncol(contrMat) + 2
  for (i in seq_len(ncol(contrMat))) {
    bValues[i + 1] <- as.numeric(coeffs[which(names(coeffs) == intNames[i])])
    names(bValues)[i + 1] <- paste0("b", bLabel)
    # Fixed: the original advanced the counter by i, mislabeling the
    # coefficients whenever the moderator has more than 3 levels
    bLabel <- bLabel + 1
  }

  # Conditional effect of X at level i of M:
  #   b1 + sum_j (interaction coefficient j) * contrMat[i, j]
  outPut <- as.numeric(bValues[1] + contrMat %*% bValues[-1])
  names(outPut) <- levels(dataTemp[, M])

  if (boot) {
    return(outPut)
  }

  # In interactive use return the conditional effects invisibly so the
  # printed report above is the visible output
  invisible(outPut)
}

Contrast Matrix

We need to specify a contrast matrix based on the type of variable coding we wish to use.

For indicator (or dummy coding) we simply follow the conventions shown in Table 1 of the previous post. The following code gives the desired contrast matrix:

# Indicator (dummy) coding: each row holds the codes for one level of the
# moderator; the k - 1 columns are the dummy variables D1 and D2
contrastInd <- rbind(c(0, 0),   # reference level
                     c(1, 0),
                     c(0, 1))

Which gives us the following matrix:

    [,1] [,2]
[1,]   0    0
[2,]   1    0
[3,]   0    1

Before we run the example, we need to reorder the factor levels for our moderator (school) variable to State, Independent, and Catholic. Otherwise, the levels default to alphabetical order.

# Reorder the moderator levels (reference level first) so they match the
# rows of the contrast matrix
schoolLevels <- c("State", "Independent", "Catholic")
dataSim$school <- factor(dataSim$school, levels = schoolLevels)

PROCESS Model 1 (without conditional effects)

Now let’s run the model without bootstrapping the conditional effects. We also specify that we want to center the X variable and return robust HC3-based standard errors.

# Fit PROCESS Model 1: X2 regressed on centered X1, moderated by school
processMod1(Y = "X2", X = "X1", M = "school",
            contrMat = contrastInd, # Indicator-coded contrast matrix
            X.center = TRUE,        # Mean-center the X variable
            rob.est = TRUE,         # HC3 robust standard errors
            cv = NULL,              # No covariates
            data = dataSim)

You can see from the output that we get the regression table, an overall ANOVA for the model with the interaction term(s), and the R-squared change with its associated F-test.

Regression output:

t test of coefficients:

             Estimate Std. Error  t value   Pr(>|t|)
(Intercept)  0.848891   0.067878  12.5062    2.2e-16 ***
X1.c         0.374854   0.062327   6.0143  2.632e-09 ***
school.1    -0.937244   0.088990 -10.5320    2.2e-16 ***
school.2    -0.914904   0.087824 -10.4175    2.2e-16 ***
int.1       -0.890031   0.082863 -10.7410    2.2e-16 ***
int.2       -0.554198   0.077864  -7.1175  2.256e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

ANOVA of Model with interaction term: F(5, 899) = 79.14 p = 8.87079e-69

R-squared change due to interaction: 0.09, F(2, 894) = 58.02 p = 2.042157e-24

PROCESS Model 1 Conditional Effects

To get the conditional effects with bootstrapped confidence intervals we will need to create a bootstrap function and run the bootstrapping with 5000 resamples.

# Create bootstrapped function

#' Bootstrap statistic for boot::boot(): refit PROCESS Model 1 on a
#' resampled data set and return the conditional effects of X at each
#' level of M.
#'
#' @param i Integer vector of resampled row indices supplied by boot().
#' @return Named numeric vector of conditional effects (see processMod1).
bootModel1 <- function(Y, X, M, cv, X.center, contrMat, data, i) {
  # boot = TRUE suppresses printing and makes processMod1 return the
  # conditional effects directly (no useless intermediate assignment)
  processMod1(Y = Y,
              X = X,
              M = M,
              X.center = X.center,
              boot = TRUE,
              cv = cv,
              contrMat = contrMat,
              data = data[i, ])
}

# Fix the RNG state so the resamples are reproducible
set.seed(12345)

# Run the bootstrap: 5000 resamples (this can take a while)
bootOut <- boot(
  statistic = bootModel1,
  R = 5000,
  data = dataSim,
  Y = "X2",
  X = "X1",
  M = "school",
  X.center = TRUE,
  cv = NULL,
  contrMat = contrastInd
)

# Create data.frame for bootOut output
coeff <- bootOut$t0

# Preallocate result vectors (one entry per conditional effect) instead
# of growing them inside the loop
nEff <- length(coeff)
se <- numeric(nEff)
bca.lower <- numeric(nEff)
bca.upper <- numeric(nEff)

for (i in seq_len(nEff)) {

  # Bootstrap standard error: SD of the resampled estimates
  se[i] <- sd(bootOut$t[, i])

  # Bias-corrected and accelerated (BCa) confidence interval; elements
  # 4 and 5 of ci$bca are the lower and upper limits
  ci <- boot.ci(bootOut, index = i, type = "bca")
  bca.lower[i] <- ci$bca[4]
  bca.upper[i] <- ci$bca[5]

}

# Get p-values for the conditional effects
# Two-sided bootstrap p-value: twice the smaller tail proportion of the
# resampled estimates on either side of zero. colMeans of the logical
# matrices replaces the explicit sum/length loop (identical arithmetic).
p.val <- 2 * pmin(colMeans(bootOut$t > 0),
                  colMeans(bootOut$t < 0))

# Assemble the results table: estimate, SE, z, p-value, and BCa limits
ciOut <- data.frame(
  coeff = coeff,
  se = se,
  z = coeff / se,
  p.val = p.val,
  bca.lower = bca.lower,
  bca.upper = bca.upper
)

# Print results
ciOut

This gives us the following output:

                 coeff         se          z  p.val  bca.lower   bca.upper
State        0.3748535 0.05550332   6.753713 0.0000  0.2626235  0.48291335
Independent -0.5151777 0.04813387 -10.703018 0.0000 -0.6072532 -0.41835225
Catholic    -0.1793441 0.05674241  -3.160672 0.0016 -0.2974390 -0.07526235

It is quite similar to the relevant output from lavaan, though the function above uses BCa confidence intervals and takes much less time to run.

           lhs op   rhs       label    est    se       z pvalue ci.lower ci.upper
22       state :=    b1       state  0.375 0.055   6.829  0.000    0.268    0.480
23 independent := b1+b4 independent -0.515 0.049 -10.518  0.000   -0.612   -0.418
24    catholic := b1+b5    catholic -0.179 0.057  -3.155  0.002   -0.293   -0.067
Category:
Bootstrapping, Moderation Analyses, Regression, Uncategorized
Tags:
, , ,

Join the conversation! 1 Comment

  1. […] The above code should look familiar because I used it in the PROCESS Model 1 function described in the previous post. […]

    Like

    Reply

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: