# Moderation with a multicategorical moderating variable part II

UPDATE: Please see the following post for an all-in-one solution: RePROCESS Model 1

This is an update to my previous post and runs the PROCESS Model 1 without needing to use structural equation modelling.

# Function

Below is a function for which you specify the YX, and M (and any covariates) together with a contrast matrix and will calculate the interaction terms.

```# Load required packages --------------------------------------------------
if (!require(pacman)) {install.packages("pacman")}

# PROCESS Model 1 ---------------------------------------------------------
processMod1 = function(Y, X, M,
contrMat,
X.center = FALSE,
boot = FALSE,
cv = NULL,
data,
rob.est = FALSE) {

# Create temporary data.frame and omit missing values
dataTemp = na.omit(data[, c(Y, X, M, cv)])

# Center X if needed
if(X.center == TRUE) {

dataTemp[, paste0(X, ".c")] = dataTemp[, X] - mean(dataTemp[, X])

} else {

dataTemp[, paste0(X, ".c")] = dataTemp[, X]
}

# Code Dk-1 variables based on the contrMat
for(i in 1:nrow(contrMat)) {

for(j in 1:ncol(contrMat)) {

dataTemp[dataTemp[, M] == levels(dataTemp[, M])[i], paste0(M, ".", j)] = contrMat[i, j]

# Create interaction terms
dataTemp[, paste0("int.", j)] = dataTemp[, paste0(X, ".c")] * dataTemp[, paste0(M, ".", j)]
}

}

MNames = paste0(M, ".", seq(1, ncol(contrMat), by = 1))
intNames = paste0("int.", seq(1, ncol(contrMat), by = 1))

# Create formula without interaction terms
formula1 = paste0(Y, "~", paste0(X, ".c"), "+",
paste0(c(MNames, cv), collapse = "+"))

# Fit model without interaction terms
fitLM1 = do.call("lm", list(formula = as.formula(formula1),
data = dataTemp))

# Fit model with interaction terms

fitLM2 = do.call("update", list(object = fitLM1,
formula = paste("~ . +",
paste0(intNames, collapse = "+"))))

# Get summary statistics for the two models
sumLM1 = summary(fitLM1)
sumLM2 = summary(fitLM2)

# Shows output if requested
if (boot == FALSE) {

cat("\n",
"\n", "Regression output:",
"\n", sep = "")

# Compare R-2 changes (R^2 does not change if using robust SEs)
r2change = sumLM2\$r.squared - sumLM1\$r.squared

# Use heteroskedasticity-consistent standard errors?
est = ifelse(rob.est == FALSE, "const", "HC3")

# Return regression table
print(coeftest(fitLM2, vcov. = vcovHC(fitLM2, type = est)))

# Run ANOVA for the model with interactions
sumLM2 = data.frame(waldtest(fitLM2, vcov = vcovHC(fitLM2, type = est)))

# Get F-values for Model 2
Fdf1 = abs(sumLM2[2, 2])
Fdf2 = sumLM2[2, 1]
Fval = sumLM2[2, 3]
Fpval = sumLM2[2, 4]

# Compare models 1 and 2 (R^2 change)
compLM12 = waldtest(fitLM1, fitLM2, vcov = function(x) {vcovHC(x, type = est)})

# Give nicely formatted output
cat("ANOVA of Model with interaction term: ",
"F(", round(Fdf1, 1), ", ",
round(Fdf2, 1),") = ",
round(Fval, 2)," p = ",
Fpval,
"\n",
"\n", "R-squared change due to interaction: ", round(r2change, 4),
", F(", compLM12\$Df[2], ", ", compLM12\$Res.Df[2], ") = ", round(compLM12\$F[2], 2), " p = " , compLM12\$`Pr(>F)`[2],
sep = "")
}

# Get coefficients
coeffs = coef(fitLM2)

# Create an empty vector to store b values
bValues = vector()

# b1
bValues[1] = as.numeric(coeffs[which(names(coeffs) == paste0(X, ".c"))])
names(bValues)[1] = "b1"

# b(k+2) to b(2k-1)
k = ncol(contrMat) + 2

for (i in 1:ncol(contrMat)) {

# Store in bValues vector
bValues[i + 1] = as.numeric(coeffs[which(names(coeffs) == intNames[i])])

# Assign coefficient name
names(bValues)[i + 1] = paste0("b", k)

# Increase counter
k = k + i
}

# Conditional effects

# Create empty vector
outPut = vector()

# Give names to vector
# names(outPut) = levels(dataTemp[, M])

# Loop through columns
for (i in 1:nrow(contrMat)) {

# b1 is common to all conditional effects
outPut[i] = bValues[1]

# Loop through rows
for (j in 1:ncol(contrMat)) {

# Get conditional effects
outPut[i] = outPut[i] + (bValues[j + 1] * contrMat[i, j])
}

}

names(outPut) = levels(dataTemp[, M])

if (boot == TRUE) {

return(outPut)

}
}
```

## Contrast Matrix

We need to specify a contrast matrix based on the type of variable coding we wish to use.

For indicator (or dummy coding) we simply follow the conventions shown in Table 1 of the previous post. The following code gives the desired contrast matrix:

```contrastInd <- matrix(c(0, 0,
1, 0,
0, 1),
ncol = 2,     # k - 1
nrow = 3,     # k
byrow = TRUE)
```

Which gives us the following matrix:

```    [,1] [,2]
[1,]   0    0
[2,]   1    0
[3,]   0    1
```

Before we run the example, we need to reorder the factor levels for our moderator (`school`) variable to State, Independent, and Catholic. Otherwise, the levels default to alphabetical order.

```dataSim\$school = factor(dataSim\$school,
levels = c("State",
"Independent",
"Catholic"))
```

## PROCESS Model 1 (without conditional effects)

Now let’s run the model without bootstrapping the conditional effects. We also specify that we want to center the X variable and return robust HC3-based standard errors.

```processMod1(Y = "X2",
X = "X1",
M = "school",
X.center = TRUE,        # Center the X variable
rob.est = TRUE,         # Use HC3 standard errors
contrMat = contrastInd, # The contrast matrix
cv = NULL,              # No covariates used
data = dataSim)
```

You can see from the output that we get both the regression table, an overall ANOVA for the model with the interaction term(s), and the R2 change and associated F-test.

```Regression output:

t test of coefficients:

Estimate Std. Error  t value   Pr(>|t|)
(Intercept)  0.848891   0.067878  12.5062    2.2e-16 ***
X1.c         0.374854   0.062327   6.0143  2.632e-09 ***
school.1    -0.937244   0.088990 -10.5320    2.2e-16 ***
school.2    -0.914904   0.087824 -10.4175    2.2e-16 ***
int.1       -0.890031   0.082863 -10.7410    2.2e-16 ***
int.2       -0.554198   0.077864  -7.1175  2.256e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

ANOVA of Model with interaction term: F(5, 899) = 79.14 p = 8.87079e-69

R-squared change due to interaction: 0.09, F(2, 894) = 58.02 p = 2.042157e-24
```

## PROCESS Model 1 Conditional Effects

To get the conditional effects with bootstrapped confidence intervals we will need to create a bootstrap function and run the bootstrapping with 5000 resamples.

```# Create bootstrapped function
bootModel1 = function(Y, X, M, cv, X.center, contrMat, data, i) {

dataResamp = data[i, ]

bootCondEffects = processMod1(Y = Y,
X = X,
M = M,
X.center = X.center,
boot = TRUE,
cv = cv,
contrMat = contrMat,
data = dataResamp)
}

# Set seed to replicate results
set.seed(12345)

# Call boot function (might take some time)
bootOut = boot(statistic = bootModel1,
Y = "X2",
X = "X1",
M = "school",
X.center = TRUE,
cv = NULL,
contrMat = contrastInd,
data = dataSim,
R = 5000)

# Create data.frame for booOut output
coeff = bootOut\$t0
se = vector()
bca.lower = vector()
bca.upper = vector()

for(i in 1:length(coeff)) {

# Standard Error
se[i] = sd(bootOut\$t[, i])

# Confidence intervals (BCa)
ci = boot.ci(bootOut, index = i, type = "bca")
bca.lower[i] = ci\$bca[4]
bca.upper[i] = ci\$bca[5]

}

# Get p-values for the conditional effects
p.val = vector()

for(i in 1:length(coeff)) {

p1 = sum(bootOut\$t[, i] > 0)/length(bootOut\$t[, i])
p2 = sum(bootOut\$t[, i] < 0)/length(bootOut\$t[, i])

p.val[i] = min(p1, p2)*2

}

# Create data.frame
ciOut = data.frame(coeff,
se,
z = coeff/se,
p.val,
bca.lower, bca.upper)

# Print results
ciOut
```

This gives us the following output:

```                 coeff         se          z  p.val  bca.lower   bca.upper
State        0.3748535 0.05550332   6.753713 0.0000  0.2626235  0.48291335
Independent -0.5151777 0.04813387 -10.703018 0.0000 -0.6072532 -0.41835225
Catholic    -0.1793441 0.05674241  -3.160672 0.0016 -0.2974390 -0.07526235
```

It is quite similar to the relevant output from `lavaan`, though the function above uses BCa confidence intervals and take much less time to run.

```           lhs op   rhs       label    est    se       z pvalue ci.lower ci.upper
22       state :=    b1       state  0.375 0.055   6.829  0.000    0.268    0.480
23 independent := b1+b4 independent -0.515 0.049 -10.518  0.000   -0.612   -0.418
24    catholic := b1+b5    catholic -0.179 0.057  -3.155  0.002   -0.293   -0.067
```

## One thought on “Moderation with a multicategorical moderating variable part II”

This site uses Akismet to reduce spam. Learn how your comment data is processed.