# Functions for testing mediation developed by: # Thomas D. Fletcher # Assistant Professor # Department of Psychology # University of Missouri-St. Louis # FletcherT@umsl.edu # # # The following functions are developed for use in R # They will likely work in S+ with some minor modifications # I have not tested them in S+ # I suspect the discrepancies if any will be in the bootstrapping syntax # # # Index of functions in this document: ## proximal.med ## distal.med ## se.indirect2 ## se.indirect3 ## goodman.se.indirect2 ## aroian.se.indirect2 ## proxInd.ef ## distInd.ef ## tmp.proximal ## tmp.distal # Testing simple 'proximal' mediation X -> M -> Y # To return a 'pretty' table, use data.frame(proximal.med(DATA)) # Can't embed data.frame() in return, because it creates problems with proxInd.ef() needed for boot() proximal.med <- function (data) { # Models needed to test mediation yx.lm <- lm (y ~ x, data) # t yall.lm <- lm (y ~ m + x, data) # b tpr m1.lm <- lm (m ~ x, data) # a # individual effects a <- summary(m1.lm)$coef[2,1] se.a <- summary(m1.lm)$coef[2,2] t.a <- summary(m1.lm)$coef[2,3] b <- summary(yall.lm)$coef[2,1] se.b <- summary(yall.lm)$coef[2,2] t.b <- summary(yall.lm)$coef[2,3] t <- summary(yx.lm)$coef[2,1] se.t <- summary(yx.lm)$coef[2,2] t.t <- summary(yx.lm)$coef[2,3] tpr <- summary(yall.lm)$coef[3,1] se.tpr <- summary(yall.lm)$coef[3,2] t.tpr <- summary(yall.lm)$coef[3,3] # indirect effects ab <- a*b se.ab <- se.indirect2 (a,se.a,b,se.b) t.ab <- ab/se.ab r.ab <- ab/t ar.se <- aroian.se.indirect2(a,se.a,b,se.b) go.se <- goodman.se.indirect2(a,se.a,b,se.b) # Create a table and return values rtab <- matrix(nrow=7,ncol=4) rownames(rtab) <- c("a","b","t","t'","ab","Aroian","Goodman") colnames(rtab) <- c("Effect","SE","t-ratio","Med.Ratio") rtab[1,] <- c(a,se.a,t.a,"--") rtab[2,] <- c(b,se.b,t.b,"--") rtab[3,] <- c(t,se.t,t.t,"--") rtab[4,] <- c(tpr,se.tpr,t.tpr,"--") rtab[5,] <- c(ab,se.ab,t.ab,r.ab) rtab[6,] <- c("--",ar.se,"--","--") rtab[7,] <- c("--",go.se,"--","--") return((rtab)) } # Testing distal mediation --- X -> M1 -> M2 -> Y # Assumes all paths estimated (e.g., X -> M2; X -> Y; M1 -> Y) distal.med <- function (data) { yx.lm <- lm (y ~ x, data) # t yall.lm <- lm (y ~ m2 + m1 + x, data) # c f tp m2.lm <- lm (m2 ~ m1 + x, data) # b e m1.lm <- lm (m1 ~x, data) # a # individual effects a <- summary(m1.lm)$coef[2,1] se.a <- summary(m1.lm)$coef[2,2] t.a <- summary(m1.lm)$coef[2,3] b <- summary(m2.lm)$coef[2,1] se.b <- summary(m2.lm)$coef[2,2] t.b <- summary(m2.lm)$coef[2,3] c <- summary(yall.lm)$coef[2,1] se.c <- summary(yall.lm)$coef[2,2] t.c <- summary(yall.lm)$coef[2,3] e <- summary(m2.lm)$coef[3,1] se.e <- summary(m2.lm)$coef[3,2] t.e <- summary(m2.lm)$coef[3,3] f <- summary(yall.lm)$coef[3,1] se.f <- summary(yall.lm)$coef[3,2] t.f <- summary(yall.lm)$coef[3,3] t <- summary(yx.lm)$coef[2,1] se.t <- summary(yx.lm)$coef[2,2] t.t <- summary(yx.lm)$coef[2,3] tpr <- summary(yall.lm)$coef[4,1] se.tpr <- summary(yall.lm)$coef[4,2] t.tpr <- summary(yall.lm)$coef[4,3] # covariances of regression coefficients sbe <- vcov(m2.lm)[3,2] scf <- vcov(yall.lm)[3,2] # indirect effects abc <- a*b*c se.abc <- se.indirect3 (a,se.a,b,se.b,c,se.c) t.abc <- abc/se.abc r.abc <- abc/t af <- a*f se.af <- se.indirect2 (a,se.a,f,se.f) t.af <- af/se.af r.af <- af/t ec <- e*c se.ec <- se.indirect2 (e,se.e,c,se.c) t.ec <- ec/se.ec r.ec <- ec/t # total indirect effect x -> y ind.xy <- abc + af + ec sabcaf <- b*c*f*se.a^2 + a^2*b*scf sabcec <- a*c^2*sbe + a*b*e*se.c^2 safec <- a*e*scf se.ind.xy <- sqrt(se.abc^2 + se.af^2 + se.ec^2 + 2*(sabcaf + sabcec + safec)) t.ind.xy <- ind.xy/se.ind.xy # indirect effect ratio med.ratio1 <- (t-tpr)/t med.ratio2 <- ind.xy/t # Create a table and return values rtab <- matrix(nrow=11,ncol=4) rownames(rtab) <- c("a","b","c","e","f","abc","af","ec","ind.xy","t","t'") colnames(rtab) <- c("Effect","SE","t-ratio","Med.Ratio") rtab[1,] <- c((a),(se.a),(t.a),"--") rtab[2,] <- c((b),(se.b),(t.b),"--") rtab[3,] <- c((c),(se.c),(t.c),"--") rtab[4,] <- c((e),(se.e),(t.e),"--") rtab[5,] <- c((f),(se.f),(t.f),"--") rtab[6,] <- c((abc),(se.abc),(t.abc),(r.abc)) rtab[7,] <- c((af),(se.af),(t.af),(r.af)) rtab[8,] <- c((ec),(se.ec),(t.ec),(r.ec)) rtab[9,] <- c((ind.xy),(se.ind.xy),(t.ind.xy),(med.ratio2)) rtab[10,] <- c((t),(se.t),(t.t),"--") rtab[11,] <- c((tpr),(se.tpr),(t.tpr),(med.ratio1)) return(rtab) } # f(x)s to estimate the standard error (se) for specific indirect effects # se for indirect as product of two paths se.indirect2 <- function (a,sa,b,sb) { sqrt(a^2*sb^2 + b^2*sa^2) } # function to compute se as product of 3 paths se.indirect3 <- function (a,sa,b,sb,c,sc) { sqrt(a^2*b^2*sc^2 + a^2*c^2*sb^2 + b^2*c^2*sa^2) } # Other estimates of se as product of 2 paths (see MacKinnon et al., 2002) # These are returned as additional output for proximal.med() goodman.se.indirect2 <- function (a,sa,b,sb) { sqrt(a^2*sb^2 + b^2*sa^2 - sa^2*sb^2 ) } aroian.se.indirect2 <- function (a,sa,b,sb) { sqrt(a^2*sb^2 + b^2*sa^2 + sa^2*sb^2 ) } ### Functions to 'pull' indirect effect from corresponding function for use in bootstrapping # Gets total indirect effect from proximal.med() for use in boot proxInd.ef <- function (data,i) { d <- data[i,] ind <- as.numeric(proximal.med(d)[5,1]) return(ind) } # Gets total indirect effect from distal.med() for use in boot distInd.ef <- function (data,i) { d <- data[i,] ind <- as.numeric(distal.med(d)[9,1]) return(ind) } ###################################################################### ########################## EXAMPLES ############################## ###################################################################### # Requires library(boot) based on Davison & Hinkley (1997) # In S+ this library is called bootstrap() ~~I think~~ # Create some data based on a correlation matrix: See Fletcher (under review) cormat <- matrix (c(1,.3,.15,.075,.3,1,.3,.15,.15,.3,1,.3,.075,.15,.3,1),ncol=4) # function to create data create.dat <- function(seed,cormat,size) { set.seed(seed) k <- ncol(cormat) tdat <- matrix(rnorm(k*size),ncol=k) tcor <- t(chol(cormat)) tdat %*% tcor } # Use create.dat function d200 <- create.dat(123456,cormat,200) # convert to data.frame and add labels d200 <- data.frame(d200) names(d200) <- c("antecedent","mediator1","mediator2","consequence") # object d200 is now a data.frame containing 200 observations for 4 variables # # to run and save 999 replicates using bootstrap for assessing distal mediation # use the boot() function and refer to the distInd.ef function within boot() # However, to use the distInd.ef or proxInd.ef functions, a new data.frame must be created # The new data.frame must consist of variables labeled x,m,y or x,m1,m2,y # This is step is regretabbly inconvenient, but simplifies programming ... # An example of pulling 4 variables from a data.frame and renaming them is below # Be sure you label the new variables correctly! Watch the ordering # IFF I get them to work, tmp.distal() and tmp.proximal() will create new data with proper labels # But, until then, new.d200 <- d200[,1:4] names(new.d200) <- c("x","m1","m2","y") library(boot) dist.boot200 <- boot(new.d200, distInd.ef, R=999) # To plot the density of the distribution and add normal curve plot(density(dist.boot200$t)) lines(density(qnorm(ppoints(999),mean(dist.boot200$t),sd(dist.boot200$t))),lty=2,col=2) # to return the 95% confidence interval sort(dist.boot200$t[c(25,975)]) # To get the "OLS" estimates of all the effects of a distal mediation model: # use distal.med() function distal.med(new.d200) # see note above RE: use of data.frame() to make results 'pretty' # If interest is only in proximal mediation M1 -> M2 -> Y # use proximal.med() function # create data.frame with only 3 variables labeled x,m,y but corresponding to M1, M2, Y prox.d200 <- d200[,2:4] names(prox.d200) <- c("x","m","y") proximal.med(prox.d200)