Home > Stats Toolbox > Analyses Toolboxes > Model-based Multivariate Analyses

Model-based Multivariate Analyses


Setting up your script:

Install mvabund and its dependencies Load the mvbund package

library(mvabund)

Tasmania copepods example:

Does treatment have an effect on assemblage?

Load the Tasmania dataset

data(Tasmania)
tasm<-Tasmania$copepods

Create the mvanbund object and exploratory variables:

tasm.cop <- mvabund(Tasmania$copepods)
treatment <- Tasmania$treatment
block <- Tasmania$block
str(tasm.cop)
##  'mvabund' int [1:16, 1:12] 43 63 124 105 4 5 91 57 7 6 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:16] "1" "2" "3" "4" ...
##   ..$ : chr [1:12] "Ameira" "Adopsyllus" "Ectinosoma" "Ectinosomat" ...

Visualise effect of treatment on copepod abundance

plot(tasm.cop ~ treatment, col=as.numeric(block))
## 
##  PIPING TO 2nd MVFACTOR

Fitting predictive models using a negative binomial model for counts:

tasm.cop.pois <- manyglm(tasm.cop ~ block*treatment, family="poisson")
tasm.cop.nb <- manyglm(tasm.cop ~ block*treatment, family="negative.binomial")

Check assumptions and plot residuals

plot(tasm.cop.pois)

plot(tasm.cop.nb)

Check for normality

Testing hypotheses about the treatment effect and treatment-by-block interactions, using a Wald statistic and 199 resamples (use 999 resamples for a paper):

anova(tasm.cop.nb, nBoot=199, test="wald")
## Time elapsed: 0 hr 0 min 3 sec
## Analysis of Variance Table
## 
## Model: manyglm(formula = tasm.cop ~ block * treatment, family = "negative.binomial")
## 
## Multivariate test:
##                 Res.Df Df.diff  wald Pr(>wald)   
## (Intercept)         15                           
## block               12       3 9.348     0.005 **
## treatment           11       1 7.618     0.030 * 
## block:treatment      8       3 5.367     0.160   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
##  Test statistics calculated assuming uncorrelated response (for faster computation) 
##  P-value calculated using 199 resampling iterations via PIT-trap resampling (to account for correlation in testing).

No interaction between block and treatment, definitely a treatment and a block effect. In which species is there an effect?

anova(tasm.cop.nb, nBoot=199, test="wald", p.uni = "adjusted")
## Time elapsed: 0 hr 0 min 3 sec
## Analysis of Variance Table
## 
## Model: manyglm(formula = tasm.cop ~ block * treatment, family = "negative.binomial")
## 
## Multivariate test:
##                 Res.Df Df.diff  wald Pr(>wald)   
## (Intercept)         15                           
## block               12       3 9.348     0.005 **
## treatment           11       1 7.618     0.025 * 
## block:treatment      8       3 5.367     0.170   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Univariate Tests:
##                 Ameira           Adopsyllus           Ectinosoma          
##                   wald Pr(>wald)       wald Pr(>wald)       wald Pr(>wald)
## (Intercept)                                                               
## block            2.499     0.305      2.196     0.340      1.856     0.530
## treatment        4.885     0.030      0.301     0.990      2.924     0.205
## block:treatment  2.749     0.390       0.02     0.855      0.026     0.855
##                 Ectinosomat           Haloschizo           Lepta.A
##                        wald Pr(>wald)       wald Pr(>wald)    wald
## (Intercept)                                                       
## block                 3.639     0.125      0.049     0.655   6.546
## treatment             0.654     0.950      0.076     0.990    0.43
## block:treatment       1.067     0.855      0.016     0.855   4.484
##                           Lepta.B           Lepta.C           Mictyricola
##                 Pr(>wald)    wald Pr(>wald)    wald Pr(>wald)        wald
## (Intercept)                                                              
## block               0.005   1.892     0.530   3.059     0.165       1.505
## treatment           0.990   1.424     0.660    2.94     0.205       2.924
## block:treatment     0.135   0.033     0.855   0.018     0.855       0.033
##                           Parevansula            Quin           Rhizothrix
##                 Pr(>wald)        wald Pr(>wald)  wald Pr(>wald)       wald
## (Intercept)                                                               
## block               0.530       0.952     0.570 0.822     0.570      0.055
## treatment           0.205           0     0.990 0.077     0.990      2.387
## block:treatment     0.855       0.021     0.855 0.018     0.855      0.004
##                          
##                 Pr(>wald)
## (Intercept)              
## block               0.655
## treatment           0.220
## block:treatment     0.855
## Arguments:
##  Test statistics calculated assuming uncorrelated response (for faster computation) 
## P-value calculated using 199 resampling iterations via PIT-trap resampling (to account for correlation in testing.

- treatment effect in Ameira


Spider example:

Which environmental variables are most strongly associated with an assemblage?

data(spider)

View spider data

spider$abund
##    Alopacce Alopcune Alopfabr Arctlute Arctperi Auloalbi Pardlugu Pardmont
## 1        25       10        0        0        0        4        0       60
## 2         0        2        0        0        0       30        1        1
## 3        15       20        2        2        0        9        1       29
## 4         2        6        0        1        0       24        1        7
## 5         1       20        0        2        0        9        1        2
## 6         0        6        0        6        0        6        0       11
## 7         2        7        0       12        0       16        1       30
## 8         0       11        0        0        0        7       55        2
## 9         1        1        0        0        0        0        0       26
## 10        3        0        1        0        0        0        0       22
## 11       15        1        2        0        0        1        0       95
## 12       16       13        0        0        0        0        0       96
## 13        3       43        1        2        0       18        1       24
## 14        0        2        0        1        0        4        3       14
## 15        0        0        0        0        0        0        6        0
## 16        0        3        0        0        0        0        6        0
## 17        0        0        0        0        0        0        2        0
## 18        0        1        0        0        0        0        5        0
## 19        0        1        0        0        0        0       12        0
## 20        0        2        0        0        0        0       13        0
## 21        0        1        0        0        0        0       16        1
## 22        7        0       16        0        4        0        0        2
## 23       17        0       15        0        7        0        2        6
## 24       11        0       20        0        5        0        0        3
## 25        9        1        9        0        0        2        1       11
## 26        3        0        6        0       18        0        0        0
## 27       29        0       11        0        4        0        0        1
## 28       15        0       14        0        1        0        0        6
##    Pardnigr Pardpull Trocterr Zoraspin
## 1        12       45       57        4
## 2        15       37       65        9
## 3        18       45       66        1
## 4        29       94       86       25
## 5       135       76       91       17
## 6        27       24       63       34
## 7        89      105      118       16
## 8         2        1       30        3
## 9         1        1        2        0
## 10        0        0        1        0
## 11        0        1        4        0
## 12        1        8       13        0
## 13       53       72       97       22
## 14       15       72       94       32
## 15        0        0       25        3
## 16        2        0       28        4
## 17        0        0       23        2
## 18        0        0       25        0
## 19        1        0       22        3
## 20        0        0       22        2
## 21        0        1       18        2
## 22        0        0        1        0
## 23        0        0        1        0
## 24        0        0        0        0
## 25        6        0       16        6
## 26        0        0        1        0
## 27        0        0        0        0
## 28        0        0        2        0
spider$x
##       soil.dry bare.sand fallen.leaves   moss herb.layer reflection
##  [1,]   2.3321    0.0000        0.0000 3.0445     4.4543     3.9120
##  [2,]   3.0493    0.0000        1.7918 1.0986     4.5643     1.6094
##  [3,]   2.5572    0.0000        0.0000 2.3979     4.6052     3.6889
##  [4,]   2.6741    0.0000        0.0000 2.3979     4.6151     2.9957
##  [5,]   3.0155    0.0000        0.0000 0.0000     4.6151     2.3026
##  [6,]   3.3810    2.3979        3.4340 2.3979     3.4340     0.6931
##  [7,]   3.1781    0.0000        0.0000 0.6931     4.6151     2.3026
##  [8,]   2.6247    0.0000        4.2627 1.0986     3.4340     0.6931
##  [9,]   2.4849    0.0000        0.0000 4.3307     3.2581     3.4012
## [10,]   2.1972    3.9318        0.0000 3.4340     3.0445     3.6889
## [11,]   2.2192    0.0000        0.0000 4.1109     3.7136     3.6889
## [12,]   2.2925    0.0000        0.0000 3.8286     4.0254     3.6889
## [13,]   3.5175    1.7918        1.7918 0.6931     4.5109     3.4012
## [14,]   3.0865    0.0000        0.0000 1.7918     4.5643     1.0986
## [15,]   3.2696    0.0000        4.3944 0.6931     3.0445     0.6931
## [16,]   3.0301    0.0000        4.6052 0.6931     0.6931     0.0000
## [17,]   3.3322    0.0000        4.4543 0.6931     3.0445     1.0986
## [18,]   3.1224    0.0000        4.3944 0.0000     3.0445     1.0986
## [19,]   2.9232    0.0000        4.5109 1.6094     1.6094     0.0000
## [20,]   3.1091    0.0000        4.5951 0.6931     0.6931     0.0000
## [21,]   2.9755    0.0000        4.5643 0.6931     1.7918     0.0000
## [22,]   1.2528    3.2581        0.0000 4.3307     0.6931     3.9120
## [23,]   1.1939    3.0445        0.0000 4.0254     3.2581     4.0943
## [24,]   1.6487    3.2581        0.0000 4.0254     3.0445     4.0073
## [25,]   1.8245    3.5835        0.0000 1.0986     4.1109     2.3026
## [26,]   0.9933    4.5109        0.0000 1.7918     1.7918     4.3820
## [27,]   0.9555    2.3979        0.0000 3.8286     3.4340     3.6889
## [28,]   0.9555    3.4340        0.0000 3.7136     3.4340     3.6889

Create mvabund object and explanatory environmental variables

spiddat <- mvabund(spider$abund)
X<-data.frame(spider$x)

Check for colinearity

plot(X)

Draw a plot of the spider data:

## Kicking off BoxPlot sequence 
## 
## 
## ABOUT TO PLOT THE FUNCTION

Model using all the environmental variables

glm.spid <- manyglm(spiddat~., data=X)

Model using specific environmental variables

glm.spid2<- manyglm(spiddat~soil.dry+bare.sand+moss, data=X,family = "negative.binomial")

Conditional effects:

summary(glm.spid,nBoot = 999,test = "LR")
## 
## Test statistics:
##               LR value Pr(>LR)    
## (Intercept)     107.31   0.001 ***
## soil.dry         90.86   0.001 ***
## bare.sand        26.59   0.139    
## fallen.leaves    31.27   0.083 .  
## moss             34.98   0.057 .  
## herb.layer       95.43   0.001 ***
## reflection       46.88   0.013 *  
## --- 
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Likelihood Ratio statistic:  465.3, p-value: 0.001 
## Arguments:
##  Test statistics calculated assuming response assumed to be uncorrelated 
##  P-value calculated using 999 resampling iterations via pit.trap resampling (to account for correlation in testing).
anova(glm.spid)
## Time elapsed: 0 hr 1 min 39 sec
## Analysis of Deviance Table
## 
## Model: manyglm(formula = spiddat ~ ., data = X)
## 
## Multivariate test:
##               Res.Df Df.diff    Dev Pr(>Dev)    
## (Intercept)       27                            
## soil.dry          26       1 147.30    0.001 ***
## bare.sand         25       1  26.89    0.128    
## fallen.leaves     24       1 132.23    0.001 ***
## moss              23       1  29.70    0.106    
## herb.layer        22       1  82.26    0.001 ***
## reflection        21       1  46.88    0.024 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
##  Test statistics calculated assuming uncorrelated response (for faster computation) 
##  P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).

Marginal effects

devs = rep(NA,ncol(spider$x))
names(devs) = colnames(spider$x)
for (iVar in 1:ncol(spider$x))
    {
      spid.glmi = manyglm(spiddat~spider$x[,iVar],data = X)
      devs[iVar] = -2*sum( logLik(spid.glmi) )
    }
devs = devs+2*sum(logLik(glm.spid))
devs
##      soil.dry     bare.sand fallen.leaves          moss    herb.layer 
##      317.9538      394.8170      369.4439      393.3924      358.0667 
##    reflection 
##      353.4796

Presence/absence data

pres.abs <- spiddat
pres.abs[pres.abs>0] = 1
X <- data.frame(spider$x) #turn into a data frame to refer to variables in formula
glm.spid.bin <- manyglm(pres.abs~soil.dry+bare.sand+moss, data=X, family="binomial")
glm.spid.bin
## 
## Call:  manyglm(formula = pres.abs ~ soil.dry + bare.sand + moss, family = "binomial",      data = X) 
## [1] "binomial(link=logit)"
## 
## Degrees of Freedom: 27 Total (i.e. Null); 24 Residual
## 
##                     Alopacce  Alopcune  Alopfabr  Arctlute  Arctperi
## 2*log-likelihood:   -20.102   -21.201   -13.191   -23.457    -0.004 
## Residual Deviance:   20.102    21.201    13.191    23.457     0.004 
## AIC:                 28.102    29.201    21.191    31.457     8.004 
##                     Auloalbi  Pardlugu  Pardmont  Pardnigr  Pardpull
## 2*log-likelihood:   -34.568   -13.839   -22.618   -31.817   -24.109 
## Residual Deviance:   34.568    13.839    22.618    31.817    24.109 
## AIC:                 42.568    21.839    30.618    39.817    32.109 
##                     Trocterr  Zoraspin
## 2*log-likelihood:    -8.669   -16.368 
## Residual Deviance:    8.669    16.368 
## AIC:                 16.669    24.368
drop1(glm.spid.bin) #AICs for one-term deletions, suggests dropping bare.sand
## Single term deletions
## 
## Model:
## pres.abs ~ soil.dry + bare.sand + moss
##           Df    AIC
## <none>       325.94
## soil.dry  12 333.27
## bare.sand 12 318.90
## moss      12 333.26
glm2.spid.bin<-manyglm(pres.abs~soil.dry+moss, data=X, family="binomial")
drop1(glm2.spid.bin)
## Single term deletions
## 
## Model:
## pres.abs ~ soil.dry + moss
##          Df    AIC
## <none>      318.90
## soil.dry 12 367.47
## moss     12 324.42

Plot data


Coral data (tikus) to test the manyany function:

Try fitting Tikus Islands data with Tweedie models with power parameter 1.5, to test for compositional effect:

data(tikus)
coral <- as.matrix(tikus$abund[1:20,])
sumSpp = apply(coral>0,2,sum)
coral <- coral[,sumSpp>6] ## cutting to just species with seven(!) or more presences to cut

Computation time. Maybe rerun with less (e.g. 4 or more presences) if curious and patient.

coralX <- tikus$x[1:20,]

Using the block argument

data(tikus)
coral = mvabund(tikus$abund[1:20,])

Convert to presence/absence

pres = matrix(as.numeric(I(coral>0)),nrow=20)
nPres = apply(pres,2,sum)
pres = pres[,nPres>1] # remove zerotons and singletons to speed up
time = factor(tikus$x$time[1:20])
transect=tikus$x$rep[1:20]
coral.mod = manyglm(pres~transect+time,family=binomial("cloglog"))
coral.null = manyglm(pres~transect,family=binomial("cloglog"))

Use the block argument

anova(coral.null,coral.mod,block=transect)
## Time elapsed: 0 hr 0 min 35 sec
## Analysis of Deviance Table
## 
## coral.null: pres ~ transect
## coral.mod: pres ~ transect + time
## 
## Multivariate test:
##            Res.Df Df.diff Dev Pr(>Dev)    
## coral.null     10                         
## coral.mod       9       1 425    0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
##  Test statistics calculated assuming uncorrelated response (for faster computation) 
##  P-value calculated using 999 resampling iterations via PIT-trap block resampling (to account for correlation in testing).