There are two strategies for extracting eigenvectors (components) and their eigenvalues from a multivariate data set of n objects by p variables. First, we can use an eigenanalysis (spectral decomposition) of a p x p association matrix between variables. Second, we can use a singular value decomposition (SVD) of the n x p data matrix, with variables standardized as necessary.
Consider the matrix (Y) of raw data from Clevenger and Waltho (2000), who recorded the numbers of seven taxa of wildlife for eleven underpasses in Alberta, Canada.
Load the vegan package
library(tidyverse)
library(vegan)
#datasets: clevenger.csv
clevenger <- read_csv("../data/clevenger.csv")
Rows: 11 Columns: 22── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): underpass
dbl (21): width, height, length, openness, noise, egdist, forcov, draindist, raildist, towndist, hui, bike, horse, foot, bbear, gbear, cougar, wolf, deer, elk, moose
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(clevenger,10)
Plots used for QK use the ggplot classic theme, with some tweaks. Tweaks are consolidated into theme_QK; use this theme for figures and tweak the theme to avoid repetitive code changes.
Fig 14.2
f14_2<-ggplot(data=clevenger,aes(x=hui, y=elk))+
geom_point(color=sc, size=ss, alpha= 1) +
theme_qk()+
geom_point(aes(x=mean(hui),y=mean(elk)), pch=8, size=3)
f14_2
# centered
clevenger.cen <- scale(clevenger[,c('bbear','gbear','cougar','wolf','deer','elk','moose')], scale=F)
clevenger.cen
bbear gbear cougar wolf deer elk moose
[1,] -7.5455 -0.63636 -5.6364 -27.27273 334.8182 10.5455 0.81818
[2,] 2.4545 -0.63636 18.3636 -21.27273 -177.1818 -613.4545 -0.18182
[3,] 25.4545 -0.63636 -7.6364 -25.27273 74.8182 -483.4545 0.81818
[4,] 19.4545 1.36364 19.3636 -0.27273 33.8182 384.5455 -0.18182
[5,] -4.5455 -0.63636 -3.6364 -25.27273 -4.1818 247.5455 -0.18182
[6,] -9.5455 -0.63636 -10.6364 -23.27273 -198.1818 -347.4545 -0.18182
[7,] -17.5455 -0.63636 -6.6364 -27.27273 -158.1818 761.5455 -0.18182
[8,] -13.5455 -0.63636 -6.6364 -23.27273 118.8182 707.5455 -0.18182
[9,] -9.5455 -0.63636 9.3636 48.72727 68.8182 6.5455 -0.18182
[10,] 16.4545 4.36364 4.3636 117.72727 71.8182 -131.4545 -0.18182
[11,] -1.5455 -0.63636 -10.6364 6.72727 -165.1818 -542.4545 -0.18182
attr(,"scaled:center")
bbear gbear cougar wolf deer elk moose
17.54545 0.63636 10.63636 28.27273 219.18182 814.45455 0.18182
# get standardized raw data
clevenger.stan <- scale(clevenger[,c('bbear','gbear','cougar','wolf','deer','elk','moose')], scale=T)
clevenger.stan
bbear gbear cougar wolf deer elk moose
[1,] -0.52613 -0.40618 -0.50687 -0.6011792 2.053482 0.021836 2.02260
[2,] 0.17115 -0.40618 1.65141 -0.4689198 -1.086679 -1.270281 -0.44947
[3,] 1.77491 -0.40618 -0.68672 -0.5570928 0.458869 -1.001090 2.02260
[4,] 1.35654 0.87039 1.74133 -0.0060118 0.207411 0.796279 -0.44947
[5,] -0.31695 -0.40618 -0.32701 -0.5570928 -0.025648 0.512593 -0.44947
[6,] -0.66559 -0.40618 -0.95651 -0.5130063 -1.215474 -0.719475 -0.44947
[7,] -1.22342 -0.40618 -0.59679 -0.6011792 -0.970149 1.576933 -0.44947
[8,] -0.94451 -0.40618 -0.59679 -0.5130063 0.728727 1.465115 -0.44947
[9,] -0.66559 -0.40618 0.84205 1.0741069 0.422071 0.013554 -0.44947
[10,] 1.14735 2.78524 0.39241 2.5950903 0.440470 -0.272203 -0.44947
[11,] -0.10776 -0.40618 -0.95651 0.1482909 -1.013081 -1.123261 -0.44947
attr(,"scaled:center")
bbear gbear cougar wolf deer elk moose
17.54545 0.63636 10.63636 28.27273 219.18182 814.45455 0.18182
attr(,"scaled:scale")
bbear gbear cougar wolf deer elk moose
14.34129 1.56670 11.12001 45.36539 163.04896 482.92823 0.40452
Covariance matrix for wildlife
clevenger.cov <- cov(clevenger[,c('bbear','gbear','cougar','wolf','deer','elk','moose')])
Correlation matrix for wildlife
clevenger.cor <- cor(clevenger[,c('bbear','gbear','cougar','wolf','deer','elk','moose')])
clevenger.eigen <- eigen(clevenger.cor)
clevenger.eigen
eigen() decomposition
$values
[1] 2.482623 1.792025 1.331450 0.810624 0.429038 0.087698 0.066542
$vectors
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] -0.448043 -0.328534 0.287268 -0.1954368 0.5863042 -0.476116 0.012862
[2,] -0.573452 0.042514 -0.200815 0.2558983 0.2779802 0.617669 -0.323647
[3,] -0.379880 0.167233 0.020840 -0.8173202 -0.3243526 0.194192 0.127916
[4,] -0.535575 0.114254 -0.145149 0.4304235 -0.3773786 -0.289848 0.517055
[5,] -0.089565 -0.537852 -0.537181 -0.0821087 -0.3456934 -0.273163 -0.461808
[6,] 0.144077 0.192644 -0.752052 -0.1904082 0.4602466 -0.067838 0.351944
[7,] 0.102376 -0.723077 0.038767 -0.0068275 -0.0049325 0.436871 0.523687
clevenger.pca <- princomp(~bbear+gbear+cougar+wolf+deer+elk+moose, cor=T, data=clevenger)
summary(clevenger.pca)
Importance of components:
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7
Standard deviation 1.57563 1.33867 1.15388 0.90035 0.655010 0.296139 0.2579565
Proportion of Variance 0.35466 0.25600 0.19021 0.11580 0.061291 0.012528 0.0095059
Cumulative Proportion 0.35466 0.61066 0.80087 0.91667 0.977966 0.990494 1.0000000
Calculate Euclidean, Chisq and Bray-Curtis measures
# raw data
clev1 <- clevenger[c(16:22)]
vegdist(clev1, method="euclidean")
1 2 3 4 5 6 7 8 9 10
2 807.61
3 559.23 285.70
4 482.25 1020.42 869.77
5 413.65 878.52 735.88 148.15
6 642.11 268.67 307.02 769.36 625.89
7 898.42 1375.52 1267.35 426.35 536.74 1109.76
8 729.74 1354.08 1192.45 337.42 476.26 1101.61 282.27
9 277.09 670.85 497.12 384.00 262.83 449.65 792.24 706.65
10 333.26 560.41 380.28 530.91 412.79 374.65 934.17 852.68 156.65
11 746.34 82.62 250.69 948.85 806.91 200.19 1304.57 1282.27 598.65 487.84
vegdist(clev1, method="chisq")
1 2 3 4 5 6 7 8 9 10
2 1.214756
3 0.473186 1.176354
4 0.613849 0.867985 0.788188
5 0.582277 1.053666 0.835994 0.216743
6 0.891871 1.127752 1.091184 0.393196 0.317984
7 0.912458 1.174356 1.162607 0.452737 0.341191 0.144877
8 0.551231 1.111064 0.844089 0.274477 0.078034 0.359303 0.362906
9 0.555373 0.999691 0.779179 0.376378 0.457888 0.648908 0.694334 0.453531
10 0.879124 1.144335 0.931561 0.736290 0.861271 0.968858 1.047640 0.867662 0.468409
11 0.867274 1.091450 0.917409 0.533020 0.631889 0.647308 0.756904 0.658179 0.433404 0.422037
vegdist(clev1, method="bray")
1 2 3 4 5 6 7 8 9 10
2 0.694395
3 0.381941 0.447639
4 0.257046 0.676407 0.436151
5 0.216617 0.667292 0.427848 0.087399
6 0.476015 0.412500 0.382653 0.511220 0.445863
7 0.413430 0.744462 0.658179 0.207145 0.233175 0.543630
8 0.282961 0.764273 0.501570 0.143776 0.188150 0.581297 0.096444
9 0.139464 0.632518 0.329804 0.182048 0.161496 0.415743 0.378852 0.273081
10 0.229572 0.613035 0.283937 0.254499 0.255457 0.401791 0.468040 0.352150 0.103015
11 0.619853 0.213018 0.344106 0.615784 0.592129 0.302961 0.676077 0.702222 0.536141 0.513862
# standardized by object totals
clev1$total <- rowSums(clev1)
clev2 <- clev1
clev2$bbeart <- clev2$bbear/clev2$total
clev2$gbeart <- clev2$gbear/clev2$total
clev2$cougart <- clev2$cougar/clev2$total
clev2$wolft <- clev2$wolf/clev2$total
clev2$deert <- clev2$deer/clev2$total
clev2$elkt <- clev2$elk/clev2$total
clev2$mooset <- clev2$moose/clev2$total
clev3 <- clev2[c(9:15)]
vegdist(clev3, method="euclidean")
1 2 3 4 5 6 7 8 9 10
2 0.291792
3 0.121784 0.359291
4 0.298125 0.137084 0.395698
5 0.323485 0.183377 0.427224 0.049811
6 0.492488 0.299062 0.593666 0.200595 0.169239
7 0.515240 0.327571 0.619011 0.227138 0.192278 0.033720
8 0.309797 0.186871 0.415581 0.052808 0.017846 0.183663 0.205465
9 0.191972 0.145257 0.284220 0.131858 0.169746 0.326846 0.352998 0.160387
10 0.195308 0.196111 0.243427 0.235566 0.278261 0.422617 0.451210 0.271083 0.115281
11 0.301822 0.131368 0.383604 0.097350 0.137390 0.249410 0.279936 0.139452 0.115329 0.178396
vegdist(clev3, method="chisq")
1 2 3 4 5 6 7 8 9 10
2 1.044808
3 0.395173 1.040019
4 0.603052 0.708637 0.763501
5 0.585076 0.862155 0.801565 0.186791
6 0.895407 0.941382 1.073682 0.376177 0.316351
7 0.917158 0.984173 1.130417 0.427877 0.339667 0.121883
8 0.553548 0.908897 0.797735 0.232020 0.066501 0.354814 0.364968
9 0.527167 0.826881 0.715063 0.350259 0.426191 0.628385 0.672989 0.418013
10 0.816003 0.983899 0.866602 0.685887 0.801442 0.924621 0.993907 0.802559 0.435164
11 0.820566 0.902835 0.885481 0.477302 0.568411 0.601200 0.694032 0.588427 0.382098 0.409907
vegdist(clev3, method="bray")
1 2 3 4 5 6 7 8 9 10
2 0.257096
3 0.100604 0.296569
4 0.234233 0.125961 0.313523
5 0.232180 0.169599 0.327493 0.044929
6 0.359230 0.259895 0.447301 0.158088 0.128853
7 0.368831 0.287564 0.469435 0.185757 0.142882 0.030105
8 0.223579 0.180350 0.322230 0.055680 0.015436 0.140679 0.147505
9 0.160906 0.140815 0.256919 0.119252 0.144057 0.265237 0.283528 0.136323
10 0.158896 0.212611 0.223909 0.198867 0.235151 0.350364 0.378033 0.230828 0.098203
11 0.257910 0.121439 0.319509 0.093316 0.122971 0.210650 0.240756 0.130473 0.110471 0.153193
# standardized by variable totals
clev1$bbeart <- clev1$bbear/sum(clev2$bbear)
clev1$gbeart <- clev1$gbear/sum(clev2$gbear)
clev1$cougart <- clev1$cougar/sum(clev2$cougar)
clev1$wolft <- clev1$wolf/sum(clev2$wolf)
clev1$deert <- clev1$deer/sum(clev2$deer)
clev1$elkt <- clev1$elk/sum(clev2$elk)
clev1$mooset <- clev1$moose/sum(clev2$moose)
clev4 <- clev1[c(9:15)]
vegdist(clev4, method="euclidean")
1 2 3 4 5 6 7 8 9 10
2 0.587437
3 0.210331 0.569987
4 0.649405 0.337775 0.634028
5 0.520620 0.226324 0.532035 0.377736
6 0.550062 0.257489 0.544644 0.437448 0.123196
7 0.549178 0.283523 0.573066 0.428273 0.112282 0.136567
8 0.515057 0.298973 0.555836 0.410199 0.090068 0.180977 0.117605
9 0.581751 0.275035 0.603003 0.371928 0.266971 0.310875 0.310006 0.281568
10 1.006231 0.862035 0.992908 0.589812 0.860770 0.873763 0.886843 0.870577 0.761342
11 0.558178 0.264687 0.539229 0.421003 0.163227 0.108094 0.202946 0.218363 0.249703 0.821275
vegdist(clev4, method="chisq")
1 2 3 4 5 6 7 8 9 10
2 2.19187
3 0.60603 2.10623
4 1.77748 1.22495 1.74109
5 1.65506 1.48619 1.73333 1.03660
6 1.95942 1.91779 1.84996 1.35287 0.87472
7 2.26797 2.27990 2.44799 1.85813 1.23327 1.31570
8 1.73978 1.97521 1.97518 1.41916 0.58811 1.15618 1.05876
9 1.83121 1.39560 1.91034 1.10275 1.15412 1.46269 1.88679 1.36391
10 2.03045 1.91801 2.00475 0.84465 1.64080 1.71943 2.28431 1.84542 1.29952
11 2.05497 1.92955 1.89151 1.42512 1.46811 1.19081 2.21990 1.74918 1.03842 1.38740
vegdist(clev4, method="bray")
1 2 3 4 5 6 7 8 9 10
2 0.79359
3 0.19465 0.73134
4 0.70258 0.43941 0.62732
5 0.55857 0.53405 0.63732 0.51030
6 0.79672 0.66678 0.81310 0.79955 0.51639
7 0.73272 0.76318 0.84233 0.69793 0.37841 0.64115
8 0.55346 0.72111 0.66889 0.57088 0.24953 0.60900 0.24933
9 0.62470 0.49350 0.70639 0.42436 0.42549 0.70013 0.66067 0.46379
10 0.77372 0.71967 0.71630 0.37308 0.70216 0.86873 0.85555 0.74065 0.47971
11 0.81541 0.56117 0.75058 0.65565 0.56217 0.47295 0.77004 0.71535 0.55019 0.74323
create pres/abs datafile
clev5 <- clevenger[c(16:22)]
clev5[clev5>0] <-1
clev5
vegdist(clev5, method="jaccard")
1 2 3 4 5 6 7 8 9 10
2 0.16667
3 0.00000 0.16667
4 0.28571 0.16667 0.28571
5 0.16667 0.00000 0.16667 0.16667
6 0.33333 0.20000 0.33333 0.33333 0.20000
7 0.33333 0.20000 0.33333 0.33333 0.20000 0.40000
8 0.16667 0.00000 0.16667 0.16667 0.00000 0.20000 0.20000
9 0.16667 0.00000 0.16667 0.16667 0.00000 0.20000 0.20000 0.00000
10 0.28571 0.16667 0.28571 0.00000 0.16667 0.33333 0.33333 0.16667 0.16667
11 0.33333 0.20000 0.33333 0.33333 0.20000 0.00000 0.40000 0.20000 0.20000 0.33333
vegdist(clev5, method="bray")
1 2 3 4 5 6 7 8 9 10
2 0.090909
3 0.000000 0.090909
4 0.166667 0.090909 0.166667
5 0.090909 0.000000 0.090909 0.090909
6 0.200000 0.111111 0.200000 0.200000 0.111111
7 0.200000 0.111111 0.200000 0.200000 0.111111 0.250000
8 0.090909 0.000000 0.090909 0.090909 0.000000 0.111111 0.111111
9 0.090909 0.000000 0.090909 0.090909 0.000000 0.111111 0.111111 0.000000
10 0.166667 0.090909 0.166667 0.000000 0.090909 0.200000 0.200000 0.090909 0.090909
11 0.200000 0.111111 0.200000 0.200000 0.111111 0.000000 0.250000 0.111111 0.111111 0.200000
Check outliers with Mahalanobis distances - no particularly unusual objects
# Problem with code; needs to be fixed
#mahalanobis(clev1, colMeans(clev1), cov(clev1))
clev6 <- clevenger[c(1,16:22)]
clev6.stand <- decostand(clev6[,2:8], "total")
clev6.stand.bc <- vegdist(clev6.stand,"bray")
clev6.mds <- metaMDS(clev6.stand.bc,k=2)
Run 0 stress 0.030762
Run 1 stress 0.030762
... Procrustes: rmse 0.00016469 max resid 0.00028331
... Similar to previous best
Run 2 stress 0.0541
Run 3 stress 0.0541
Run 4 stress 0.24894
Run 5 stress 0.0541
Run 6 stress 0.030762
... Procrustes: rmse 0.000114 max resid 0.00019548
... Similar to previous best
Run 7 stress 0.030762
... Procrustes: rmse 0.00019672 max resid 0.00034067
... Similar to previous best
Run 8 stress 0.0541
Run 9 stress 0.27024
Run 10 stress 0.030762
... Procrustes: rmse 0.00014038 max resid 0.00024084
... Similar to previous best
Run 11 stress 0.0541
Run 12 stress 0.030762
... Procrustes: rmse 0.00011851 max resid 0.00019952
... Similar to previous best
Run 13 stress 0.030762
... Procrustes: rmse 0.00015658 max resid 0.00025943
... Similar to previous best
Run 14 stress 0.030762
... Procrustes: rmse 0.00020352 max resid 0.00035062
... Similar to previous best
Run 15 stress 0.059499
Run 16 stress 0.0541
Run 17 stress 0.0541
Run 18 stress 0.030762
... Procrustes: rmse 0.00017433 max resid 0.00030013
... Similar to previous best
Run 19 stress 0.030762
... Procrustes: rmse 6.361e-05 max resid 0.00010849
... Similar to previous best
Run 20 stress 0.0595
*** Best solution repeated 9 times
plot(clev6.mds$points,type="n", )
text(clev6.mds$points,lab=clev6$underpass, col="black")
mdsplot <- as.data.frame(scores(clev6.mds))
mdsplot$underpass<-clev6$underpass
f14_3<-ggplot(data=mdsplot,aes(x=NMDS1, y=NMDS2, label=underpass))+
geom_text()+
theme_qk()+
theme(axis.text = element_blank())
f14_3