Worked example of cluster analysis
We introduced the study of Feinberg et al. (2014), who used
morphological, genetic, and acoustic (call) criteria to discriminate
five species (one new) of congeneric frogs (Box 15.4). We will use their acoustic
data (six variables) to hierarchically cluster their specimens based on
a Euclidean dissimilarity matrix calculated from standardized call
variables. We used UPGMA.
Preliminaries
Plots used for QK use the ggplot classic theme, with some tweaks.
Tweaks are consolidated into theme_QK; use this theme for figures and
tweak the theme to avoid repetitive code changes.
source("../R/libraries.R") #loads common set of packages
source("../R/appearance.R")
library(ggdendro)
Read data and standardize variables. We use the feinberg acoustic data
feinacoust <- read_csv("../data/feinacoust.csv", col_types = cols(frogid = col_skip()))
# standardize variablesfeinacoust$crt <- scale(feinacoust$crt)
feinacoust$cdc <- scale(feinacoust$cdc)
feinacoust$pn <- scale(feinacoust$pn)
feinacoust$df <- scale(feinacoust$df)
feinacoust$tcl <- scale(feinacoust$tcl)
feinacoust$tcr <- scale(feinacoust$tcr)
create Euclidean dissimilary matrix
#feinacoust.euc <- vegdist(feinacoust,'euclidean')
feinacoust.euc<-dist(as.matrix(feinacoust),'euclidean')
Warning: NAs introduced by coercion
Run cluster analysis
feinacoust.hc <- hclust(feinacoust.euc, "ave")
plot(feinacoust.hc, labels=feinacoust$spp)
feinacoust.hc$labels <- feinacoust$spp
plot(feinacoust.hc)
Better graphics
dhc <- as.dendrogram(feinacoust.hc)
# Rectangular lines
ddata <- dendro_data(dhc, type = "rectangle")
p <- ggplot(segment(ddata)) +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + geom_text(data=ddata$labels,aes(x=x,y=0),label=ddata$labels$label, hjust=0, size=2, nudge_y=0.05)+
coord_flip() +
scale_y_reverse(expand = c(0.2, 0))+
theme(
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.x = element_blank(),
panel.background = element_blank(),
)+
labs(y="Distance")
p
Colour version
Note: Still needs work; with white b/g, species tags don’t show up
well when yellow.
ggplot(segment(ddata)) +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + geom_text(data=ddata$labels,aes(x=x,y=0,color=label),label=ddata$labels$label, hjust=0, size=2, nudge_y=0.05)+
coord_flip() +
scale_y_reverse(expand = c(0.2, 0))+
scale_color_viridis_d()+
theme(
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.x = element_blank(),
panel.background = element_blank(),
legend.position = "none"
)+
labs(y="Distance")
LS0tCnRpdGxlOiAiUUsgQm94IDE2LjQiCm91dHB1dDoKICBodG1sX25vdGVib29rCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkKYGBgCgojIyBXb3JrZWQgZXhhbXBsZSBvZiBjbHVzdGVyIGFuYWx5c2lzCgpXZSBpbnRyb2R1Y2VkIHRoZSBzdHVkeSBvZiBGZWluYmVyZyBldCBhbC4gKDIwMTQpLCB3aG8gdXNlZCBtb3JwaG9sb2dpY2FsLCBnZW5ldGljLCBhbmQgYWNvdXN0aWMgKGNhbGwpIGNyaXRlcmlhIHRvIGRpc2NyaW1pbmF0ZSBmaXZlIHNwZWNpZXMgKG9uZSBuZXcpIG9mIGNvbmdlbmVyaWMgZnJvZ3MgKFtCb3ggMTUuNF0oZmVpbmJlcmdMREEubmIuaHRtbCkpLiBXZSB3aWxsIHVzZSB0aGVpciBhY291c3RpYyBkYXRhIChzaXggdmFyaWFibGVzKSB0byBoaWVyYXJjaGljYWxseSBjbHVzdGVyIHRoZWlyIHNwZWNpbWVucyBiYXNlZCBvbiBhIEV1Y2xpZGVhbiBkaXNzaW1pbGFyaXR5IG1hdHJpeCBjYWxjdWxhdGVkIGZyb20gc3RhbmRhcmRpemVkIGNhbGwgdmFyaWFibGVzLiBXZSB1c2VkIFVQR01BLgoKIyMjIFByZWxpbWluYXJpZXMKClBsb3RzIHVzZWQgZm9yIFFLIHVzZSB0aGUgZ2dwbG90IGNsYXNzaWMgdGhlbWUsIHdpdGggc29tZSB0d2Vha3MuIFR3ZWFrcyBhcmUgY29uc29saWRhdGVkIGludG8gdGhlbWVfUUs7IHVzZSB0aGlzIHRoZW1lIGZvciBmaWd1cmVzIGFuZCB0d2VhayB0aGUgdGhlbWUgdG8gYXZvaWQgcmVwZXRpdGl2ZSBjb2RlIGNoYW5nZXMuCgpgYGB7ciByZXN1bHRzPSdoaWRlJ30Kc291cmNlKCIuLi9SL2xpYnJhcmllcy5SIikgICAgI2xvYWRzIGNvbW1vbiBzZXQgb2YgcGFja2FnZXMKc291cmNlKCIuLi9SL2FwcGVhcmFuY2UuUiIpCmxpYnJhcnkoZ2dkZW5kcm8pCmBgYAoKUmVhZCBkYXRhIGFuZCBzdGFuZGFyZGl6ZSB2YXJpYWJsZXMuIFdlIHVzZSB0aGUgW2ZlaW5iZXJnIGFjb3VzdGljIGRhdGFdKC4uL2RhdGEvZmVpbmFjb3VzdC5jc3YpCgpgYGB7ciB9CmZlaW5hY291c3QgPC0gcmVhZF9jc3YoIi4uL2RhdGEvZmVpbmFjb3VzdC5jc3YiLCBjb2xfdHlwZXMgPSBjb2xzKGZyb2dpZCA9IGNvbF9za2lwKCkpKQojIHN0YW5kYXJkaXplIHZhcmlhYmxlc2ZlaW5hY291c3QkY3J0IDwtIHNjYWxlKGZlaW5hY291c3QkY3J0KQpmZWluYWNvdXN0JGNkYyA8LSBzY2FsZShmZWluYWNvdXN0JGNkYykKZmVpbmFjb3VzdCRwbiA8LSBzY2FsZShmZWluYWNvdXN0JHBuKQpmZWluYWNvdXN0JGRmIDwtIHNjYWxlKGZlaW5hY291c3QkZGYpCmZlaW5hY291c3QkdGNsIDwtIHNjYWxlKGZlaW5hY291c3QkdGNsKQpmZWluYWNvdXN0JHRjciA8LSBzY2FsZShmZWluYWNvdXN0JHRjcikKYGBgCgojIyMgY3JlYXRlIEV1Y2xpZGVhbiBkaXNzaW1pbGFyeSBtYXRyaXgKCmBgYHtyfQojZmVpbmFjb3VzdC5ldWMgPC0gdmVnZGlzdChmZWluYWNvdXN0LCdldWNsaWRlYW4nKQpmZWluYWNvdXN0LmV1YzwtZGlzdChhcy5tYXRyaXgoZmVpbmFjb3VzdCksJ2V1Y2xpZGVhbicpCmBgYAoKIyMjIFJ1biBjbHVzdGVyIGFuYWx5c2lzCgpgYGB7cn0KZmVpbmFjb3VzdC5oYyA8LSBoY2x1c3QoZmVpbmFjb3VzdC5ldWMsICJhdmUiKQpwbG90KGZlaW5hY291c3QuaGMsIGxhYmVscz1mZWluYWNvdXN0JHNwcCkKZmVpbmFjb3VzdC5oYyRsYWJlbHMgPC0gZmVpbmFjb3VzdCRzcHAKcGxvdChmZWluYWNvdXN0LmhjKQpgYGAKCiMjIyBCZXR0ZXIgZ3JhcGhpY3MKCmBgYHtyfQpkaGMgPC0gYXMuZGVuZHJvZ3JhbShmZWluYWNvdXN0LmhjKQojIFJlY3Rhbmd1bGFyIGxpbmVzCmRkYXRhIDwtIGRlbmRyb19kYXRhKGRoYywgdHlwZSA9ICJyZWN0YW5nbGUiKQpwIDwtIGdncGxvdChzZWdtZW50KGRkYXRhKSkgKyAKICBnZW9tX3NlZ21lbnQoYWVzKHggPSB4LCB5ID0geSwgeGVuZCA9IHhlbmQsIHllbmQgPSB5ZW5kKSkgKyBnZW9tX3RleHQoZGF0YT1kZGF0YSRsYWJlbHMsYWVzKHg9eCx5PTApLGxhYmVsPWRkYXRhJGxhYmVscyRsYWJlbCwgaGp1c3Q9MCwgc2l6ZT0yLCBudWRnZV95PTAuMDUpKwogIGNvb3JkX2ZsaXAoKSArIAogIHNjYWxlX3lfcmV2ZXJzZShleHBhbmQgPSBjKDAuMiwgMCkpKwogIHRoZW1lKAogICAgYXhpcy50aXRsZS55ID0gZWxlbWVudF9ibGFuaygpLAogICAgYXhpcy50ZXh0LnkgPSBlbGVtZW50X2JsYW5rKCksCiAgICBheGlzLnRpY2tzLnkgPSBlbGVtZW50X2JsYW5rKCksCiAgICBheGlzLmxpbmUueCA9IGVsZW1lbnRfYmxhbmsoKSwKICAgIHBhbmVsLmJhY2tncm91bmQgPSBlbGVtZW50X2JsYW5rKCksCiAgICApKwogIGxhYnMoeT0iRGlzdGFuY2UiKQpwCmBgYAoKIyMjIyBDb2xvdXIgdmVyc2lvbgoKTm90ZTogU3RpbGwgbmVlZHMgd29yazsgd2l0aCB3aGl0ZSBiL2csIHNwZWNpZXMgdGFncyBkb24ndCBzaG93IHVwIHdlbGwgd2hlbiB5ZWxsb3cuCgpgYGB7cn0KZ2dwbG90KHNlZ21lbnQoZGRhdGEpKSArIAogIGdlb21fc2VnbWVudChhZXMoeCA9IHgsIHkgPSB5LCB4ZW5kID0geGVuZCwgeWVuZCA9IHllbmQpKSArIGdlb21fdGV4dChkYXRhPWRkYXRhJGxhYmVscyxhZXMoeD14LHk9MCxjb2xvcj1sYWJlbCksbGFiZWw9ZGRhdGEkbGFiZWxzJGxhYmVsLCBoanVzdD0wLCBzaXplPTIsIG51ZGdlX3k9MC4wNSkrCiAgY29vcmRfZmxpcCgpICsgCiAgc2NhbGVfeV9yZXZlcnNlKGV4cGFuZCA9IGMoMC4yLCAwKSkrCiAgc2NhbGVfY29sb3JfdmlyaWRpc19kKCkrCiAgdGhlbWUoCiAgICBheGlzLnRpdGxlLnkgPSBlbGVtZW50X2JsYW5rKCksCiAgICBheGlzLnRleHQueSA9IGVsZW1lbnRfYmxhbmsoKSwKICAgIGF4aXMudGlja3MueSA9IGVsZW1lbnRfYmxhbmsoKSwKICAgIGF4aXMubGluZS54ID0gZWxlbWVudF9ibGFuaygpLAogICAgcGFuZWwuYmFja2dyb3VuZCA9IGVsZW1lbnRfYmxhbmsoKSwKICAgIGxlZ2VuZC5wb3NpdGlvbiA9ICJub25lIgogICAgKSsKICBsYWJzKHk9IkRpc3RhbmNlIikKYGBgCg==