Worked example of cluster analysis

We introduced the study of Feinberg et al. (2014), who used morphological, genetic, and acoustic (call) criteria to discriminate five species (one new) of congeneric frogs (Box 15.4). We will use their acoustic data (six variables) to hierarchically cluster their specimens based on a Euclidean dissimilarity matrix calculated from standardized call variables. We used UPGMA.

Preliminaries

Plots used for QK use the ggplot classic theme, with some tweaks. Tweaks are consolidated into theme_QK; use this theme for figures and tweak the theme to avoid repetitive code changes.

source("../R/libraries.R")    #loads common set of packages
source("../R/appearance.R")
library(ggdendro)

Read data and standardize variables. We use the feinberg acoustic data

feinacoust <- read_csv("../data/feinacoust.csv", col_types = cols(frogid = col_skip()))
# standardize variablesfeinacoust$crt <- scale(feinacoust$crt)
feinacoust$cdc <- scale(feinacoust$cdc)
feinacoust$pn <- scale(feinacoust$pn)
feinacoust$df <- scale(feinacoust$df)
feinacoust$tcl <- scale(feinacoust$tcl)
feinacoust$tcr <- scale(feinacoust$tcr)

create Euclidean dissimilary matrix

#feinacoust.euc <- vegdist(feinacoust,'euclidean')
feinacoust.euc<-dist(as.matrix(feinacoust),'euclidean')
Warning: NAs introduced by coercion

Run cluster analysis

feinacoust.hc <- hclust(feinacoust.euc, "ave")
plot(feinacoust.hc, labels=feinacoust$spp)

feinacoust.hc$labels <- feinacoust$spp
plot(feinacoust.hc)

Better graphics

dhc <- as.dendrogram(feinacoust.hc)
# Rectangular lines
ddata <- dendro_data(dhc, type = "rectangle")
p <- ggplot(segment(ddata)) + 
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + geom_text(data=ddata$labels,aes(x=x,y=0),label=ddata$labels$label, hjust=0, size=2, nudge_y=0.05)+
  coord_flip() + 
  scale_y_reverse(expand = c(0.2, 0))+
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.x = element_blank(),
    panel.background = element_blank(),
    )+
  labs(y="Distance")
p

Colour version

Note: Still needs work; with white b/g, species tags don’t show up well when yellow.

ggplot(segment(ddata)) + 
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + geom_text(data=ddata$labels,aes(x=x,y=0,color=label),label=ddata$labels$label, hjust=0, size=2, nudge_y=0.05)+
  coord_flip() + 
  scale_y_reverse(expand = c(0.2, 0))+
  scale_color_viridis_d()+
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.x = element_blank(),
    panel.background = element_blank(),
    legend.position = "none"
    )+
  labs(y="Distance")

LS0tCnRpdGxlOiAiUUsgQm94IDE2LjQiCm91dHB1dDoKICBodG1sX25vdGVib29rCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkKYGBgCgojIyBXb3JrZWQgZXhhbXBsZSBvZiBjbHVzdGVyIGFuYWx5c2lzCgpXZSBpbnRyb2R1Y2VkIHRoZSBzdHVkeSBvZiBGZWluYmVyZyBldCBhbC4gKDIwMTQpLCB3aG8gdXNlZCBtb3JwaG9sb2dpY2FsLCBnZW5ldGljLCBhbmQgYWNvdXN0aWMgKGNhbGwpIGNyaXRlcmlhIHRvIGRpc2NyaW1pbmF0ZSBmaXZlIHNwZWNpZXMgKG9uZSBuZXcpIG9mIGNvbmdlbmVyaWMgZnJvZ3MgKFtCb3ggMTUuNF0oZmVpbmJlcmdMREEubmIuaHRtbCkpLiBXZSB3aWxsIHVzZSB0aGVpciBhY291c3RpYyBkYXRhIChzaXggdmFyaWFibGVzKSB0byBoaWVyYXJjaGljYWxseSBjbHVzdGVyIHRoZWlyIHNwZWNpbWVucyBiYXNlZCBvbiBhIEV1Y2xpZGVhbiBkaXNzaW1pbGFyaXR5IG1hdHJpeCBjYWxjdWxhdGVkIGZyb20gc3RhbmRhcmRpemVkIGNhbGwgdmFyaWFibGVzLiBXZSB1c2VkIFVQR01BLgoKIyMjIFByZWxpbWluYXJpZXMKClBsb3RzIHVzZWQgZm9yIFFLIHVzZSB0aGUgZ2dwbG90IGNsYXNzaWMgdGhlbWUsIHdpdGggc29tZSB0d2Vha3MuIFR3ZWFrcyBhcmUgY29uc29saWRhdGVkIGludG8gdGhlbWVfUUs7IHVzZSB0aGlzIHRoZW1lIGZvciBmaWd1cmVzIGFuZCB0d2VhayB0aGUgdGhlbWUgdG8gYXZvaWQgcmVwZXRpdGl2ZSBjb2RlIGNoYW5nZXMuCgpgYGB7ciByZXN1bHRzPSdoaWRlJ30Kc291cmNlKCIuLi9SL2xpYnJhcmllcy5SIikgICAgI2xvYWRzIGNvbW1vbiBzZXQgb2YgcGFja2FnZXMKc291cmNlKCIuLi9SL2FwcGVhcmFuY2UuUiIpCmxpYnJhcnkoZ2dkZW5kcm8pCmBgYAoKUmVhZCBkYXRhIGFuZCBzdGFuZGFyZGl6ZSB2YXJpYWJsZXMuIFdlIHVzZSB0aGUgW2ZlaW5iZXJnIGFjb3VzdGljIGRhdGFdKC4uL2RhdGEvZmVpbmFjb3VzdC5jc3YpCgpgYGB7ciB9CmZlaW5hY291c3QgPC0gcmVhZF9jc3YoIi4uL2RhdGEvZmVpbmFjb3VzdC5jc3YiLCBjb2xfdHlwZXMgPSBjb2xzKGZyb2dpZCA9IGNvbF9za2lwKCkpKQojIHN0YW5kYXJkaXplIHZhcmlhYmxlc2ZlaW5hY291c3QkY3J0IDwtIHNjYWxlKGZlaW5hY291c3QkY3J0KQpmZWluYWNvdXN0JGNkYyA8LSBzY2FsZShmZWluYWNvdXN0JGNkYykKZmVpbmFjb3VzdCRwbiA8LSBzY2FsZShmZWluYWNvdXN0JHBuKQpmZWluYWNvdXN0JGRmIDwtIHNjYWxlKGZlaW5hY291c3QkZGYpCmZlaW5hY291c3QkdGNsIDwtIHNjYWxlKGZlaW5hY291c3QkdGNsKQpmZWluYWNvdXN0JHRjciA8LSBzY2FsZShmZWluYWNvdXN0JHRjcikKYGBgCgojIyMgY3JlYXRlIEV1Y2xpZGVhbiBkaXNzaW1pbGFyeSBtYXRyaXgKCmBgYHtyfQojZmVpbmFjb3VzdC5ldWMgPC0gdmVnZGlzdChmZWluYWNvdXN0LCdldWNsaWRlYW4nKQpmZWluYWNvdXN0LmV1YzwtZGlzdChhcy5tYXRyaXgoZmVpbmFjb3VzdCksJ2V1Y2xpZGVhbicpCmBgYAoKIyMjIFJ1biBjbHVzdGVyIGFuYWx5c2lzCgpgYGB7cn0KZmVpbmFjb3VzdC5oYyA8LSBoY2x1c3QoZmVpbmFjb3VzdC5ldWMsICJhdmUiKQpwbG90KGZlaW5hY291c3QuaGMsIGxhYmVscz1mZWluYWNvdXN0JHNwcCkKZmVpbmFjb3VzdC5oYyRsYWJlbHMgPC0gZmVpbmFjb3VzdCRzcHAKcGxvdChmZWluYWNvdXN0LmhjKQpgYGAKCiMjIyBCZXR0ZXIgZ3JhcGhpY3MKCmBgYHtyfQpkaGMgPC0gYXMuZGVuZHJvZ3JhbShmZWluYWNvdXN0LmhjKQojIFJlY3Rhbmd1bGFyIGxpbmVzCmRkYXRhIDwtIGRlbmRyb19kYXRhKGRoYywgdHlwZSA9ICJyZWN0YW5nbGUiKQpwIDwtIGdncGxvdChzZWdtZW50KGRkYXRhKSkgKyAKICBnZW9tX3NlZ21lbnQoYWVzKHggPSB4LCB5ID0geSwgeGVuZCA9IHhlbmQsIHllbmQgPSB5ZW5kKSkgKyBnZW9tX3RleHQoZGF0YT1kZGF0YSRsYWJlbHMsYWVzKHg9eCx5PTApLGxhYmVsPWRkYXRhJGxhYmVscyRsYWJlbCwgaGp1c3Q9MCwgc2l6ZT0yLCBudWRnZV95PTAuMDUpKwogIGNvb3JkX2ZsaXAoKSArIAogIHNjYWxlX3lfcmV2ZXJzZShleHBhbmQgPSBjKDAuMiwgMCkpKwogIHRoZW1lKAogICAgYXhpcy50aXRsZS55ID0gZWxlbWVudF9ibGFuaygpLAogICAgYXhpcy50ZXh0LnkgPSBlbGVtZW50X2JsYW5rKCksCiAgICBheGlzLnRpY2tzLnkgPSBlbGVtZW50X2JsYW5rKCksCiAgICBheGlzLmxpbmUueCA9IGVsZW1lbnRfYmxhbmsoKSwKICAgIHBhbmVsLmJhY2tncm91bmQgPSBlbGVtZW50X2JsYW5rKCksCiAgICApKwogIGxhYnMoeT0iRGlzdGFuY2UiKQpwCmBgYAoKIyMjIyBDb2xvdXIgdmVyc2lvbgoKTm90ZTogU3RpbGwgbmVlZHMgd29yazsgd2l0aCB3aGl0ZSBiL2csIHNwZWNpZXMgdGFncyBkb24ndCBzaG93IHVwIHdlbGwgd2hlbiB5ZWxsb3cuCgpgYGB7cn0KZ2dwbG90KHNlZ21lbnQoZGRhdGEpKSArIAogIGdlb21fc2VnbWVudChhZXMoeCA9IHgsIHkgPSB5LCB4ZW5kID0geGVuZCwgeWVuZCA9IHllbmQpKSArIGdlb21fdGV4dChkYXRhPWRkYXRhJGxhYmVscyxhZXMoeD14LHk9MCxjb2xvcj1sYWJlbCksbGFiZWw9ZGRhdGEkbGFiZWxzJGxhYmVsLCBoanVzdD0wLCBzaXplPTIsIG51ZGdlX3k9MC4wNSkrCiAgY29vcmRfZmxpcCgpICsgCiAgc2NhbGVfeV9yZXZlcnNlKGV4cGFuZCA9IGMoMC4yLCAwKSkrCiAgc2NhbGVfY29sb3JfdmlyaWRpc19kKCkrCiAgdGhlbWUoCiAgICBheGlzLnRpdGxlLnkgPSBlbGVtZW50X2JsYW5rKCksCiAgICBheGlzLnRleHQueSA9IGVsZW1lbnRfYmxhbmsoKSwKICAgIGF4aXMudGlja3MueSA9IGVsZW1lbnRfYmxhbmsoKSwKICAgIGF4aXMubGluZS54ID0gZWxlbWVudF9ibGFuaygpLAogICAgcGFuZWwuYmFja2dyb3VuZCA9IGVsZW1lbnRfYmxhbmsoKSwKICAgIGxlZ2VuZC5wb3NpdGlvbiA9ICJub25lIgogICAgKSsKICBsYWJzKHk9IkRpc3RhbmNlIikKYGBgCg==