---
title: "Customising Violin Plots with Formula Input"
author: "Tom Kelly"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{vioplot: Customising Violin Plots with Formula Input}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

Since boxplots have become the _de facto_ standard for plotting the distribution of data, most users are familiar with them and with the formula input for data frames. However, this input is not available in the standard `vioplot` package. Thus it has been restored here for enhanced backwards compatibility with `boxplot`.

As shown below for the `iris` dataset, violin plots can show distribution information using the formula input that `boxplot` implements but the standard `vioplot` is unable to accept. This vignette reproduces the customisation shown in [the main vioplot vignette using vioplot syntax](violin_customisation.html), using the formula method commonly used for `boxplot`, `t.test`, and `lm`.

```{r}
library("vioplot")
```

```{r, message=FALSE, eval=FALSE}
data(iris)
boxplot(Sepal.Length~Species, data = iris)
```

```{r, message=FALSE, echo=FALSE}
data(iris)
boxplot(Sepal.Length~Species, data = iris, main = "Sepal Length")
```

However, the same call does not work with `vioplot` (0.2).

```{r, message=FALSE, eval=FALSE}
devtools::install_version("vioplot", version = "0.2")
library("vioplot")
vioplot(Sepal.Length~Species, data = iris)
```

```
Error in min(data) : invalid 'type' (language) of argument
```

## Plot Defaults

```{r, message=FALSE, eval=FALSE}
vioplot(Sepal.Length~Species, data = iris)
```

```{r, message=FALSE, echo=FALSE}
vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="magenta")
```

Another concern we see here is that the `vioplot` defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage.
Thus the plot default colours have been changed as shown here:

```{r}
vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length")
```

## Plot colours: Violin Fill

As with the original `vioplot` package, plot colours can be further customised using the `col` argument:

```{r}
vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue")
```

### Vectorisation

However, the `vioplot` (0.2) function is unable to colour each violin separately; this is enabled with a vectorised `col` in `vioplot` (0.3):

```{r}
vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"))
legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5)
```

## Plot colours: Violin Lines and Boxplot

Colours can also be customised for the violin fill and border separately using the `col` and `border` arguments:

```{r}
vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue")
```

Similarly, the arguments `lineCol` and `rectCol` specify the colours of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour.
```{r}
vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", rectCol="palevioletred", lineCol="violetred")
```

The same applies to the colour of the median point with `colMed`:

```{r}
vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", colMed="violet")
```

### Combined customisation

These customised colours can be combined:

```{r}
vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet")
```

### Vectorisation

These colour and shape settings can also be customised separately for each violin:

```{r}
vioplot(Sepal.Length~Species, data = iris, main="Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19))
```

### Enhanced Annotation

Here we demonstrate additional annotation features to display outliers and group sizes.

#### Labelling group size

Note that the y-axis limits need to be adjusted to avoid overlaying text.

```{r, fig.align = 'center', fig.height = 4, fig.width = 8, fig.keep = 'last'}
data("iris")
# Columns are referenced through `iris$` instead of attach(iris), so this
# chunk leaves the search path untouched for the chunks that follow.
vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", ylab = "",
        col = c("lightgreen", "lightblue", "palevioletred"),
        # the axis starts at zero; the group labels below are drawn at height 0.5
        ylim = c(0, max(iris$Sepal.Length) * 1.1))
legend("bottomright", legend = c("setosa", "versicolor", "virginica"),
       fill = c("lightgreen", "lightblue", "palevioletred"), cex = 0.8)
# label each violin using the values and their grouping factor
add_labels(unlist(iris$Sepal.Length), iris$Species, height = 0.5, cex = 0.8)
```

#### Plotting outliers and medians

Here we add outliers and show annotation features.
```{r, warning=FALSE}
# add outliers to demo data
# The extra rows are built as a data frame so that every column keeps its
# type. Binding bare c(7, 1, 0, 0, "setosa") vectors (numbers mixed with a
# species name, hence character) turns the measurement columns into text.
outliers <- data.frame(
  Sepal.Length = c(7, 1, 9, 2, 10, 12),
  Sepal.Width = c(1, 10, 2, 12, 1, 7),
  Petal.Length = 0,
  Petal.Width = 0,
  Species = factor(
    c("setosa", "setosa", "versicolor", "versicolor", "virginica", "virginica"),
    levels = levels(iris$Species)
  )
)
iris2 <- rbind(iris, outliers)
table(iris2$Species)
```

This adds outliers to the plot.

```{r, fig.align = 'center', fig.height = 4, fig.width = 8, fig.keep = 'last'}
# Columns are referenced through `iris2$` rather than attach(iris2), so the
# chunk does not depend on (or change) what is on the search path.
vioplot(Sepal.Length~Species, data = iris2, main = "Sepal Length",
        col = c("lightgreen", "lightblue", "palevioletred"),
        ylim = c(min(iris2$Sepal.Length) * 0.9, max(iris2$Sepal.Length) * 1.1))
# One median per species, in factor-level order (the order of the groups on
# the x-axis), so position i always belongs to level i.
Sepal.medians <- tapply(iris2$Sepal.Length, iris2$Species, median)
# highlights medians
points(x = seq_along(Sepal.medians), y = Sepal.medians, pch = 21, cex = 1.25,
       lwd = 2, col = "white",
       bg = c("forestgreen", "lightblue4", "palevioletred4"))
# plots outliers above 2 SD
add_outliers(unlist(iris2$Sepal.Length), iris2$Species, cutoff = 2,
             col = "black", bars = "grey85", lwd = 2,
             fill = c("palegreen3", "lightblue3", "palevioletred3"))
legend("bottomright", legend = c("setosa", "versicolor", "virginica"),
       fill = c("lightgreen", "lightblue", "palevioletred"), cex = 0.6)
add_labels(unlist(iris2$Sepal.Length), iris2$Species, height = 0.5, cex = 0.8)
```

Annotations on split violins are shown here. See the split violin plot vignette for details on these parameters.
```{r, fig.align = 'center', fig.height = 4, fig.width = 8, fig.keep = 'last'}
data(iris)
summary(iris2$Sepal.Width)
table(iris2$Sepal.Width > mean(iris2$Sepal.Width))

# Split the demo data at the mean sepal width; each subset is drawn as one
# side of the violins. Columns are always referenced through their data
# frame (no attach()), so it is explicit which subset each value comes from.
iris_large <- iris2[iris2$Sepal.Width > mean(iris2$Sepal.Width), ]
iris_small <- iris2[iris2$Sepal.Width <= mean(iris2$Sepal.Width), ]

# --- right-hand side: wide sepals ---
vioplot(Sepal.Length~Species, data = iris_large, plotCentre = "line",
        side = "right",
        col = c("lightgreen", "lightblue", "palevioletred"),
        # limits come from the full data so both halves fit on the same axis
        ylim = c(min(iris2$Sepal.Length) * 0.9, max(iris2$Sepal.Length) * 1.1),
        names = c("setosa", "versicolor", "virginica"))
# medians per species in factor-level order, i.e. the order of the x-axis
Sepal.medians <- tapply(iris_large$Sepal.Length, iris_large$Species, median)
# highlights medians
points(x = seq_along(Sepal.medians), y = Sepal.medians, pch = 21, cex = 1.25,
       lwd = 2, col = "white",
       bg = c("forestgreen", "lightblue4", "palevioletred4"))
# plots outliers above 2 SD
# values and groups must come from the same subset so they have equal length
add_outliers(unlist(iris_large$Sepal.Length), iris_large$Species, cutoff = 2,
             col = c("palegreen3", "lightblue3", "palevioletred3"),
             bars = "grey85", lwd = 2, fill = "grey85")
legend("bottomright", legend = c("setosa", "versicolor", "virginica"),
       fill = c("palegreen3", "lightblue3", "palevioletred3"), cex = 0.6)
add_labels(unlist(iris2$Sepal.Length), iris2$Species, height = 0.5, cex = 0.8)

# --- left-hand side: narrow sepals, drawn onto the existing plot ---
vioplot(Sepal.Length~Species, data = iris_small, plotCentre = "line",
        side = "left", add = TRUE,
        col = c("palegreen1", "lightblue1", "palevioletred1"),
        ylim = c(min(iris_small$Sepal.Length) * 0.9,
                 max(iris_small$Sepal.Length) * 1.1),
        names = c("setosa", "versicolor", "virginica"))
Sepal.medians <- tapply(iris_small$Sepal.Length, iris_small$Species, median)
# highlights medians
points(x = seq_along(Sepal.medians), y = Sepal.medians, pch = 21, cex = 1.25,
       lwd = 2, col = "white",
       bg = c("forestgreen", "lightblue4", "palevioletred4"))
# plots outliers above 2 SD
# uses the narrow-sepal subset, matching the medians computed for this side
add_outliers(unlist(iris_small$Sepal.Length), iris_small$Species, cutoff = 2,
             col = c("palegreen3", "lightblue3", "palevioletred3"),
             bars = "grey85", lwd = 2, fill = "grey50")
legend("bottomright", legend = c("setosa", "versicolor", "virginica"),
       fill = c("lightgreen", "lightblue", "palevioletred"), cex = 0.6)
add_labels(unlist(iris2$Sepal.Length), iris2$Species, height = 0.5, cex = 0.8)

# add legend and titles
legend("topleft", fill = c("lightblue2", "lightblue3"),
       legend = c("small", "large"), title = "Sepal Width")
title(xlab = "Species", ylab = "Sepal Length")
```