# Introduction

An R package implementing a Projection Pursuit algorithm based on finite Gaussian Mixture Models for density estimation, using Genetic Algorithms (PPGMMGA) to maximise an approximated negentropy index. The ppgmmga algorithm provides a method for visualising high-dimensional data in a lower-dimensional space, with particular emphasis on revealing clustering structures.

library(ppgmmga)
# Banknote data

library(mclust)
data("banknote")
X <- banknote[,-1]
Class <- banknote$Status
table(Class)
## Class
## counterfeit     genuine
##         100         100
Class_color <- ggthemes::tableau_color_pal("Classic 10")(2)
clPairs(X, classification = Class, colors = Class_color)

# 1-dimensional ppgmmga

pp1D <- ppgmmga(data = X, d = 1, approx = "UT", seed = 1)
pp1D
## Call:
## ppgmmga(data = X, d = 1, approx = "UT", seed = 1)
##
## 'ppgmmga' object containing:
## [1] "data"       "d"          "approx"     "GMM"        "GA"
## [6] "Negentropy" "basis"      "Z"
summary(pp1D)
## ── ppgmmga ─────────────────────────────
##
## Data dimensions               = 200 x 6
## Data transformation           = center & scale
## Projection subspace dimension = 1
## GMM density estimate          = (VEE,4)
## Negentropy approximation      = UT
## GA optimal negentropy         = 0.6345935
## GA encoded basis solution:
##            x1       x2       x3        x4       x5
## [1,] 3.268902 2.373044 1.051365 0.3131285 0.531718
##
## Estimated projection basis:
##                  PP1
## Length   -0.01196531
## Left     -0.09347750
## Right     0.16021052
## Bottom    0.57406981
## Top       0.34503463
## Diagonal -0.71892026
plot(pp1D)
plot(pp1D, class = Class)

# 2-dimensional ppgmmga

pp2D <- ppgmmga(data = X, d = 2, approx = "UT", seed = 1)
summary(pp2D, check = TRUE)
## ── ppgmmga ─────────────────────────────
##
## Data dimensions               = 200 x 6
## Data transformation           = center & scale
## Projection subspace dimension = 2
## GMM density estimate          = (VEE,4)
## Negentropy approximation      = UT
## GA optimal negentropy         = 1.13624
## GA encoded basis solution:
##            x1       x2       x3       x4        x5      x6        x7
## [1,] 2.268667 2.929821 1.061407 1.084929 0.3044298 3.85462 0.9832903
##           x8        x9      x10
## [1,] 1.11377 0.1671738 1.668403
##
## Estimated projection basis:
##                  PP1         PP2
## Length   -0.03726866 -0.07183191
## Left      0.03125553 -0.11981164
## Right    -0.15480788  0.06300918
## Bottom   -0.08569311  0.86390485
## Top      -0.10249897  0.46037272
## Diagonal  0.97766012  0.13505761
##
## Monte Carlo Negentropy approximation check:
##                            UT
## Approx Negentropy 1.136240194
## MC Negentropy     1.137260367
## MC se             0.003527379
## Relative accuracy 0.999102956
summary(pp2D$GMM)
plot(pp2D$GA)
plot(pp2D)
plot(pp2D, class = Class, drawAxis = FALSE)

# 3-dimensional ppgmmga

pp3D <- ppgmmga(data = X, d = 3,
                center = TRUE, scale = FALSE,
                gatype = "gaisl",
                options = ppgmmga.options(numIslands = 2),
                seed = 1)
summary(pp3D, check = TRUE)
## ── ppgmmga ─────────────────────────────
##
## Data dimensions               = 200 x 6
## Data transformation           = center
## Projection subspace dimension = 3
## GMM density estimate          = (VVE,3)
## Negentropy approximation      = UT
## GA optimal negentropy         = 1.16915
## GA encoded basis solution:
##            x1       x2       x3      x4       x5       x6       x7
## [1,] 4.306147 2.435962 1.072888 1.02168 1.039589 4.934657 2.005115
##            x8       x9      x10 ...      x14      x15
## [1,] 2.047029 1.950543 2.200697     1.534584 2.504773
##
## Estimated projection basis:
##                 PP1        PP2        PP3
## Length   -0.3849309  0.5240368 -0.5116536
## Left     -0.1655861 -0.1697583 -0.3109141
## Right     0.2462001  0.5001222 -0.4154481
## Bottom    0.2973840  0.3653894  0.3867856
## Top       0.3097231  0.4873071  0.3130374
## Diagonal -0.7612025  0.2747140  0.4704789
##
## Monte Carlo Negentropy approximation check:
##                            UT
## Approx Negentropy 1.169149621
## MC Negentropy     1.173876686
## MC se             0.004294694
## Relative accuracy 0.995973116
plot(pp3D$GA)

plot(pp3D)

plot(pp3D, class = Class)

plot(pp3D, dim = c(1,2))

plot(pp3D, dim = c(1,3), class = Class)

# A rotating 3D plot can be obtained using
# (msir is optional: it is installed only if missing and is called through
# its namespace, so it does not need to be attached)
if (!requireNamespace("msir", quietly = TRUE)) {
  install.packages("msir")
}
msir::spinplot(pp3D$Z,
               markby = Class,
               pch.points = c(19, 17),
               col.points = Class_color)

# References

Scrucca L, Serafini A (2019). “Projection pursuit based on Gaussian mixtures and evolutionary algorithms.” Journal of Computational and Graphical Statistics. doi: 10.1080/10618600.2019.1598871 (URL: https://doi.org/10.1080/10618600.2019.1598871).

