# Load the required packages (install them first with install.packages()
# if they are not yet available).
library(mclust)
library(clustvarsel)
#
# To explore the sensitivity of Mclust, fit the model 3 times, each time on
# a fresh random subset of 100 iris observations.
for (kk in seq_len(3)) {
  ii <- sample(nrow(iris), 100)
  # Column 5 is Species; cluster on the four numeric measurements only.
  mm <- Mclust(iris[ii, -5])
  # The summary tells you how many clusters were selected and how complex
  # (equal or varying volume, shape or orientation) the clusters are.
  print(summary(mm))
  # Check whether the clusters recover the original class labels.
  # mm$classification holds the most probable component per observation.
  # Unlike apply(mm$z, 1, sort.list)[mm$G, ] it also works when a single
  # cluster is selected (apply() then returns a vector, not a matrix).
  print(table(iris[ii, 5], mm$classification))
  # The plot illustrates what the cluster distributions look like.
  # If you only want to plot a subset of the features use the command
  # plot(mm, c(subset), what = "classification"), e.g. c(1, 2) if you want
  # to plot only the sepal length and width.
  plot(mm, what = "classification")
  # Pause: wait for input on the graphics device before the next run.
  p <- locator()
}
#
# Below I use Mclust with specific modeling assumptions. All fits use
# 3 clusters and the same random subset of 100 observations:
#   VVV: volume, shape and orientation vary between clusters
#   EEE: volume, shape and orientation are the same for all clusters
#   VVI: volume and shape vary, no correlation
#   EII: equal volume, spherical clusters
ii <- sample(nrow(iris), 100)
for (model in c("VVV", "EEE", "VVI", "EII")) {
  # Column 5 is Species; cluster on the four numeric measurements only.
  # The argument is spelled out as modelNames rather than relying on
  # partial matching of `modelName`.
  mm <- Mclust(iris[ii, -5], G = 3, modelNames = model)
  print(summary(mm))
  # Cross-tabulate the true species against the most probable component.
  print(table(iris[ii, 5], mm$classification))
  plot(mm, what = "classification")
  # Pause: wait for input on the graphics device before the next model.
  p <- locator()
}
#
# Clustvarsel with 3 clusters, repeated on 3 random subsets of 100
# observations.
for (kk in seq_len(3)) {
  ii <- sample(nrow(iris), 100)
  # Column 5 is Species; select among the four numeric measurements only.
  cc <- clustvarsel(iris[ii, -5], G = 3)
  # Summarizes the feature selection procedure. The component is named
  # steps.info; `cc$step` only worked through partial matching of `$`.
  print(cc$steps.info)
  # Final variable set.
  print(cc$subset)
  # Pause: wait for input on the currently open graphics device.
  p <- locator()
}
#
# Clustvarsel with 2 clusters, repeated on 3 random subsets of 100
# observations.
for (kk in seq_len(3)) {
  ii <- sample(nrow(iris), 100)
  # Column 5 is Species; select among the four numeric measurements only.
  cc <- clustvarsel(iris[ii, -5], G = 2)
  # Summarizes the feature selection procedure. The component is named
  # steps.info; `cc$step` only worked through partial matching of `$`.
  print(cc$steps.info)
  # Final variable set.
  print(cc$subset)
  # Pause: wait for input on the currently open graphics device.
  p <- locator()
}
#
# You can also play with different Mclust models: here the multivariate
# fits are restricted to the VEV model.
for (kk in seq_len(3)) {
  ii <- sample(nrow(iris), 100)
  # emModels2 takes the model names directly. mclust.options("VEV") looks up
  # an *option* called "VEV", which does not exist, so the restriction was
  # silently lost.
  cc <- clustvarsel(iris[ii, -5], G = 3, emModels2 = "VEV")
  # Summarizes the feature selection procedure. The component is named
  # steps.info; `cc$step` only worked through partial matching of `$`.
  print(cc$steps.info)
  # Final variable set.
  print(cc$subset)
  # Pause: wait for input on the currently open graphics device.
  p <- locator()
}
#
# I add some noise features to the data. Is ClustVarSel able to handle this?
# Four columns of standard normal noise are placed in front of the iris
# columns and named "1" to "4".
n_obs <- nrow(iris)
noise <- matrix(rnorm(n_obs * 4), n_obs, 4)
# cbind() of a matrix and a data frame already returns a data frame.
newiris <- cbind(noise, iris)
names(newiris) <- c("1", "2", "3", "4", names(iris))
# Everything except the class label is offered to the variable selection.
is_feature <- names(newiris) != "Species"
for (kk in seq_len(3)) {
  ii <- sample(n_obs, 100)
  cc <- clustvarsel(newiris[ii, is_feature], G = 3)
  # Summarizes the feature selection procedure. The component is named
  # steps.info; `cc$step` only worked through partial matching of `$`.
  print(cc$steps.info)
  # Final variable set.
  print(cc$subset)
  # Pause: wait for input on the currently open graphics device.
  p <- locator()
}
#
# Here I add a set of features that are duplicates of the iris data but
# with added noise. In my runs ClustVarSel does a pretty good job at finding
# the noiseless features as cluster related and leaving the noisy features
# as indirectly related to the clustering.
# Try different noise levels by changing the sd in the rnorm().
n_obs <- nrow(iris)
# Noisy copies of the four measurements (columns 1 to 4 of iris).
noisy_copy <- matrix(rnorm(n_obs * 4, sd = 0.1), n_obs, 4) + iris[, 1:4]
newiris <- cbind(noisy_copy, iris)
names(newiris) <- c("1", "2", "3", "4", names(iris))
# Everything except the class label is offered to the variable selection.
is_feature <- names(newiris) != "Species"
for (kk in seq_len(3)) {
  ii <- sample(n_obs, 100)
  cc <- clustvarsel(newiris[ii, is_feature], G = 3)
  # Summarizes the feature selection procedure. The component is named
  # steps.info; `cc$step` only worked through partial matching of `$`.
  print(cc$steps.info)
  # Final variable set.
  print(cc$subset)
  # Pause: wait for input on the currently open graphics device.
  p <- locator()
}
##