### Some exploration of the Cats and Dogs data
load("CATSnDOGS.RData")
cnd<-CATSnDOGS
rm(CATSnDOGS)
#
library(rgl)
#
ss<-svd(cnd)
plot3d(ss$u[,1:3],col=c(rep(1,99),rep(2,99)))
#
rotateM <- function(x) t(apply(x, 2, rev))
par(mfrow=c(2,2))
uv<-c(sample(seq(1,99),2),sample(seq(100,198),2))
for (kk in (1:4)) {
  image(rotateM(matrix(cnd[uv[kk],],64,64)),col=gray.colors(256)) }
par(mfrow=c(2,2))
for (kk in (1:4)) {
  image(rotateM(matrix(ss$v[,kk],64,64)),col=gray.colors(256)) }
#
par(mfrow=c(1,1))
plot(ss$u[,c(1,2)],col=c(rep(1,99),rep(2,99)))
p<-identify(ss$u[,c(1,2)])
# 
par(mfrow=c(2,2))
for (kk in (1:4)) {
  image(rotateM(matrix(cnd[p[kk],],64,64)),col=gray.colors(256)) }
# mainly color of pet or background in 2 first leading components
par(mfrow=c(1,1)) 
plot(ss$u[,c(1,3)],col=c(rep(1,99),rep(2,99)))
p<-identify(ss$u[,c(1,3)])
# which cats and dogs are stretched out along similar PC
par(mfrow=c(2,2))
for (kk in (1:4)) {
  image(rotateM(matrix(cnd[p[kk],],64,64)),col=gray.colors(256)) }
# picks up color of image in 1st component.. and cat vs dog in 3rd component.
######
# remove every other pixel per row and column (downsampling to speed things up)
cnd2<-cnd[,rep(seq(1,64,by=2))+64*sort(rep(seq(0,63,by=2),32))]
#
library("NMF")
K<-25
aacnd<-nmf(cnd2,K,nrun=1) # takes a while to run... here I run on every 4th pixel to speed things up
par(mfrow=c(1,2))
coefmap(aacnd)
# this is what the codebook looks like (each is a raster scan of a 16*16 image)
plot(basis(fit(aacnd))[,1:2],col=c(rep(1,99),rep(2,99)),
     pch=c(rep("c",99),rep("d",99)))
# 
par(mfrow=c(1,2))
image(rotateM(matrix(cnd2[2,],32,32)),col=gray.colors(256))
image(rotateM(matrix(fitted(aacnd)[2,],32,32)),col=gray.colors(256))
#
par(mfrow=c(5,5))
for (kk in (1:25)) {
  image(rotateM(matrix(coef(aacnd)[kk,],32,32)),col=gray.colors(256))
}
#
par(mfrow=c(1,1))
ff<-basis(aacnd)
hh<-hclust(dist(ff))
plot(hh,labels=c(rep("C",99),rep("D",99)))
#
par(mfrow=c(5,5))
for (kk in (1:25)) {
 ddc<-density(ff[1:99,kk])
 ddd<-density(ff[100:198,kk])
 plot(ddc$x,ddc$y,type="l",ylim=c(min(ddc$y,ddd$y),max(ddc$y,ddd$y)))
 lines(ddd$x,ddd$y,col=2) }
# some codebooks are much more "cat" than "dog"
#####
# Try soms
library(kohonen)
# 
par(mfrow=c(1,1))
ss<-som(cnd,grid=somgrid(5,5))
plot(ss,type="mapping",col=c(rep(1,99),rep(2,99)),pch=c(rep("C",99),rep("D",99)))
plot(ss,type="codes")
#
par(mfrow=c(5,5))
for (kk in (1:25)) {
  image(rotateM(matrix(ss$codes[[1]][kk,],64,64)),col=gray.colors(256))
}
#
####
library(cluster)
dd<-daisy(cnd)
# 
ff<-cmdscale(dd,2)
par(mfrow=c(1,1))
plot(ff,col=c(rep(1,99),rep(2,99)),pch=c(rep("C",99),rep("D",99)))
ff<-cmdscale(dd,3)
plot3d(ff,col=c(rep(1,99),rep(2,99)),pch=c(rep("C",99),rep("D",99)))
# Like with the other methods - there is a spread in terms of color pet and a direction of cat/dog distinction...
######
# What is you run on just the cats?
ss<-svd(cnd[1:99,])
plot3d(ss$u[,1:3])
#
par(mfrow=c(2,2))
for (kk in (1:4)) {
  image(rotateM(matrix(ss$v[,kk],64,64)),col=gray.colors(256)) }
# 1st svd = eyes and ears?
# 2nd svd = background vs foreground?
# 3rd svd = head
#
par(mfrow=c(1,1))
plot(ss$u[,c(1,2)])
p<-identify(ss$u[,c(1,2)])
# 
par(mfrow=c(3,2))
for (kk in (1:6)) {
  image(rotateM(matrix(cnd[1:99,][p[kk],],64,64)),col=gray.colors(256)) }
# 
#
K<-25
aacndc<-nmf(cnd2[1:99,],K,nrun=1) # takes a while to run... here I run on every 4th pixel to speed things up
par(mfrow=c(1,2))
coefmap(aacndc)
# this is what the codebook looks like (each is a raster scan of a 16*16 image)
plot(basis(fit(aacndc))[,1:2])
# 
par(mfrow=c(1,2))
image(rotateM(matrix(cnd2[1:99,][25,],32,32)),col=gray.colors(256))
image(rotateM(matrix(fitted(aacndc)[25,],32,32)),col=gray.colors(256))
#
par(mfrow=c(5,5))
for (kk in (1:25)) {
  image(rotateM(matrix(coef(aacndc)[kk,],32,32)),col=gray.colors(256))
}
# 
par(mfrow=c(1,1))
ss<-som(cnd[1:99,],grid=somgrid(5,5))
plot(ss,type="mapping")
plot(ss,type="codes")
#
par(mfrow=c(5,5))
for (kk in (1:25)) {
  image(rotateM(matrix(ss$codes[[1]][kk,],64,64)),col=gray.colors(256))
}
# picks up on color, two-colored faces, etc
# BUT - there is a general catness with a large spread
####
# dogs
ss<-svd(cnd[100:198,])
plot3d(ss$u[,1:3])
#
par(mfrow=c(2,2))
for (kk in (1:4)) {
  image(rotateM(matrix(ss$v[,kk],64,64)),col=gray.colors(256)) }
# 1st svd = eyes, nose
# 2nd svd = background vs foreground?
# 3rd svd = head?
#
par(mfrow=c(1,1))
plot(ss$u[,c(1,2)])
p<-identify(ss$u[,c(1,2)])
# 
par(mfrow=c(3,2))
for (kk in (1:6)) {
  image(rotateM(matrix(cnd[100:198,][p[kk],],64,64)),col=gray.colors(256)) }
# 
#
K<-25
aacndd<-nmf(cnd2[100:198,],K,nrun=1) # takes a while to run... here I run on every 4th pixel to speed things up
par(mfrow=c(1,2))
coefmap(aacndd)
# this is what the codebook looks like (each is a raster scan of a 16*16 image)
plot(basis(fit(aacndd))[,1:2])
# 
par(mfrow=c(1,2))
image(rotateM(matrix(cnd2[100:198,][2,],32,32)),col=gray.colors(256))
image(rotateM(matrix(fitted(aacndd)[2,],32,32)),col=gray.colors(256))
#
par(mfrow=c(5,5))
for (kk in (1:25)) {
  image(rotateM(matrix(coef(aacndd)[kk,],32,32)),col=gray.colors(256))
}
# 
par(mfrow=c(1,1))
ss<-som(cnd[100:198,],grid=somgrid(5,5))
plot(ss,type="mapping")
plot(ss,type="codes")
#
par(mfrow=c(5,5))
for (kk in (1:25)) {
  image(rotateM(matrix(ss$codes[[1]][kk,],64,64)),col=gray.colors(256))
}
#################
# OK - what have we learnt?
#
par(mfrow=c(1,2))
ssc<-svd(cnd[1:99,])
plot(ssc$d)
ssd<-svd(cnd[100:198,])
plot(ssd$d)
#
plot(cumsum(ssc$d^2)/sum(ssc$d^2),cumsum(ssd$d^2)/sum(ssd$d^2))
abline(0,1)
# for few components, dog data is less well summarized than cats...