### Some exploration of the Cats and Dogs data load("CATSnDOGS.RData") cnd<-CATSnDOGS rm(CATSnDOGS) # library(rgl) # ss<-svd(cnd) plot3d(ss$u[,1:3],col=c(rep(1,99),rep(2,99))) # rotateM <- function(x) t(apply(x, 2, rev)) par(mfrow=c(2,2)) uv<-c(sample(seq(1,99),2),sample(seq(100,198),2)) for (kk in (1:4)) { image(rotateM(matrix(cnd[uv[kk],],64,64)),col=gray.colors(256)) } par(mfrow=c(2,2)) for (kk in (1:4)) { image(rotateM(matrix(ss$v[,kk],64,64)),col=gray.colors(256)) } # par(mfrow=c(1,1)) plot(ss$u[,c(1,2)],col=c(rep(1,99),rep(2,99))) p<-identify(ss$u[,c(1,2)]) # par(mfrow=c(2,2)) for (kk in (1:4)) { image(rotateM(matrix(cnd[p[kk],],64,64)),col=gray.colors(256)) } # mainly color of pet or background in 2 first leading components par(mfrow=c(1,1)) plot(ss$u[,c(1,3)],col=c(rep(1,99),rep(2,99))) p<-identify(ss$u[,c(1,3)]) # which cats and dogs are stretched out along similar PC par(mfrow=c(2,2)) for (kk in (1:4)) { image(rotateM(matrix(cnd[p[kk],],64,64)),col=gray.colors(256)) } # picks up color of image in 1st component.. and cat vs dog in 3rd component. ###### # remove every other pixel per row and column (downsampling to speed things up) cnd2<-cnd[,rep(seq(1,64,by=2))+64*sort(rep(seq(0,63,by=2),32))] # library("NMF") K<-25 aacnd<-nmf(cnd2,K,nrun=1) # takes a while to run... here I run on every 4th pixel to speed things up par(mfrow=c(1,2)) coefmap(aacnd) # this is what the codebook looks like (each is a raster scan of a 16*16 image) plot(basis(fit(aacnd))[,1:2],col=c(rep(1,99),rep(2,99)), pch=c(rep("c",99),rep("d",99))) # par(mfrow=c(1,2)) image(rotateM(matrix(cnd2[2,],32,32)),col=gray.colors(256)) image(rotateM(matrix(fitted(aacnd)[2,],32,32)),col=gray.colors(256)) # par(mfrow=c(5,5)) for (kk in (1:25)) { image(rotateM(matrix(coef(aacnd)[kk,],32,32)),col=gray.colors(256)) } # par(mfrow=c(1,1)) ff<-basis(aacnd) hh<-hclust(dist(ff)) plot(hh,labels=c(rep("C",99),rep("D",99))) # par(mfrow=c(5,5)) for (kk in (1:25)) { ddc<-density(ff[1:99,kk]) ddd<-density(ff[100:198,kk]) plot(ddc$x,ddc$y,type="l",ylim=c(min(ddc$y,ddd$y),max(ddc$y,ddd$y))) lines(ddd$x,ddd$y,col=2) } # some codebooks are much more "cat" than "dog" ##### # Try soms library(kohonen) # par(mfrow=c(1,1)) ss<-som(cnd,grid=somgrid(5,5)) plot(ss,type="mapping",col=c(rep(1,99),rep(2,99)),pch=c(rep("C",99),rep("D",99))) plot(ss,type="codes") # par(mfrow=c(5,5)) for (kk in (1:25)) { image(rotateM(matrix(ss$codes[[1]][kk,],64,64)),col=gray.colors(256)) } # #### library(cluster) dd<-daisy(cnd) # ff<-cmdscale(dd,2) par(mfrow=c(1,1)) plot(ff,col=c(rep(1,99),rep(2,99)),pch=c(rep("C",99),rep("D",99))) ff<-cmdscale(dd,3) plot3d(ff,col=c(rep(1,99),rep(2,99)),pch=c(rep("C",99),rep("D",99))) # Like with the other methods - there is a spread in terms of color pet and a direction of cat/dog distinction... ###### # What is you run on just the cats? ss<-svd(cnd[1:99,]) plot3d(ss$u[,1:3]) # par(mfrow=c(2,2)) for (kk in (1:4)) { image(rotateM(matrix(ss$v[,kk],64,64)),col=gray.colors(256)) } # 1st svd = eyes and ears? # 2nd svd = background vs foreground? # 3rd svd = head # par(mfrow=c(1,1)) plot(ss$u[,c(1,2)]) p<-identify(ss$u[,c(1,2)]) # par(mfrow=c(3,2)) for (kk in (1:6)) { image(rotateM(matrix(cnd[1:99,][p[kk],],64,64)),col=gray.colors(256)) } # # K<-25 aacndc<-nmf(cnd2[1:99,],K,nrun=1) # takes a while to run... here I run on every 4th pixel to speed things up par(mfrow=c(1,2)) coefmap(aacndc) # this is what the codebook looks like (each is a raster scan of a 16*16 image) plot(basis(fit(aacndc))[,1:2]) # par(mfrow=c(1,2)) image(rotateM(matrix(cnd2[1:99,][25,],32,32)),col=gray.colors(256)) image(rotateM(matrix(fitted(aacndc)[25,],32,32)),col=gray.colors(256)) # par(mfrow=c(5,5)) for (kk in (1:25)) { image(rotateM(matrix(coef(aacndc)[kk,],32,32)),col=gray.colors(256)) } # par(mfrow=c(1,1)) ss<-som(cnd[1:99,],grid=somgrid(5,5)) plot(ss,type="mapping") plot(ss,type="codes") # par(mfrow=c(5,5)) for (kk in (1:25)) { image(rotateM(matrix(ss$codes[[1]][kk,],64,64)),col=gray.colors(256)) } # picks up on color, two-colored faces, etc # BUT - there is a general catness with a large spread #### # dogs ss<-svd(cnd[100:198,]) plot3d(ss$u[,1:3]) # par(mfrow=c(2,2)) for (kk in (1:4)) { image(rotateM(matrix(ss$v[,kk],64,64)),col=gray.colors(256)) } # 1st svd = eyes, nose # 2nd svd = background vs foreground? # 3rd svd = head? # par(mfrow=c(1,1)) plot(ss$u[,c(1,2)]) p<-identify(ss$u[,c(1,2)]) # par(mfrow=c(3,2)) for (kk in (1:6)) { image(rotateM(matrix(cnd[100:198,][p[kk],],64,64)),col=gray.colors(256)) } # # K<-25 aacndd<-nmf(cnd2[100:198,],K,nrun=1) # takes a while to run... here I run on every 4th pixel to speed things up par(mfrow=c(1,2)) coefmap(aacndd) # this is what the codebook looks like (each is a raster scan of a 16*16 image) plot(basis(fit(aacndd))[,1:2]) # par(mfrow=c(1,2)) image(rotateM(matrix(cnd2[100:198,][2,],32,32)),col=gray.colors(256)) image(rotateM(matrix(fitted(aacndd)[2,],32,32)),col=gray.colors(256)) # par(mfrow=c(5,5)) for (kk in (1:25)) { image(rotateM(matrix(coef(aacndd)[kk,],32,32)),col=gray.colors(256)) } # par(mfrow=c(1,1)) ss<-som(cnd[100:198,],grid=somgrid(5,5)) plot(ss,type="mapping") plot(ss,type="codes") # par(mfrow=c(5,5)) for (kk in (1:25)) { image(rotateM(matrix(ss$codes[[1]][kk,],64,64)),col=gray.colors(256)) } ################# # OK - what have we learnt? # par(mfrow=c(1,2)) ssc<-svd(cnd[1:99,]) plot(ssc$d) ssd<-svd(cnd[100:198,]) plot(ssd$d) # plot(cumsum(ssc$d^2)/sum(ssc$d^2),cumsum(ssd$d^2)/sum(ssd$d^2)) abline(0,1) # for few components, dog data is less well summarized than cats...