# SVD of the handwritten digits ssn<-svd(Numbers[,-1]) plot(ssn$d) # most of variance explained by 50 components (of 256) plot(ssn$u[,1:2]) identify(ssn$u[,1:2]) [1] 2218 2253 3317 3819 4214 5651 image((matrix(as.numeric(Numbers[2218,-1]),16,16,byrow=T))) # highligthing the structure that the 1st components identify and separate plot(ssn$u[,1:2],pch=as.character(Numbers[,1]),col=as.numeric(Numbers[,1])+1) plot(ssn$u[,2:3],pch=as.character(Numbers[,1]),col=as.numeric(Numbers[,1])+1) ####### par(mfrow=c(3,3)) for (zz in (1:9)) { iz<-sample(seq(1,dim(Numbers)[1])[Numbers$number==zz],1) image(matrix(as.numeric(Numbers[iz,-1]),16,16,byrow=T)) } #### svd decomposition library(svdvis) svd.obj <- svd(Numbers[,-1]) svd.scree(svd.obj, subr=5, axis.title.x="Full scree plot", axis.title.y="% Var Explained") #### 3D plot of the scores library(plot3D) library(rgl) library(irlba) plot3d(svd.obj$u[,1:3],col=as.numeric(Numbers[,1])+1) legend3d("topright", legend = paste('Type', c(unique(Numbers[,1]))), pch = 5, col=seq(1,10), cex=1, inset=c(0.02)) plot3d(Numbers[,sample(seq(2,257),3)],col=as.numeric(Numbers[,1])+1) legend3d("topright", legend = paste('Type', c(unique(Numbers[,1]))), pch = 5, col=seq(1,10), cex=1, inset=c(0.02)) ####################### # Approximation of an image using svd Nmat<-as.matrix(Numbers[,-1]) iz<-sample(seq(1,7291),1) ss<-svd(matrix(Nmat[iz,],16,16,byrow=T)) par(mfrow=c(1,2)) image(ss$u[,1:3]%*%diag(ss$d[1:3])%*%t(ss$v[,1:3])) image(matrix(Nmat[iz,],16,16,byrow=T)) #### either sparse and many or dense and few components to approximate library(nsprcomp) iz<-sample(seq(1,7291),1) Im<-matrix(Nmat[iz,],16,16,byrow=T) ss<-nsprcomp(Im,k=c(rep(10,4)),ncomp=4,tol=.1,scale=F,center=F) UU<-Im%*%ss$rotation ImA<-UU%*%diag(ss$sdev)%*%t(ss$rotation) par(mfrow=c(1,2)) image(ImA) image(Im) ##### pp<-prcomp(Nmat) #Nbr.svd.sparse<-nsprcomp(Nmat,k=c(rep(25,10))) image(matrix(Nbr.svd.sparse$rotation[,1],16,16,byrow=T)) image(matrix(Nbr.svd.sparse$rotation[,2],16,16,byrow=T)) image(matrix(Nbr.svd.sparse$rotation[,3],16,16,byrow=T)) plot(cumsum(pp$sdev)/sum(pp$sdev),ylim=c(0,1),xlim=c(1,10)) lines(cumsum(Nbr.svd.sparse$sdev)/sum(pp$sdev)) UU<-as.matrix(Nmat)%*%Nbr.svd.sparse$rotation plot(UU[,1:2],col=as.numeric(Numbers[,1]),pch=as.character(Numbers[,1])) ################# heart disease data pp<-prcomp(SAheart,scale=T) plot(cumsum(pp$sdev)/sum(pp$sdev),ylim=c(0,1)) sp<-nsprcomp(SAheart,k=c(10,rep(10,9)),scale=T) lines(cumsum(sp$sdev)/sum(sp$sdev)) sp<-nsprcomp(SAheart,k=c(3,rep(2,9)),scale=T) lines(cumsum(sp$sdev)/sum(sp$sdev),col=2) sp<-nsprcomp(SAheart,k=c(2,rep(2,9)),scale=T) lines(cumsum(sp$sdev)/sum(sp$sdev),col=3) plot(sp$rotation[,1]) #adiposity,obseity plot(sp$rotation[,2]) #tobacco, age plot(pp$rotation[,1]) ############################################# #SOM library(kohonen) # data(crabs) crabs2<-crabs head(crabs) crabs2[,1]<-as.numeric(crabs2[,1]) crabs2[,2]<-as.numeric(crabs2[,1]) pp<-prcomp(crabs2[,-3],scale=T) CC<-as.matrix(crabs2[,-3])%*%pp$rot plot(CC[,1],CC[,2],col=crabs2$sp,pch=crabs2$sex) plot(CC[,3],CC[,4],col=crabs2$sp,pch=crabs2$sex) ss<-som(as.matrix(crabs2[,-3]),grid=somgrid(5,5)) plot(ss,type="mapping",col=crabs2$sp,pch=crabs2$sex) plot(ss,type="codes") som_model <- som(as.matrix(crabs2[,-3]), grid=somgrid(5,5), rlen=100, alpha=c(0.05,0.01), keep.data = TRUE, n.hood="circular" ) plot(som_model,type="count") plot(som_model,type="dist.neighbours") plot(som_model,type="codes") # visualize one variable across the map plot(som_model, type = "property", property = som_model$codes[,4], main=names(som_model$data)[4]) plot(som_model, type = "property", property = som_model$codes[,7], main=names(som_model$data)[7]) plot(som_model, type = "property", property = som_model$codes[,1], main=names(som_model$data)[1]) som_cluster <- cutree(hclust(dist(som_model$codes)), 6) plot(som_model, type="mapping", bgcol = som_cluster, main = "Clusters") add.cluster.boundaries(som_model, som_cluster) ###### #som_modelN <- som(as.matrix(Nmat), grid=somgrid(25,25), rlen=100, alpha=c(0.05,0.01), keep.data = TRUE, n.hood="circular" ) plot(som_modelN,type="changes") plot(som_modelN,type="count") plot(som_modelN,type="dist.neighbours") som_cluster <- cutree(hclust(dist(som_modelN$codes)), 10) plot(som_modelN, type="mapping", bgcol = som_cluster, main = "Clusters") add.cluster.boundaries(som_model, som_cluster) plot(som_modelN,type="mapping",pch=as.character(Numbers[,1]),col=as.numeric(Numbers[,1])+1) ###################### ####### Nbr.som2<-xyf(Nmat,Y=Numbers[,1],grid=somgrid(5,5)) plot(Nbr.som2) plot(Nbr.som2,type="mapping",pch=as.character(Numbers[,1])) plot(Nbr.som2,type="codes") ######## sh<-som(as.matrix(SAheart[,-10]),grid=somgrid(5,5)) plot(sh) plot(sh,type="count") plot(sh,type="mapping",pch=as.character(SAheart[,10])) plot(sh,type="dist.neighbours") # visualize one variable across the map plot(sh, type = "property", property = sh$codes[,4], main=names(sh$data)[4]) plot(sh, type = "property", property = sh$codes[,1], main=names(sh$data)[1]) sh_cluster <- cutree(hclust(dist(sh$codes)), 6) plot(sh, type="mapping", bgcol = som_cluster, main = "Clusters") add.cluster.boundaries(sh, sh_cluster) plot(sh,type="mapping",pch=as.character(SAheart[,10]),col=as.numeric(SAheart[,10])+1) sh2<-xyf(as.matrix(SAheart[,-10]),Y=SAheart[,10],grid=somgrid(5,5)) plot(sh2) plot(sh2,type="mapping",pch=as.character(SAheart[,10]),col=as.numeric(SAheart[,10])+1) ####### When data is really big! #irlba library(irlba) # first SVD components only dim(TCGA) ssTCGA<-irlba(TCGA,25,25) plot3d(ssTCGA$u[,1:3],col=TCGAclass+1) legend3d("topright", legend = paste('Type', c(unique(TCGAclassstr))), pch = 5, col=seq(1,6), cex=1, inset=c(0.02)) plot(ssTCGA$d) plot(ssTCGA$v[,1]) l<-identify(ssTCGA$v[,1],labels=gene.names) plot(TCGA[,l[1:2]],col=TCGAclass+1) lp<-identify(TCGA[,l[1:2]],labels=TCGAclassstr) plot(ssTCGA$v[,2]) l<-identify(ssTCGA$v[,2],labels=gene.names) plot(TCGA[,l[1:2]],col=TCGAclass+1) lp<-identify(TCGA[,l[1:2]],labels=TCGAclassstr) plot(ssTCGA$v[,3]) l<-identify(ssTCGA$v[,3],labels=gene.names) plot(TCGA[,l[1:2]],col=TCGAclass+1) lp<-identify(TCGA[,l[1:2]],labels=TCGAclassstr)