# iz<-sample(seq(1,7291),500) Nmat<-zip.train[iz,-1] Nlab<-zip.train[iz,1] # center<-function(x) { x<-(x-mean(x))} # Nmat<-t(apply(Nmat,1,center)) # ssp<-svd(Nmat) # SVD on X ssp2<-svd(t(Nmat)) # SVD on X' vfr2<-t(Nmat)%*%ssp2$v%*%diag(1/ssp2$d) # getting X'X pcs from XX' pcs # plot(ssp$v[,1],vfr2[,1]) abline(0,-1) plot(ssp$v[,2],vfr2[,2]) abline(0,-1) # So PCA from X'X can be obtained from PCA on XX' ######## pp<-prcomp(Nmat) md<-cmdscale(dist(Nmat),2) plot(md) plot(Nmat%*%pp$rot[,1],md[,1]) plot(Nmat%*%pp$rot[,2],md[,2]) # PCA and MDS are equivalent if distance is euclidean... #### #### install.packages("kernlab") library(kernlab) # zeroes<-zip.train[zip.train[,1]==0,-1] # ssn<-svd(zeroes) plot(ssn$d) plot(ssn$u[,1:2]) pp<-identify(ssn$u[,1:2]) # par(mfrow=c(2,2)) for (kk in (1:4)) { image(t(matrix(as.numeric(zeroes[pp[kk],]),16,16,byrow=T))[,16:1]) # highligthing the structure that the 1st components identify and separate } ssk<-kpca(zeroes, kernel = "rbfdot", kpar = list(sigma = 0.1), features = 50, th = 1e-4) plot(eig(ssk)) plot(pcv(ssk)[,1:2]) pp<-identify(pcv(ssk)[,1:2]) # par(mfrow=c(2,2)) for (kk in (1:4)) { image(t(matrix(as.numeric(zeroes[pp[kk],]),16,16,byrow=T))[,16:1]) } #### iz<-sample(seq(1,7291),2000) Nmat<-zip.train[iz,] ssn<-svd(Nmat[,-1]) plot(ssn$u[,1:2],pch=as.character(Nmat[,1]),col=gray(Nmat[,1]/10)) # ssk<-kpca(Nmat[,-1], kernel = "rbfdot", kpar = list(sigma = 0.0005), features = 50, th = 1e-4) plot(pcv(ssk)[,1:2],pch=as.character(Nmat[,1]),col=gray(Nmat[,1]/10)) # ssk<-kpca(Nmat[,-1], kernel = "polydot", kpar = list(degree = 3), features = 50, th = 1e-4) plot(pcv(ssk)[,1:2],pch=as.character(Nmat[,1]),col=gray(Nmat[,1]/10)) # ssk<-kpca(Nmat[,-1], kernel = "polydot", kpar = list(degree = 5), features = 50, th = 1e-4) plot(pcv(ssk)[,1:2],pch=as.character(Nmat[,1]),col=gray(Nmat[,1]/10)) # ### tSNE library(tsne) ecb = function(x,y){ plot(x,t='n'); text(x,labels=Nmat[,1], col=gray(Nmat[,1]/10)) } tsne_N = tsne(Nmat[,-1], epoch_callback = ecb, perplexity=25,max_iter=250) # takes a while to run if you run many iterations # perplexity - bandwidth choice par(mfrow=c(1,1)) plot(tsne_N,col="white") text(tsne_N,labels=Nmat[,1],col=gray(Nmat[,1]/10)) # MDS library(cluster) library(stats) ddNbr<-daisy(Nmat[,-1]) ffNbr<-cmdscale(ddNbr,2) # plot(ffNbr,pch=as.character(Nmat[,1]),col=gray(Nmat[,1]/10)) # ddNbr<-as.dist(1-cor(t(Nmat[,-1]))) ffNbr<-cmdscale(ddNbr,2) # plot(ffNbr,pch=as.character(Nmat[,1]),col=gray(Nmat[,1]/10)) # library(vegan) isoNbr<-isomap(dist(Nmat[,-1]),ndim=2,k=10) # use only 10 nearest neighbors to produce the 2dim representation plot(isoNbr$points,col=gray(Nmat[,1]/10),pch=as.character(Nmat[,1])) # library(lle) lleNbr<-lle(Nmat[,-1],m=2,k=10,reg=1) plot(lleNbr$Y[,1:2],col=gray(Nmat[,1]/10),pch=as.character(Nmat[,1])) ####### install.packages("dimRed") # all the above functions with a wrapper library(dimRed) # lle <- LLE() emb <- lle@fun(Nmat[,-1], lle@stdpars) ## using embed(): emb2 <- embed(dat, "LLE", knn = 45) plot(emb, type = "2vars") plot(emb2, type = "2vars") ######