library(dplyr) library(tidyr) library(readr) library(ggplot2) library(gplots) library(lattice) library(svdvis) library(softImpute) library(ElemStatLearn) library(rpart.plot) library(randomForest) library(ranger) library(rgl) library(irlba) library(bigmemory) library(biglm) library(biglars) # bigpca,bigrf? #############Handwritten digits######### data(zip.train) Numbers<-as.data.frame(zip.train) Numbers[,1]<-as.factor(Numbers[,1]) names(Numbers)<-c("number",as.character(seq(1,256))) #### plot sample images of the 10 digits par(mfrow=c(3,3)) for (zz in (1:9)) { iz<-sample(seq(1,dim(Numbers)[1])[Numbers$number==zz],1) image(matrix(as.numeric(Numbers[iz,-1]),16,16,byrow=T)) } #### svd decomposition svd.obj <- svd(zip.train[,-1]) colnames(svd.obj$v) = paste0("V",1:7291) rownames(svd.obj$v) = paste0("Sample",1:256) svd.scree(svd.obj, subr=5, axis.title.x="Full scree plot", axis.title.y="% Var Explained") #### 3D plot of the scores plot3d(svd.obj$u[,1:3],col=zip.train[,1]+1) legend3d("topright", legend = paste('Type', c(unique(zip.train[,1]))), pch = 5, col=seq(1,10), cex=1, inset=c(0.02)) plot3d(zip.train[,sample(seq(2,257),3)],col=zip.train[,1]+1) legend3d("topright", legend = paste('Type', c(unique(zip.train[,1]))), pch = 5, col=seq(1,10), cex=1, inset=c(0.02)) #rgl.postscript("Nbrsvd2.pdf",fmt="pdf",drawText=TRUE) plot3d(svd.obj$u[,4:6],col=zip.train[,1]+1) legend3d("topright", legend = paste('Type', c(unique(zip.train[,1]))), pch = 5, col=seq(1,10), cex=1, inset=c(0.02)) #### Trying CART ii<-sample(seq(1,dim(zip.train)[1]),5000) tree1<-rpart(number~.,data=Numbers[ii,],control=rpart.control(cp=0.01, minsplit=10,xval=3),method="class") prp(tree1,extra=100) #### Trying RandomForest library(randomForest) Nbr.rf<-randomForest(y=Numbers[ii,1],x=Numbers[ii,-1], ytest=Numbers[-ii,1],xtest=Numbers[-ii,-1], ntree=1000, proximity=T, keep.forest=TRUE, importance=TRUE) print(Nbr.rf$confusion) #### Plotting error-rates layout(matrix(c(1,2),nrow=1), width=c(4,1)) par(mar=c(5,4,4,0)) #No margin on the right side plot(Nbr.rf) par(mar=c(5,0,4,2)) #No margin on the left side plot(c(0,1),type="n", axes=F, xlab="", ylab="") legend("top", colnames(Nbr.rf$err.rate),col=1:11,lty=1:11,cex=0.8,fill=1:11) #### Variable importance varImpPlot(Nbr.rf) image(matrix(Nbr.rf$importance,16,16,byrow=T)) #### ############################### #### Diamonds data set qplot(carat,price,data=diamonds,col=clarity,size=depth,pch=cut) #### CART library(rpart) ii<-sample(seq(1,dim(diamonds)[1]),20000) tree1<-rpart(price~.,data=diamonds[ii,],control=rpart.control(cp=0.01, minsplit=10,xval=3)) prp(tree1, faclen = 0, type=2, cex = 0.7, extra = 1,uniform=T, tweak=1.2,compress=T,box.col=3,branch.type=1,space=2) #### Cross-validation to select the tree size plotcp(tree1) printcp(tree1) #### Random Forest diamonds.rf <- ranger(price ~ ., data=diamonds[ii,], num.trees=1000, write.forest=FALSE, importance="impurity") barplot(diamonds.rf$variable.importance/max(diamonds.rf$variable.importance),ylab="Importance") #### ######################## #### SVD of an image ssp<-svd(puppy-mean(puppy)) svd.scree(ssp, subr=15, axis.title.x="Full scree plot", axis.title.y="% Var Explained") plot3d(ssp$v[,1:3]) #### Low-rank approximations image(puppy,col=gray.colors(256,0,1)) k<-2 image(ssp$u[,1:k]%*%diag(ssp$d[1:k])%*%t(ssp$v[,1:k]),col=gray.colors(256,0,1)) k<-5 image(ssp$u[,1:k]%*%diag(ssp$d[1:k])%*%t(ssp$v[,1:k]),col=gray.colors(256,0,1)) k<-15 image(ssp$u[,1:k]%*%diag(ssp$d[1:k])%*%t(ssp$v[,1:k]),col=gray.colors(256,0,1)) k<-25 image(ssp$u[,1:k]%*%diag(ssp$d[1:k])%*%t(ssp$v[,1:k]),col=gray.colors(256,0,1)) #### Missing values (corrupted image) aa<-sample(seq(1,322),100,replace=T) bb<-sample(seq(1,300),100,replace=T) puppy2<-puppy puppy2[aa,bb]<-NA image(puppy2,col=gray.colors(256,0,1)) #### Using low-rank approximations to fill in missing values pcomp<-softImpute(puppy2,rank=10,lambda=.85) image(pcomp$u%*%diag(pcomp$d)%*%t(pcomp$v),col=gray.colors(256,0,1)) pcomp<-softImpute(puppy2,rank=35,lambda=.9) image(pcomp$u%*%diag(pcomp$d)%*%t(pcomp$v),col=gray.colors(256,0,1)) #### or denoising puppy3<-puppy puppy3<-(puppy+matrix(rnorm(322*300,sd=.12),322,300)) image(puppy3,col=gray.colors(256,0,1)) ssp<-svd(puppy3) k<-15 image(ssp$u[,1:k]%*%diag(ssp$d[1:k])%*%t(ssp$v[,1:k]),col=gray.colors(256,0,1)) #####