# Packages ----
library(dplyr)
library(tidyr)
library(readr) # Reading big data sets into R
library(ggplot2)
library(gplots)
library(lattice)
library(svdvis)
library(softImpute)
library(ElemStatLearn)
library(rpart.plot)
library(randomForest)
library(ranger)
library(rgl)
#########
library(irlba)
library(bigmemory)
library(biglm)
library(biglars)
# BigR - packages for modeling big data but staying in R

# Handwritten digits ----
# zip.train is a numeric matrix: column 1 is the digit label, the remaining
# 256 columns are the pixel values of a 16 x 16 image.
data(zip.train)
Numbers <- as.data.frame(zip.train)
Numbers[, 1] <- as.factor(Numbers[, 1])
names(Numbers) <- c("number", as.character(seq(1, 256)))

# Plot one randomly chosen sample image for each of the digits 1-9
# (nine panels fill the 3 x 3 layout; digit 0 is not drawn here).
par(mfrow = c(3, 3))
for (zz in 1:9) {
  candidates <- which(Numbers$number == zz)
  # Sample a position rather than calling sample(candidates, 1): the latter
  # would draw from 1:candidates if only a single row matched.
  iz <- candidates[sample.int(length(candidates), 1)]
  mm <- matrix(as.numeric(Numbers[iz, -1]), 16, 16)
  # Reverse the column order so the digit is drawn upright by image().
  image(mm[1:16, 16:1])
}

# SVD decomposition ----
svd.obj <- svd(zip.train[, -1])
colnames(svd.obj$v) <- paste0("V", 1:256)
# NOTE(review): the rows of v correspond to the 256 pixel columns of the
# input, not to samples; the "Sample" label is kept as in the original script.
rownames(svd.obj$v) <- paste0("Sample", 1:256)
svd.scree(
  svd.obj,
  subr = 5,
  axis.title.x = "Full scree plot",
  axis.title.y = "% Var Explained"
)

# 3D plot of the scores ----
# Use an explicit 10-colour palette: integer colours 9 and 10 would recycle
# onto colours 1 and 2 of the default 8-colour palette, making digits 8/9
# indistinguishable from digits 0/1.
digit_labels <- sort(unique(zip.train[, 1]))
digit_palette <- rainbow(10)
point_cols <- digit_palette[zip.train[, 1] + 1]
# Legend entries are built from the sorted labels so that each label is paired
# with the colour actually used for that digit (unique() alone returns the
# digits in order of first appearance, which does not match the colours).
legend_text <- paste("Type", digit_labels)
legend_cols <- digit_palette[digit_labels + 1]

plot3d(svd.obj$u[, 1:3], col = point_cols)
legend3d(
  "topright",
  legend = legend_text,
  pch = 5,
  col = legend_cols,
  cex = 1,
  inset = c(0.02)
)

# Same view on three randomly chosen raw pixel columns, for comparison.
plot3d(zip.train[, sample(seq(2, 257), 3)], col = point_cols)
legend3d(
  "topright",
  legend = legend_text,
  pch = 5,
  col = legend_cols,
  cex = 1,
  inset = c(0.02)
)

# Multinomial lasso on a random subsample ----
library(glmnet)
# Random training subset of 1000 rows; the remaining rows serve as hold-out.
ii <- sample(seq_len(nrow(Numbers)), 1000)
x_train <- as.matrix(Numbers[ii, -1])
y_train <- Numbers[ii, 1]

# Ungrouped multinomial fit: one coefficient path per class.
gg <- glmnet(x = x_train, y_train, family = "multinomial")
par(mfrow = c(4, 3))
plot(gg, xvar = "lambda")
###
image(gg$beta[[2]])
####
# Grouped multinomial fit.
ggm <- glmnet(
  x = x_train,
  y_train,
  family = "multinomial",
  type.multinomial = "grouped"
)
par(mfrow = c(3, 3))
plot(ggm, xvar = "lambda")
###
image(ggm$beta[[2]])
#####
# Cross-validated ungrouped fit.
cv.gg <- cv.glmnet(x = x_train, y = y_train, family = "multinomial")
par(mfrow = c(1, 1))
plot(cv.gg)
names(cv.gg) # CV error for selecting lambda
par(mfrow = c(1, 1))
# Coefficients (intercept + 256 pixels) of all classes side by side at
# lambda.1se.
image(do.call(cbind, coef(cv.gg, s = "lambda.1se")))
# Per-class coefficient images for the ungrouped CV fit ----
# coef() on a multinomial fit returns one coefficient column per class, in a
# list named by class label.
cc <- coef(cv.gg, s = "lambda.1se")
par(mfrow = c(3, 3))
for (zz in 1:9) {
  # Index by class name, not by position: position zz is digit zz - 1, which
  # would show digits 0-8 instead of the digits 1-9 used for the sample images.
  # [-1] drops the intercept, leaving the 256 pixel coefficients.
  ccm <- matrix(cc[[as.character(zz)]][-1], 16, 16)
  image(ccm[1:16, 16:1])
}
# Confusion table on the rows not used for fitting.
pp <- predict(cv.gg, newx = as.matrix(Numbers[-ii, -1]), type = "class")
table(pp, Numbers[-ii, 1])
#####
# Same analysis for the grouped multinomial fit ----
cv.ggm <- cv.glmnet(
  x = as.matrix(Numbers[ii, -1]),
  y = Numbers[ii, 1],
  family = "multinomial",
  type.multinomial = "grouped"
)
par(mfrow = c(1, 1))
plot(cv.ggm)
names(cv.ggm) # CV error for selecting lambda
par(mfrow = c(1, 1))
image(do.call(cbind, coef(cv.ggm, s = "lambda.1se")))
cc <- coef(cv.ggm, s = "lambda.1se")
par(mfrow = c(3, 3))
for (zz in 1:9) {
  # Name-based lookup keeps panel zz aligned with digit zz (see above).
  ccm <- matrix(cc[[as.character(zz)]][-1], 16, 16)
  image(ccm[1:16, 16:1])
}
pp <- predict(cv.ggm, newx = as.matrix(Numbers[-ii, -1]), type = "class")
table(pp, Numbers[-ii, 1])
#######
# Single-class coefficient pictures ----
# Column 5 of the class-wise cbind is the fifth class of the fit.
cc <- do.call(cbind, coef(cv.gg, s = "lambda.1se"))
bb <- matrix(cc[-1, 5], 16, 16)
image(bb[1:16, 16:1])

# Coefficients of every class at one fixed step of the (non-CV) lambda path.
# cbind() lays the classes out in consecutive groups of length(gg$lambda)
# columns, so the stride must follow the fitted path length; a hard-coded
# stride of 95 picks the wrong columns (or fails) whenever glmnet stops at a
# different number of lambdas.
cc <- do.call(cbind, gg$beta)
n_lambda <- length(gg$lambda)
lambda_step <- min(20, n_lambda)
cc <- cc[, lambda_step + n_lambda * (seq_along(gg$beta) - 1)]
bb <- matrix(cc[, 5], 16, 16)
image(bb[1:16, 16:1])
#####