library(scatterplot3d)

## 3D scatterplots of triples of wine features, colored by class
par(mfrow = c(2, 2))
mar0 <- c(2, 3, 2, 3)
scatterplot3d(wine[, 2], wine[, 3], wine[, 4], mar = mar0,
              color = c("blue", "black", "red")[wine$class], pch = 19)
scatterplot3d(wine[, 3], wine[, 4], wine[, 5], mar = mar0,
              color = c("blue", "black", "red")[wine$class], pch = 19)
scatterplot3d(wine[, 4], wine[, 5], wine[, 2], mar = mar0,
              color = c("blue", "black", "red")[wine$class], pch = 19)
scatterplot3d(wine[, 5], wine[, 2], wine[, 3], mar = mar0,
              color = c("blue", "black", "red")[wine$class], pch = 19)

library(klaR)
## recode the class labels as a factor for partimat()
mywine <- wine
newfac <- rep(0, dim(wine)[1])
newfac[wine$class == 1] <- "Zero"
newfac[wine$class == 2] <- "One"
newfac[wine$class == 3] <- "Two"
newfac <- as.factor(newfac)
mywine$class <- newfac

### plotting classification boundaries
partimat(class ~ ., data = mywine[, c(1, 2:5)], method = "lda")
partimat(class ~ ., data = mywine[, c(1, 2:5)], method = "qda")
partimat(class ~ ., data = mywine[, c(1, 2:5)], method = "naiveBayes")
partimat(class ~ ., data = mywine[, c(1, 2:5)], method = "rpart")

############
library(caret)
# use 2/3 for training
ii <- createDataPartition(wine$class, p = 2/3, list = F)
x.train <- as.matrix(wine[ii, -1])
x.test <- as.matrix(wine[-ii, -1])
y <- as.factor(wine[, 1])
y.train <- y[ii]
y.test <- y[-ii]

### LDA
cc <- train(x = x.train, y = y.train, method = "lda")
confusionMatrix(cc)            # training (resampled) error
pp <- predict(cc, x.test)
confusionMatrix(pp, y.test)    # test error
pp <- predict(cc, x.test, type = "prob")
pp$obs <- y.test
head(pp)
pp$pred <- predict(cc, x.test)
multiClassSummary(pp, lev = levels(pp$obs))  ## summaries like specificity and sensitivity

# Regularizing between LDA, QDA and diagonal QDA (RDA)
cc2 <- train(x = x.train, y = y.train, method = "rda")
plot(cc2)   # lambda = 0.5 is half-way between QDA (lambda = 0) and LDA (lambda = 1); gamma shrinks toward a scaled identity
pp <- predict(cc2, x.test, type = "prob")
pp$obs <- y.test
head(pp)
pp$pred <- predict(cc2, x.test)
multiClassSummary(pp, lev = levels(pp$obs))

#### LDA / DLDA regularization (shrinkage discriminant analysis)
cc2 <- train(x = x.train, y = y.train, method = "sda")
plot(cc2)
pp <- predict(cc2, x.test, type = "prob")
pp$obs <- y.test
head(pp)
pp$pred <- predict(cc2, x.test)
multiClassSummary(pp, lev = levels(pp$obs))

##### mixture components for each class for a richer classifier
library(mda)
cc <- mda(class ~ ., data = wine[ii, ], subclasses = c(1, 1, 1))
plot(cc)
cc <- mda(class ~ ., data = wine[ii, ], subclasses = c(2, 3, 1))
plot(cc)
cc2 <- train(x = x.train, y = y.train, method = "mda", tuneLength = 5)
plot(cc2)
pp <- predict(cc2, x.test, type = "prob")
pp$obs <- y.test
head(pp)
pp$pred <- predict(cc2, x.test)
multiClassSummary(pp, lev = levels(pp$obs))
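### Optional quick check: score the tuned MDA fit on the held-out data with a
### confusion matrix, just like the LDA model above (assumes cc2 still holds
### the "mda" model trained right above).
confusionMatrix(predict(cc2, x.test), y.test)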
#### sparse LDA - reducing the number of features used.
cc <- train(x = x.train, y = y.train, method = "sparseLDA")
plot(cc)
cc$bestTune

## fit sparseLDA directly: the response is coded as an indicator matrix
library(sparseLDA)
Y <- matrix(0, dim(wine)[1], 3)
Y[wine$class == 1, 1] <- 1
Y[wine$class == 2, 2] <- 1
Y[wine$class == 3, 3] <- 1
colnames(Y) <- c("A", "B", "C")
ss <- sparseLDA::sda(x = x.train, y = Y[ii, ], lambda = .1, stop = -7)  # sparseLDA::sda, not sda::sda
pp <- predict(ss, x.test)
# discriminant scores colored by true class; plotting symbol = predicted class
plot(pp$x, col = y.test, pch = apply(pp$posterior, 1, sort.list)[3, ])

##################### Repeated runs
AccL <- matrix(0, 25, 10)
AccQ <- matrix(0, 25, 10)
AccR <- matrix(0, 25, 10)
AccS <- matrix(0, 25, 10)
AccM <- matrix(0, 25, 10)
RDAtune <- matrix(0, 25, 2)
Mtune <- matrix(0, 25, 1)
Stune <- matrix(0, 25, 2)
for (b in (1:25)) {
  ii <- createDataPartition(wine$class, p = 2/3, list = F)
  x.train <- as.matrix(wine[ii, -1])
  x.test <- as.matrix(wine[-ii, -1])
  y <- as.factor(wine[, 1])
  y.train <- y[ii]
  y.test <- y[-ii]
  #
  cclda <- train(x = x.train, y = y.train, method = "lda")
  pp <- predict(cclda, x.test, type = "prob")
  pp$obs <- y.test
  pp$pred <- predict(cclda, x.test)
  AccL[b, ] <- multiClassSummary(pp, lev = levels(pp$obs))
  #
  ccqda <- train(x = x.train, y = y.train, method = "qda")
  pp <- predict(ccqda, x.test, type = "prob")
  pp$obs <- y.test
  pp$pred <- predict(ccqda, x.test)
  AccQ[b, ] <- multiClassSummary(pp, lev = levels(pp$obs))
  #
  ccrda <- train(x = x.train, y = y.train, method = "rda")
  pp <- predict(ccrda, x.test, type = "prob")
  pp$obs <- y.test
  pp$pred <- predict(ccrda, x.test)
  AccR[b, ] <- multiClassSummary(pp, lev = levels(pp$obs))
  RDAtune[b, 1] <- ccrda$bestTune$gamma
  RDAtune[b, 2] <- ccrda$bestTune$lambda
  #
  ccsda <- train(x = x.train, y = y.train, method = "sda")
  pp <- predict(ccsda, x.test, type = "prob")
  pp$obs <- y.test
  pp$pred <- predict(ccsda, x.test)
  AccS[b, ] <- multiClassSummary(pp, lev = levels(pp$obs))
  Stune[b, 1] <- ccsda$bestTune$diagonal
  Stune[b, 2] <- ccsda$bestTune$lambda
  #
  ccmda <- train(x = x.train, y = y.train, method = "mda")
  pp <- predict(ccmda, x.test, type = "prob")
  pp$obs <- y.test
  pp$pred <- predict(ccmda, x.test)
  AccM[b, ] <- multiClassSummary(pp, lev = levels(pp$obs))
  Mtune[b, 1] <- ccmda$bestTune$subclasses
  #
  print(b)
}

# comparing methods
# (column indices refer to the named elements returned by multiClassSummary;
#  check names(multiClassSummary(pp, lev = levels(pp$obs))) if your caret version differs)
par(mfrow = c(2, 2))
boxplot(cbind(AccL[, 3], AccQ[, 3], AccR[, 3], AccS[, 3], AccM[, 3]),
        names = c("LDA", "QDA", "RDA", "SDA", "MDA"), main = "Accuracy")
boxplot(cbind(AccL[, 9], AccQ[, 9], AccR[, 9], AccS[, 9], AccM[, 9]),
        names = c("LDA", "QDA", "RDA", "SDA", "MDA"), main = "Detection rate")
boxplot(cbind(AccL[, 5], AccQ[, 5], AccR[, 5], AccS[, 5], AccM[, 5]),
        names = c("LDA", "QDA", "RDA", "SDA", "MDA"), main = "Sensitivity")
boxplot(cbind(AccL[, 6], AccQ[, 6], AccR[, 6], AccS[, 6], AccM[, 6]),
        names = c("LDA", "QDA", "RDA", "SDA", "MDA"), main = "Specificity")

# tuning parameters used
par(mfrow = c(1, 3))
boxplot(RDAtune, names = c("gamma", "lambda"))
boxplot(Stune, names = c("diagonal", "lambda"))
boxplot(Mtune, names = c("subclasses"))

#######################
####### regression analysis on the cars data
library(car)
pairs(cars)
head(cars[sample(seq(1, dim(cars)[1])), ])   # look at a few rows in random order
plot(cars$horsepwr, cars$mid.price)
# probably need to transform: nonconstant error variance
# try log or sqrt

# first a quick check
mm <- lm(mid.price ~ ., data = cars)
summary(mm)
plot(mm)
residualPlots(mm)
leveragePlots(mm)
qqPlot(mm, id.n = 3)
avPlots(mm, id.n = 2)
outlierTest(mm)
influenceIndexPlot(mm, id.n = 3)
influencePlot(mm, id.n = 3)
vif(mm)
cor(cars)
### remove observation 52 and take logs of price
### Note: VIF > 3-4 indicates a collinearity problem
### We have a problem!!!
### Regularization might help.
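### An optional closer look at how severe the collinearity is (assuming, as
### cor(cars) above suggests, that all columns of cars are numeric). The
### "condition number > 30" cutoff is only a common rule of thumb.
sort(vif(mm), decreasing = TRUE)        # which predictors are most affected
kappa(cor(cars[, -1]), exact = TRUE)    # condition number of the predictor correlations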
cars2 <- cars[-52, ]                 # drop the influential observation 52
cars2$mid.price <- log(cars2$mid.price)
plot(cars2$horsepwr, cars2$mid.price)
mm <- lm(mid.price ~ ., data = cars2)
residualPlots(mm)
leveragePlots(mm)
qqPlot(mm, id.n = 3)
influenceIndexPlot(mm, id.n = 3)
influencePlot(mm, id.n = 3)
#### Perhaps sqrt is a better transform - try at home

### R-squared contribution of variables
library(relaimpo)
# Bootstrap Measures of Relative Importance (b samples)
boot <- boot.relimp(mm, b = 10, type = c("lmg", "last", "first", "genizi"),
                    rank = TRUE, diff = TRUE, rela = TRUE)
booteval.relimp(boot)                 # print result
plot(booteval.relimp(boot, sort = TRUE))

##### all subset selection
library(leaps)
ll <- regsubsets(mid.price ~ ., data = cars2)
plot(ll)
plot(ll, scale = "adjr2")
### which variables are picked in the best model?
# subsets(ll, statistic = "rsq")
#
ll <- regsubsets(mid.price ~ ., really.big = T, nbest = 50, data = cars2)
plot(ll)                              ### more models for each model size
rr <- summary(ll)
names(rr)
head(rr$which)
plot(apply(rr$which, 1, sum), rr$cp, xlab = "Model size", ylab = "Cp")
plot(apply(rr$which, 1, sum), rr$bic, xlab = "Model size", ylab = "BIC")
plot(apply(rr$which, 1, sum), rr$adjr2, xlab = "Model size", ylab = "Adjusted R-squared")

#####
# elastic net modeling
library(glmnet)
gg <- glmnet(x = as.matrix(cars2[, -1]), y = cars2[, 1])
plot(gg, xvar = "lambda")
vnat <- coef(gg)
vnat <- vnat[-1, ncol(vnat)]          # coefficients at the smallest penalty (drop the intercept)
axis(2, at = vnat, line = -.5, labels = names(cars2[, -1]), las = 1, tick = FALSE, cex.axis = .5)
### which variables enter first (with a high penalty)?

# use cross-validation to pick the size of the penalty factor
cv.gg <- cv.glmnet(x = as.matrix(cars2[, -1]), y = cars2[, 1])
plot(cv.gg)
names(cv.gg)
coef(cv.gg, s = "lambda.1se")
coef(cv.gg, s = "lambda.min")

## should you use lasso, ridge or elastic net (alpha between 0 and 1)?
cv.gg1 <- cv.glmnet(x = as.matrix(cars2[, -1]), y = cars2[, 1], alpha = 1)
cv.gg.5 <- cv.glmnet(x = as.matrix(cars2[, -1]), y = cars2[, 1], alpha = .5)
cv.gg.25 <- cv.glmnet(x = as.matrix(cars2[, -1]), y = cars2[, 1], alpha = .25)
plot(log(cv.gg1$lambda), cv.gg1$cvm, xlab = "Log(Lambda)", ylab = "MSE")
points(log(cv.gg.5$lambda), cv.gg.5$cvm, col = 2)
points(log(cv.gg.25$lambda), cv.gg.25$cvm, col = 3)
legend("topleft", legend = c("alpha=1", "alpha=.5", "alpha=.25"),
       pch = 1, col = c("black", "red", "green"))
## which curve is lowest? that's the best alpha

#####
library(caret)
ii <- sample(seq(1, dim(cars2)[1]), 60)   # 60 observations for training
#
x.train <- as.matrix(cars2[ii, -1])
x.test <- as.matrix(cars2[-ii, -1])
y <- cars2[, 1]
y.train <- y[ii]
y.test <- y[-ii]
#
ccenet <- train(x = x.train, y = y.train, method = "glmnet", tuneLength = 25)
plot(ccenet)
ccenet$bestTune                       ### best tuning parameters in the elastic net
ppenet <- predict(ccenet, x.test)
plot(ppenet, y.test)                  # predictions vs. real data - pretty good!
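### Optionally, put a number on the fit shown in the plot above: held-out RMSE
### on the log-price scale (uses ppenet and y.test from above).
sqrt(mean((ppenet - y.test)^2))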
varImp(ccenet)                        # relative size of the coefficients

### LASSO
cclars <- train(x = x.train, y = y.train, method = "lars", tuneLength = 25)
plot(cclars)
cclars$bestTune
pplars <- predict(cclars, x.test)
plot(ppenet, y.test)
points(pplars, y.test, col = 2)
abline(0, 1)

### Least squares
cclm <- train(x = x.train, y = y.train, method = "lm", tuneLength = 25)   # tuneLength is ignored: lm has no tuning parameters
pplm <- predict(cclm, x.test)
plot(ppenet, y.test)
points(pplars, y.test, col = 2)
points(pplm, y.test, col = 3)
abline(0, 1)

### Random forest
ccrf <- train(x = x.train, y = y.train, method = "rf", tuneLength = 25)
pprf <- predict(ccrf, x.test)
plot(ppenet, y.test)
points(pplars, y.test, col = 2)
points(pplm, y.test, col = 3)
points(pprf, y.test, col = 4)
abline(0, 1)
plot(ccrf)

### Run 25 times (or more - it takes a while)
MSEmat <- matrix(0, 25, 4)
for (b in (1:25)) {
  ii <- sample(seq(1, dim(cars2)[1]), 60)
  #
  x.train <- as.matrix(cars2[ii, -1])
  x.test <- as.matrix(cars2[-ii, -1])
  y <- cars2[, 1]
  y.train <- y[ii]
  y.test <- y[-ii]
  #
  ccenet <- train(x = x.train, y = y.train, method = "glmnet", tuneLength = 5)
  ppenet <- predict(ccenet, x.test)
  cclars <- train(x = x.train, y = y.train, method = "lars", tuneLength = 5)
  pplars <- predict(cclars, x.test)
  cclm <- train(x = x.train, y = y.train, method = "lm", tuneLength = 5)
  pplm <- predict(cclm, x.test)
  ccrf <- train(x = x.train, y = y.train, method = "rf", tuneLength = 5)
  pprf <- predict(ccrf, x.test)
  MSEmat[b, 1] <- sum((ppenet - y.test)^2) / length(y.test)
  MSEmat[b, 2] <- sum((pplars - y.test)^2) / length(y.test)
  MSEmat[b, 3] <- sum((pplm - y.test)^2) / length(y.test)
  MSEmat[b, 4] <- sum((pprf - y.test)^2) / length(y.test)
  #
  print(b)
}
boxplot(MSEmat, names = c("enet", "lars", "lm", "rf"))
### Comparing the models - which method provides the best MSE?

#####
##### High-dimensional inference
library(hdi)
ii <- sample(seq(1, dim(cars2)[1]), 60)
#
x.train <- as.matrix(cars2[ii, -1])
x.test <- as.matrix(cars2[-ii, -1])
y <- cars2[, 1]
y.train <- y[ii]
y.test <- y[-ii]
ll <- lasso.proj(x = as.matrix(cars2[ii, -1]), y = cars2[ii, 1])
ll$pval
ll$pval.corr
confint(ll)
### which variables are significant?
####
cc <- ll$clusterGroupTest()
plot(cc)
cc$clusters
cc$pval
### can the significance of variables be attributed to a smaller set?
### correlation between features is a problem - this looks into that
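### One optional way to see the correlation structure that the group test works
### around: cluster the predictors by absolute correlation (a sketch, assuming
### all columns of cars2[, -1] are numeric, as above).
dd <- as.dist(1 - abs(cor(cars2[, -1])))
plot(hclust(dd), main = "Predictors clustered by |correlation|")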