library(ElemStatLearn) data(SAheart) # make the famhist variable numeric # plotting the data... pairs(SAheart) mm<-lm(ldl~., data=SAheart) mm<-lm(ldl~., data=SAheart,subset=-c(17,214,347,433)) library(car) # package for nicer diagnostic plots, amongst other things residualPlots(mm) leveragePlots(mm) qqPlot(mm,id.n=3) # perhaps better to transform some of the data? SAheart$ldl<-log(SAheart$ldl) SAheart$obesity<-log(SAheart$obesity) SAheart$age<-log(SAheart$age) mm<-lm(ldl~., data=SAheart) residualPlots(mm) leveragePlots(mm) qqPlot(mm,id.n=3) # drop some outliers? mm<-lm(ldl~., data=SAheart,subset=-c(17,214,347,433)) residualPlots(mm) leveragePlots(mm) qqPlot(mm,id.n=3) # a bunch of tools for spotting influential observations influenceIndexPlot(mm,id.n=3) influencePlot(mm,id.n=3) # variable importance in terms of Rsquared # What's the Rsquared contribution of a variable, if averaged out over # order it is entered, if it's the first or last (aka usefulness) variable # and one Rsquared decomposition (there are many) library(relaimpo) calc.relimp(mm,type=c("lmg","last","first","genizi"), rela=TRUE) # Bootstrap Measures of Relative Importance (takes some time) #boot <- boot.relimp(mm, b = 250, type = c("lmg", # "last", "first", "genizi"), rank = TRUE, # diff = TRUE, rela = TRUE) #booteval.relimp(boot) # print result #plot(booteval.relimp(boot,sort=TRUE)) ################### # all-subset selection library(leaps) mm<-regsubsets(ldl~., data=SAheart) plot(mm) # model map plot(mm,scale="adjr2") # R2 instead of bic to rank models subsets(mm,statistics="rsq") # which subsets correspond to which models? ## redo but ask for more candidate models of each size mm<-regsubsets(ldl~., data=SAheart, nbest=50, really.big=T) ms<-summary(mm) plot(mm) #BIC curve plot(apply(ms$which,1,sum),ms$bic,xlab="size",ylab="ic") # Importance measure # Here, I score variables by BIC of the models where they # enter so variables that are present in the best models # get a big score ms$which[sort.list(ms$bic)[1:5],] wt<-exp(-ms$bic) mat<-ms$which mat[mat==T]<-1 mat[mat==F]<-0 mat<-wt*mat impwt<-apply(mat,2,sum) barplot(impwt[-1],names.arg=c(names(SAheart)[-3])) ######## library(lars) ll<-lars(x=as.matrix(SAheart[,-3]),y=SAheart[,3]) plot(ll) # library(elasticnet) ee<-enet(x=as.matrix(SAheart[,-3]),y=SAheart[,3]) x11() plot(ee) ee<-enet(x=as.matrix(SAheart[,-3]),y=SAheart[,3],lambda=.5,normalize=T) x11() plot(ee) # library(glmnet) gg<-glmnet(x=as.matrix(SAheart[,-3]),y=SAheart[,3],family="gaussian",alpha=.5) plot(gg) gg<-cv.glmnet(x=as.matrix(SAheart[,-3]),y=SAheart[,3],family="gaussian",alpha=.5) plot(gg) gg<-glmnet(x=as.matrix(SAheart[,-3]),y=SAheart[,3],family="gaussian",alpha=.5,lambda=exp(-4)) gg$beta