frac<-.67
# I will use 2/3 for training - try another value if you like.
K<-100
# The number of splits to perform - this takes a while. You can do
# fewer reps if you want.
#
PEK<-matrix(0,K,2)
avPEK<-PEK
modselbic<-rep(0,dim(polltrain2)[2])
modselaic<-modselbic
modsizebic<-rep(0,K)
modsizeaic<-rep(0,K)
# The above commands just create empty vectors and matrices so that I can
# store results from the K random splits of data. 
# The for lopp below repeats the random splitting and model selection
# K times.
for (kk in (1:K)) {
ii2<-sample(seq(1,dim(polltrain2)[1]),round(dim(polltrain2)[1]*frac))
yy<-polltrain2[ii2,16]
xx<-as.matrix(polltrain2[ii2,-16])
yyt<-polltrain2[-ii2,16]
xxt<-as.matrix(polltrain2[-ii2,-16])
#
rleaps<-regsubsets(xx,yy,int=T,nbest=50,nvmax=dim(polltrain2)[2],really.big=T,method=c("ex"))
cleaps<-summary(rleaps,matrix=T)
tt<-apply(cleaps$which,1,sum)
cleaps2<-cleaps
cleaps2$which<-cleaps$which[tt>1,]
BIC<-length(yy)*log(cleaps$rss/length(yy))+tt*log(length(yy))
AIC<-length(yy)*log(cleaps$rss/length(yy))+tt*2
# various model selection criteria
#
aicmod<-cleaps$which[AIC==min(AIC),]
bicmod<-cleaps$which[BIC==min(BIC),]
mmbic<-lm(yy~xx[,bicmod[2:length(bicmod)]==T])
PEbic<-sum((yyt-cbind(rep(1,dim(xxt)[1]),xxt[,bicmod[2:length(bicmod)]==T])%*%mmbic$coef)^2)/length(yyt)
PEK[kk,1]<-PEbic 
mmaic<-lm(yy~xx[,aicmod[2:length(aicmod)]==T])
PEaic<-sum((yyt-cbind(rep(1,dim(xxt)[1]),xxt[,aicmod[2:length(aicmod)]==T])%*%mmaic$coef)^2)/length(yyt)
PEK[kk,2]<-PEaic 
modselbic[bicmod==T]<-modselbic[bicmod==T]+1
modsizebic[kk]<-sum(bicmod)-1 
modselaic[aicmod==T]<-modselaic[aicmod==T]+1
modsizeaic[kk]<-sum(aicmod)-1 
}
# After the loop ends, the PEK matrix contains the prediction errors for 
# each split for the AIC (2nd column) and the BIC (1st column).
# You can do boxplots or look at the difference between prediction error
# for the two criteria.
#
# modsizeaic and modsizebic records the size of the models picked for the
# different criteria. You can look at boxplots of these as well.
# modselaic and modselbic record the number of times a variable was 
# picked to be in the model.
modseltab<-cbind(names(polltrain2)[-16],modselbic[-1],modselaic[-1])
# modelseltab tabulates which variables are selected and how often
# modsize will tell you the size of the models selected
print(modseltab)
print(modsize)