# Model selection simulation # Try different sample size (ns), noiselevel, correlation within groups of variables, size model B<-25 ns<-50 noiselev<-0.25 p<-c(10,10,10,50) # how many features in each of 4 groups - change as you will ps<-sum(p) # total number of features pr<-0.1 # proportion of features in true model sigs<-0.25 # signal strength - how large true coefficients are is controlled by this # 3 groups of variables that are correlated grcorr1<-.3 # smaller value here makes xs in each group more correlated grcorr2<-1 grcorr3<-1 # Truth<-rep(0,ps) #Truth[c(1,2,11,21,31)]<-1 Truth<-round(rbinom(n=ps,size=1,prob=pr)) # generates the true model, either at random or by choice # # create matrices to store the results Coeflm<-matrix(0,ps,B) CoefR<-matrix(0,ps,B) Coefla<-matrix(0,ps,B) Coefala<-matrix(0,ps,B) Coefala2<-matrix(0,ps,B) MSElm<-matrix(0,B,1) MSER<-matrix(0,B,1) MSEla<-matrix(0,B,1) MSEala<-matrix(0,B,1) # nt<-sum(Truth) tr.coef<-rnorm(nt,mean=1,sd=sigs)*sign(rbinom(n=nt,size=1,prob=.5)*2-1) all.coef<-rep(0,ps) all.coef[Truth==1]<-tr.coef # coefficients in true model for (b in (1:B)) { # create the x variabes in 3 correlated groups x1<-rnorm(ns) x1<-matrix(rep(x1,p[1]),ns,p[1],byrow=F)+matrix(rnorm(ns*p[1],sd=grcorr1),ns,p[1]) x1<-apply(x1,2,standardize) # x2<-rnorm(ns) x2<-matrix(rep(x2,p[2]),ns,p[2],byrow=F)+matrix(rnorm(ns*p[2],sd=grcorr2),ns,p[2]) x2<-apply(x2,2,standardize) # x3<-rnorm(ns) x3<-matrix(rep(x3,p[3]),ns,p[3],byrow=F)+matrix(rnorm(ns*p[3],sd=grcorr3),ns,p[3]) x3<-apply(x3,2,standardize) # x4<-matrix(rnorm(ns*p[4]),ns,p[4]) x4<-apply(x4,2,standardize) # y<-cbind(x1,x2,x3,x4)%*%all.coef+rnorm(ns)*noiselev df<-as.data.frame(cbind(x1,x2,x3,x4,y)) names(df)<-c(as.character(seq(1,ps)),"y") #### y.new<-cbind(x1,x2,x3,x4)%*%all.coef+rnorm(ns)*noiselev df.new<-as.data.frame(cbind(x1,x2,x3,x4,y.new)) names(df.new)<-c(as.character(seq(1,ps)),"y") #### Now data set is created # Run OLS if possible if (ns>ps) { mm<-lm(y~., data=df) Coeflm[,b]<-mm$coef[-1] pm<-predict(mm,newdata=df.new) MSElm[b]<-sum((y.new-pm)^2)/length(pm) } # # lasso cv.gg<-cv.glmnet(x=cbind(x1,x2,x3,x4),y=y) Coefla[,b]<-coef(cv.gg,s="lambda.1se")[-1] pm<-cbind(rep(1,ns),cbind(x1,x2,x3,x4))%*%coef(cv.gg,s="lambda.1se") MSEla[b]<-sum((y.new-pm)^2)/length(pm) # # adaptive lasso with ridge initialization gamma<-1 ma<-lm.ridge(y~., data=df,lambda=1) CoefR[,b]<-coef(ma)[-1] pm<-cbind(rep(1,ns),cbind(x1,x2,x3,x4))%*%coef(ma) MSER[b]<-sum((y.new-pm)^2)/length(pm) # penf<-1/(abs(coef(ma)[-1])^gamma) cv.gg2<-cv.glmnet(x=cbind(x1,x2,x3,x4),y=y,penalty.factor=penf) cc<-coef(cv.gg2,s="lambda.1se")[-1] Coefala[,b]<-cc pm<-cbind(rep(1,ns),cbind(x1,x2,x3,x4))%*%coef(cv.gg2,s="lambda.1se") MSEala[b]<-sum((y.new-pm)^2)/length(pm) # # alternative adaptive lasso where you rescale the data by the ridge estimates xx3<-cbind(x1,x2,x3,x4)%*%diag(abs(coef(ma)[-1])^gamma) gg3<-cv.glmnet(x=xx3,y=y,standardize=F) cc<-(coef(gg3,s="lambda.1se")[-1])*(abs(coef(ma)[-1])^gamma) Coefala2[,b]<-cc ##### } # Result summaries # Looking at all the coefficients par(mfrow=c(2,2)) if (ns>ps) { boxplot(t(Coeflm),ylim=c(-1.2,1.2)) } if (ns<=ps) { boxplot(t(CoefR),ylim=c(-1.2,1.2)) } abline(h=0) points(seq(1,ps)[Truth==1],tr.coef,col=2,pch=3) boxplot(t(Coefla),ylim=c(-1.2,1.2),main="Lasso") abline(h=0) points(seq(1,ps)[Truth==1],tr.coef,col=2,pch=3) boxplot(t(Coefala),ylim=c(-2,1.2),main="ALasso") abline(h=0) points(seq(1,ps)[Truth==1],tr.coef,col=2,pch=3) boxplot(t(Coefala2),ylim=c(-2,1.2),main="ALasso-2") abline(h=0) points(seq(1,ps)[Truth==1],tr.coef,col=2,pch=3) #### if (ns>ps) { Bias<-rbind(apply(Coeflm,1,mean)[Truth==1]-tr.coef, apply(CoefR,1,mean)[Truth==1]-tr.coef, apply(Coefla,1,mean)[Truth==1]-tr.coef, apply(Coefala,1,mean)[Truth==1]-tr.coef, apply(Coefala2,1,mean)[Truth==1]-tr.coef) Var<-rbind(apply(Coeflm,1,var)[Truth==1], apply(CoefR,1,var)[Truth==1], apply(Coefla,1,var)[Truth==1], apply(Coefala,1,var)[Truth==1], apply(Coefala2,1,var)[Truth==1]) # bias and variance of each of the methods row.names(Bias)<-c("LM","R","Lasso","aLasso","aLasso2") row.names(Var)<-c("LM","R","Lasso","aLasso","aLasso2") print(Bias) print(Var) } if (ns<=ps) { Bias<-rbind(apply(CoefR,1,mean)[Truth==1]-tr.coef, apply(Coefla,1,mean)[Truth==1]-tr.coef, apply(Coefala,1,mean)[Truth==1]-tr.coef, apply(Coefala2,1,mean)[Truth==1]-tr.coef) Var<-rbind(apply(CoefR,1,var)[Truth==1], apply(Coefla,1,var)[Truth==1], apply(Coefala,1,var)[Truth==1], apply(Coefala2,1,var)[Truth==1]) # bias and variance of each of the methods row.names(Bias)<-c("R","Lasso","aLasso","aLasso2") row.names(Var)<-c("R","Lasso","aLasso","aLasso2") print(Bias) print(Var) } TPFP<-function(Coefs,truth) { tr<-truth cp<-Coefs TP<-length(cp[cp!=0 & tr==1])/length(tr[tr==1]) FP<-length(cp[cp!=0 & tr==0])/length(tr[tr==0]) FDR<-length(cp[cp!=0 & tr==0])/length(cp[cp!=0]) return(list(FP=FP,FDR=FDR,TP=TP))} locator() par(mfrow=c(2,2)) LassoSel<-apply(Coefla,2,TPFP,truth=Truth) ll<-unlist(LassoSel) boxplot(cbind(ll[seq(1,(B*3),by=3)],1-ll[seq(3,(B*3),by=3)]),names=c("FPR","1-TPR"),main="LASSO") boxplot(ll[seq(2,(B*3),by=3)],xlab=c("FDR"),ylim=c(0,1)) ALassoSel<-apply(Coefala,2,TPFP,truth=Truth) ll<-unlist(ALassoSel) boxplot(cbind(ll[seq(1,(B*3),by=3)],1-ll[seq(3,(B*3),by=3)]),names=c("FPR","1-TPR"),main="ALASSO") boxplot(ll[seq(2,(B*3),by=3)],xlab=c("FDR"),ylim=c(0,1)) locator() plot(cv.gg) plot(cv.gg2) plot(Bias[2,],type="l",ylim=c(min(Bias),max(Bias)),main="Bias") abline(h=0) lines(Bias[3,],col=2) lines(Bias[4,],col=3) legend("topleft",legend=c("Ridge","lasso","alasso"),pch=1,col=c("black","red","green")) locator() boxplot(cbind(MSElm,MSER,MSEla,MSEala),names=c("LM","R","Lasso","ALasso"),main="pMSE")