# Play with sample size and model structure # I.e, change ns, the model coefficients, the noise error # Run several times to see what happens across simulations B<-25 Coeflm<-matrix(0,80,B) CoefR<-matrix(0,80,B) Coefla<-matrix(0,80,B) Coefala<-matrix(0,80,B) Coefala2<-matrix(0,80,B) for (b in (1:B)) { ns<-50 x1<-rnorm(ns) x1<-matrix(rep(x1,10),ns,10,byrow=F)+matrix(rnorm(ns*10,sd=2),ns,10) x1<-apply(x1,2,standardize) # x2<-rnorm(ns) x2<-matrix(rep(x2,10),ns,10,byrow=F)+matrix(rnorm(ns*10,sd=4),ns,10) x2<-apply(x2,2,standardize) # x3<-rnorm(ns) x3<-matrix(rep(x3,10),ns,10,byrow=F)+matrix(rnorm(ns*10,sd=1),ns,10) x3<-apply(x3,2,standardize) # x4<-matrix(rnorm(ns*50),ns,50) x4<-apply(x4,2,standardize) # y<-2-1*x1[,1]+1*x1[,2]+1*x2[,1]+1*x3[,1]+x4[,1]+rnorm(ns)*.25 df<-as.data.frame(cbind(x1,x2,x3,x4,y)) names(df)<-c(as.character(seq(1,80)),"y") #### if (ns>80) { mm<-lm(y~., data=df) Coeflm[,b]<-mm$coef[-1] } #library(glmnet) cv.gg<-cv.glmnet(x=cbind(x1,x2,x3,x4),y=y) Coefla[,b]<-coef(cv.gg,s="lambda.1se")[-1] gamma<-1 ma<-lm.ridge(y~., data=df,lambda=1) CoefR[,b]<-coef(ma)[-1] penf<-1/(abs(coef(ma)[-1])^gamma) cv.gg2<-cv.glmnet(x=cbind(x1,x2,x3,x4),y=y,penalty.factor=penf) #gg2<-glmnet(x=cbind(x1,x2,x3,x4),y=y,lambda=cv.gg$lambda.min,penalty.factor=penf) cc<-coef(cv.gg2,s="lambda.1se")[-1] Coefala[,b]<-cc # alternative adaptive lasso xx3<-cbind(x1,x2,x3,x4)%*%diag(abs(coef(ma)[-1])^gamma) gg3<-cv.glmnet(x=xx3,y=y,standardize=F) cc<-(coef(gg3,s="lambda.1se")[-1])*(abs(coef(ma)[-1])^gamma) Coefala2[,b]<-cc } par(mfrow=c(2,2)) if (ns>80) { boxplot(t(Coeflm),ylim=c(-1.2,1.2)) } if (ns<=80) { boxplot(t(CoefR),ylim=c(-1.2,1.2)) } abline(h=0) points(c(1,2,11,21,31),c(-1,1,1,1,1),col=2,pch=3) boxplot(t(Coefla),ylim=c(-1.2,1.2),main="Lasso") abline(h=0) points(c(1,2,11,21,31),c(-1,1,1,1,1),col=2,pch=3) boxplot(t(Coefala),ylim=c(-2,1.2),main="ALasso") abline(h=0) points(c(1,2,11,21,31),c(-1,1,1,1,1),col=2,pch=3) boxplot(t(Coefala2),ylim=c(-2,1.2),main="ALasso-2") abline(h=0) points(c(1,2,11,21,31),c(-1,1,1,1,1),col=2,pch=3) Bias<-rbind(apply(Coeflm,1,mean)[c(1,2,11,21,31)]-c(-1,1,1,1,1), apply(CoefR,1,mean)[c(1,2,11,21,31)]-c(-1,1,1,1,1), apply(Coefla,1,mean)[c(1,2,11,21,31)]-c(-1,1,1,1,1), apply(Coefala,1,mean)[c(1,2,11,21,31)]-c(-1,1,1,1,1), apply(Coefala2,1,mean)[c(1,2,11,21,31)]-c(-1,1,1,1,1)) Var<-rbind(apply(Coeflm,1,var)[c(1,2,11,21,31)], apply(CoefR,1,var)[c(1,2,11,21,31)], apply(Coefla,1,var)[c(1,2,11,21,31)], apply(Coefala,1,var)[c(1,2,11,21,31)], apply(Coefala2,1,var)[c(1,2,11,21,31)]) print(Bias) print(Var) tr<-rep(0,80) tr[c(1,2,11,21,31)]<-1 TPFP<-function(Coefs,truth) { cp<-Coefs TP<-length(cp[cp!=0 & tr==1])/length(tr[tr==1]) FP<-length(cp[cp!=0 & tr==0])/length(tr[tr==0]) FDR<-length(cp[cp!=0 & tr==0])/length(cp[cp!=0]) return(list(FP=FP,FDR=FDR,TP=TP))} locator() par(mfrow=c(2,2)) LassoSel<-apply(Coefla,2,TPFP,truth=tr) ll<-unlist(LassoSel) boxplot(cbind(ll[seq(1,(B*3),by=3)],ll[seq(3,(B*3),by=3)]),names=c("FPR","TPR")) boxplot(ll[seq(2,(B*3),by=3)],names=c("FDR"),ylim=c(0,1)) ALassoSel<-apply(Coefala,2,TPFP,truth=tr) ll<-unlist(ALassoSel) boxplot(cbind(ll[seq(1,(B*3),by=3)],ll[seq(3,(B*3),by=3)]),names=c("FPR","TPR")) boxplot(ll[seq(2,(B*3),by=3)],names=c("FDR"),ylim=c(0,1)) locator() plot(cv.gg) plot(cv.gg2) plot(Bias[2,],type="l",ylim=c(-.5,.5),main="Bias") abline(h=0) lines(Bias[3,],col=2) lines(Bias[4,],col=3)