# Model selection simulation
# Try different sample size (ns), noiselevel, correlation within groups of variables, size model
B<-25
ns<-50
noiselev<-0.25
p<-c(10,10,10,50) # how many features in each of 4 groups - change as you will
ps<-sum(p) # total number of features
pr<-0.1 # proportion of features in true model
sigs<-0.25 # signal strength - how large true coefficients are is controlled by this
# 3 groups of variables that are correlated
grcorr1<-.3 # smaller value here makes xs in each group more correlated
grcorr2<-1
grcorr3<-1
#
Truth<-rep(0,ps) 
#Truth[c(1,2,11,21,31)]<-1
Truth<-round(rbinom(n=ps,size=1,prob=pr))
# generates the true model, either at random or by choice
#
# create matrices to store the results
Coeflm<-matrix(0,ps,B)
CoefR<-matrix(0,ps,B)
Coefla<-matrix(0,ps,B)
Coefala<-matrix(0,ps,B)
Coefala2<-matrix(0,ps,B)
MSElm<-matrix(0,B,1)
MSER<-matrix(0,B,1)
MSEla<-matrix(0,B,1)
MSEala<-matrix(0,B,1)
#
nt<-sum(Truth)
tr.coef<-rnorm(nt,mean=1,sd=sigs)*sign(rbinom(n=nt,size=1,prob=.5)*2-1)
all.coef<-rep(0,ps)
all.coef[Truth==1]<-tr.coef
# coefficients in true model
for (b in (1:B)) {
  # create the x variabes in 3 correlated groups
  x1<-rnorm(ns)
  x1<-matrix(rep(x1,p[1]),ns,p[1],byrow=F)+matrix(rnorm(ns*p[1],sd=grcorr1),ns,p[1])
  x1<-apply(x1,2,standardize)
  #
  x2<-rnorm(ns)
  x2<-matrix(rep(x2,p[2]),ns,p[2],byrow=F)+matrix(rnorm(ns*p[2],sd=grcorr2),ns,p[2])
  x2<-apply(x2,2,standardize)
  #
  x3<-rnorm(ns)
  x3<-matrix(rep(x3,p[3]),ns,p[3],byrow=F)+matrix(rnorm(ns*p[3],sd=grcorr3),ns,p[3])
  x3<-apply(x3,2,standardize)
  #
  x4<-matrix(rnorm(ns*p[4]),ns,p[4])
  x4<-apply(x4,2,standardize)
  #
  y<-cbind(x1,x2,x3,x4)%*%all.coef+rnorm(ns)*noiselev
  df<-as.data.frame(cbind(x1,x2,x3,x4,y))
  names(df)<-c(as.character(seq(1,ps)),"y")
  ####
  y.new<-cbind(x1,x2,x3,x4)%*%all.coef+rnorm(ns)*noiselev
  df.new<-as.data.frame(cbind(x1,x2,x3,x4,y.new))
  names(df.new)<-c(as.character(seq(1,ps)),"y")
  #### Now data set is created
  # Run OLS if possible
  if (ns>ps) {
    mm<-lm(y~., data=df)
    Coeflm[,b]<-mm$coef[-1] 
    pm<-predict(mm,newdata=df.new)
    MSElm[b]<-sum((y.new-pm)^2)/length(pm) }
  #
  # lasso
  cv.gg<-cv.glmnet(x=cbind(x1,x2,x3,x4),y=y)
  Coefla[,b]<-coef(cv.gg,s="lambda.1se")[-1]
  pm<-cbind(rep(1,ns),cbind(x1,x2,x3,x4))%*%coef(cv.gg,s="lambda.1se")
  MSEla[b]<-sum((y.new-pm)^2)/length(pm)
  #
  # adaptive lasso with ridge initialization
  gamma<-1
  ma<-lm.ridge(y~., data=df,lambda=1)
  CoefR[,b]<-coef(ma)[-1]
  pm<-cbind(rep(1,ns),cbind(x1,x2,x3,x4))%*%coef(ma)
  MSER[b]<-sum((y.new-pm)^2)/length(pm)
  #
  penf<-1/(abs(coef(ma)[-1])^gamma)
  cv.gg2<-cv.glmnet(x=cbind(x1,x2,x3,x4),y=y,penalty.factor=penf)
  cc<-coef(cv.gg2,s="lambda.1se")[-1]
  Coefala[,b]<-cc
  pm<-cbind(rep(1,ns),cbind(x1,x2,x3,x4))%*%coef(cv.gg2,s="lambda.1se")
  MSEala[b]<-sum((y.new-pm)^2)/length(pm)
  #
  # alternative adaptive lasso where you rescale the data by the ridge estimates
  xx3<-cbind(x1,x2,x3,x4)%*%diag(abs(coef(ma)[-1])^gamma)
  gg3<-cv.glmnet(x=xx3,y=y,standardize=F)
  cc<-(coef(gg3,s="lambda.1se")[-1])*(abs(coef(ma)[-1])^gamma)
  Coefala2[,b]<-cc
  ##### 
}


# Result summaries
# Looking at all the coefficients
par(mfrow=c(2,2))
if (ns>ps) {
  boxplot(t(Coeflm),ylim=c(-1.2,1.2)) }
if (ns<=ps) {
  boxplot(t(CoefR),ylim=c(-1.2,1.2)) }
abline(h=0)
points(seq(1,ps)[Truth==1],tr.coef,col=2,pch=3)
boxplot(t(Coefla),ylim=c(-1.2,1.2),main="Lasso")
abline(h=0)
points(seq(1,ps)[Truth==1],tr.coef,col=2,pch=3)
boxplot(t(Coefala),ylim=c(-2,1.2),main="ALasso")
abline(h=0)
points(seq(1,ps)[Truth==1],tr.coef,col=2,pch=3)
boxplot(t(Coefala2),ylim=c(-2,1.2),main="ALasso-2")
abline(h=0)
points(seq(1,ps)[Truth==1],tr.coef,col=2,pch=3)
####

if (ns>ps) {
Bias<-rbind(apply(Coeflm,1,mean)[Truth==1]-tr.coef,
            apply(CoefR,1,mean)[Truth==1]-tr.coef,
            apply(Coefla,1,mean)[Truth==1]-tr.coef,
            apply(Coefala,1,mean)[Truth==1]-tr.coef,
            apply(Coefala2,1,mean)[Truth==1]-tr.coef)

Var<-rbind(apply(Coeflm,1,var)[Truth==1],
           apply(CoefR,1,var)[Truth==1],
           apply(Coefla,1,var)[Truth==1],
           apply(Coefala,1,var)[Truth==1],
           apply(Coefala2,1,var)[Truth==1])
# bias and variance of each of the methods
row.names(Bias)<-c("LM","R","Lasso","aLasso","aLasso2")
row.names(Var)<-c("LM","R","Lasso","aLasso","aLasso2")
print(Bias)
print(Var)
}

if (ns<=ps) {
  Bias<-rbind(apply(CoefR,1,mean)[Truth==1]-tr.coef,
              apply(Coefla,1,mean)[Truth==1]-tr.coef,
              apply(Coefala,1,mean)[Truth==1]-tr.coef,
              apply(Coefala2,1,mean)[Truth==1]-tr.coef)
  
  Var<-rbind(apply(CoefR,1,var)[Truth==1],
             apply(Coefla,1,var)[Truth==1],
             apply(Coefala,1,var)[Truth==1],
             apply(Coefala2,1,var)[Truth==1])
  # bias and variance of each of the methods
  row.names(Bias)<-c("R","Lasso","aLasso","aLasso2")
  row.names(Var)<-c("R","Lasso","aLasso","aLasso2")
  print(Bias)
  print(Var)
}

TPFP<-function(Coefs,truth) {
  tr<-truth
  cp<-Coefs
  TP<-length(cp[cp!=0 & tr==1])/length(tr[tr==1])
  FP<-length(cp[cp!=0 & tr==0])/length(tr[tr==0])
  FDR<-length(cp[cp!=0 & tr==0])/length(cp[cp!=0])
  return(list(FP=FP,FDR=FDR,TP=TP))}

locator()
par(mfrow=c(2,2))
LassoSel<-apply(Coefla,2,TPFP,truth=Truth)
ll<-unlist(LassoSel)
boxplot(cbind(ll[seq(1,(B*3),by=3)],1-ll[seq(3,(B*3),by=3)]),names=c("FPR","1-TPR"),main="LASSO")
boxplot(ll[seq(2,(B*3),by=3)],xlab=c("FDR"),ylim=c(0,1))
ALassoSel<-apply(Coefala,2,TPFP,truth=Truth)
ll<-unlist(ALassoSel)
boxplot(cbind(ll[seq(1,(B*3),by=3)],1-ll[seq(3,(B*3),by=3)]),names=c("FPR","1-TPR"),main="ALASSO")
boxplot(ll[seq(2,(B*3),by=3)],xlab=c("FDR"),ylim=c(0,1))

locator()
plot(cv.gg)
plot(cv.gg2)
plot(Bias[2,],type="l",ylim=c(min(Bias),max(Bias)),main="Bias")
abline(h=0)
lines(Bias[3,],col=2)
lines(Bias[4,],col=3)
legend("topleft",legend=c("Ridge","lasso","alasso"),pch=1,col=c("black","red","green"))

locator()
boxplot(cbind(MSElm,MSER,MSEla,MSEala),names=c("LM","R","Lasso","ALasso"),main="pMSE")