library(ElemStatLearn)
data(zip.train)
Nmat<-as.matrix(zip.train[,-1])
Numbers<-as.matrix(zip.train)
#
library(huge)
# The network modeling package using sparse gaussian graphical models
help(huge)
#
its<-sample(seq(1,dim(Nmat)[1]),1000)
# select 1000 digits at random
gg<-huge(t(Nmat[its,]))
names(gg)
plot(gg)
#
library(network)
par(mfrow=c(1,1))
plot.network(network(gg$path[[2]]),usearrows=F)
plot.network(network(gg$path[[2]]),usearrows=F,displayisolates=F,label=Numbers[its,1],label.pos=5,label.cex=.5,vertex.col=Numbers[its,1])
# plotting the graphical lasso output
# Change 4 to a different number to look at a different sparsity level
load("CATSnDOGS.RData")
cnd<-CATSnDOGS
gg<-huge(t(cnd))
plot.network(network(gg$path[[4]]),usearrows=F,displayisolates=F,label=c(rep("c",99),rep("d",99)),label.pos=5,label.cex=.5,vertex.col=c(rep(2,99),rep(4,99)))
#
load("TCGAdata.RData")
vv<-apply(TCGA,2,sd)
vv<-rev(sort.list(vv))
# Sort by top variance genes
library(irlba)
ss<-irlba(TCGA[,vv[1:5000]],nu=25,nv=25)
# leading 100 PCs
gg<-huge(t(ss$u))
plot.network(network(gg$path[[7]]),usearrows=F,displayisolates=F,label=TCGAclassstr,label.pos=5,label.col="white",label.cex=.5,vertex.col=as.numeric(as.factor(TCGAclassstr)))
#
# Clustering genes 
install.packages("JGL")
library(JGL)
library(help="JGL")
###
#Let's focus on breast cancer
BC<-TCGA[TCGAclassstr=="BC",]
vv<-apply(BC,2,sd)
vv<-rev(sort.list(vv))
#
hh<-hclust(dist(BC[,vv[1:250]]))
plot(hh)
cc<-cutree(hh,2)
# 2 subtypes of breast cancer? You can try more subtypes also
# Create a list of covariance matrices for each type 
NN<-list()
for (kk in (1:length(unique(cc)))) {
  aa<-cor(BC[cc==unique(cc)[kk],vv[1:250]])
  NN[[kk]]<-aa
}
# group-penalized glasso
gg<-JGL(NN,penalty="group",0.05,0.05,weights="sample.size",return.whole.theta = T)
# Plot for the 2 cancer classes
gA<-gg$theta[[1]]
gA[gA!=0]<-1
gB<-gg$theta[[2]]
gB[gB!=0]<-2
par(mfrow=c(1,1))
plot(network(gA+gB),edge.col=(gA+gB),displayisolates=F,vertex.col="white",usearrows=F)
### different penalties
gg<-JGL(NN,penalty="group",0.015,0.01,weights="sample.size",return.whole.theta = T,truncate=1e-3)
for (kk in (1:2)) {
  plot.network(network(gg$theta[[kk]]),displayisolates=F,usearrows=F,label=seq(1,250),vertex.col="white",label.cex=0.75) }
# Plot together
# green is edges in both cancers, red and black unique to one of the subtypes
gA<-gg$theta[[1]]
gA[gA!=0]<-1
gB<-gg$theta[[2]]
gB[gB!=0]<-2
par(mfrow=c(1,1))
plot(network(gA+gB),edge.col=(gA+gB),displayisolates=F,vertex.col="white",usearrows=F)
###