library(ElemStatLearn) data(zip.train) Nmat<-as.matrix(zip.train[,-1]) Numbers<-as.matrix(zip.train) # library(huge) # The network modeling package using sparse gaussian graphical models help(huge) # its<-sample(seq(1,dim(Nmat)[1]),1000) # select 1000 digits at random gg<-huge(t(Nmat[its,])) names(gg) plot(gg) # library(network) par(mfrow=c(1,1)) plot.network(network(gg$path[[2]]),usearrows=F) plot.network(network(gg$path[[2]]),usearrows=F,displayisolates=F,label=Numbers[its,1],label.pos=5,label.cex=.5,vertex.col=Numbers[its,1]) # plotting the graphical lasso output # Change 4 to a different number to look at a different sparsity level load("CATSnDOGS.RData") cnd<-CATSnDOGS gg<-huge(t(cnd)) plot.network(network(gg$path[[4]]),usearrows=F,displayisolates=F,label=c(rep("c",99),rep("d",99)),label.pos=5,label.cex=.5,vertex.col=c(rep(2,99),rep(4,99))) # load("TCGAdata.RData") vv<-apply(TCGA,2,sd) vv<-rev(sort.list(vv)) # Sort by top variance genes library(irlba) ss<-irlba(TCGA[,vv[1:5000]],nu=25,nv=25) # leading 100 PCs gg<-huge(t(ss$u)) plot.network(network(gg$path[[7]]),usearrows=F,displayisolates=F,label=TCGAclassstr,label.pos=5,label.col="white",label.cex=.5,vertex.col=as.numeric(as.factor(TCGAclassstr))) # # Clustering genes install.packages("JGL") library(JGL) library(help="JGL") ### #Let's focus on breast cancer BC<-TCGA[TCGAclassstr=="BC",] vv<-apply(BC,2,sd) vv<-rev(sort.list(vv)) # hh<-hclust(dist(BC[,vv[1:250]])) plot(hh) cc<-cutree(hh,2) # 2 subtypes of breast cancer? You can try more subtypes also # Create a list of covariance matrices for each type NN<-list() for (kk in (1:length(unique(cc)))) { aa<-cor(BC[cc==unique(cc)[kk],vv[1:250]]) NN[[kk]]<-aa } # group-penalized glasso gg<-JGL(NN,penalty="group",0.05,0.05,weights="sample.size",return.whole.theta = T) # Plot for the 2 cancer classes gA<-gg$theta[[1]] gA[gA!=0]<-1 gB<-gg$theta[[2]] gB[gB!=0]<-2 par(mfrow=c(1,1)) plot(network(gA+gB),edge.col=(gA+gB),displayisolates=F,vertex.col="white",usearrows=F) ### different penalties gg<-JGL(NN,penalty="group",0.015,0.01,weights="sample.size",return.whole.theta = T,truncate=1e-3) for (kk in (1:2)) { plot.network(network(gg$theta[[kk]]),displayisolates=F,usearrows=F,label=seq(1,250),vertex.col="white",label.cex=0.75) } # Plot together # green is edges in both cancers, red and black unique to one of the subtypes gA<-gg$theta[[1]] gA[gA!=0]<-1 gB<-gg$theta[[2]] gB[gB!=0]<-2 par(mfrow=c(1,1)) plot(network(gA+gB),edge.col=(gA+gB),displayisolates=F,vertex.col="white",usearrows=F) ###