# Review Lecture ----
# Exploratory analysis of the GBM rows of TCGA: low-rank projections,
# consensus clustering of samples, model-based clustering, and gene-network
# estimation.
# NOTE(review): TCGA and TCGAclassstr, and the functions plot3d, nmf,
# coefmap, basis, fit, ConsensusClusterPlus, glasso, qgraph and cv.glmnet are
# not defined or attached in this section; presumably they are loaded earlier
# in the file -- confirm.
# NOTE(review): no seed is set in this section, so any randomized step may
# give different results between runs.

GBM <- TCGA[TCGAclassstr == "GBM", ] # 172 cancer tumors

library(rsvd) # random SVD for fast computation
ss <- rsvd(as.matrix(GBM))
plot3d(ss$u[, 1:3])
# No clear group structure

# Rank the columns of GBM by their dip statistic, largest first.
library(diptest)
dipval <- apply(GBM, 2, dip)
hist(dipval)
ds <- rev(sort.list(dipval))

# The 2000 top-ranked columns as a matrix; built once and reused below.
gbm_top <- as.matrix(GBM[, ds[1:2000]])

ss <- rsvd(gbm_top)
plot3d(ss$u[, 1:3])
# Now we can see it
plot(ss$d) # About 50 components summarize the data

# Shift the 1000 top-ranked columns so the smallest entry is zero before NMF.
nmf_input <- GBM[, ds[1:1000]]
NMFgbm <- nmf(nmf_input - min(nmf_input), rank = 5)
coefmap(NMFgbm)
plot3d(basis(fit(NMFgbm))[, 3:5])

# Are there subtypes of GBM? ----
# Each call receives a transposed matrix, so the tumors are the columns.
# on data
cc <- ConsensusClusterPlus(
  as.matrix(t(GBM[, ds[1:500]])),
  maxK = 5, reps = 100, pItem = .6, pFeature = .6, clusterAlg = "km"
)
# on leading svds
cc <- ConsensusClusterPlus(
  as.matrix(t(ss$u[, 1:10])),
  maxK = 5, reps = 100, pItem = .6, pFeature = .6, clusterAlg = "km"
)
# on NMF
cc <- ConsensusClusterPlus(
  as.matrix(t(basis(fit(NMFgbm)))),
  maxK = 5, reps = 100, pItem = .6, pFeature = .6, clusterAlg = "km"
)

# Model-based clustering ----
library(mclust)
library(clustvarsel)
mm <- Mclust(ss$u[, 1:10])
print(summary(mm))
# The original passed 6:10 positionally; with `what` named it binds to
# `dimens`, so it is spelled out here.
plot(mm, what = "classification", dimens = 6:10)
plot3d(ss$u[, 1:3], col = mm$classification)

# Clustering genes ----
# Using regression models to construct clusters or networks
ha <- glasso(cor(gbm_top), rho = .5, approx = TRUE)
pp <- apply(abs(ha$wi), 1, sum)
# Keep only the rows/columns of ha$wi whose absolute row sum is positive.
qgraph(
  ha$wi[pp > 0, pp > 0],
  layout = "spring", directed = FALSE, labels = FALSE
)

# Not transposed here, so the genes are the columns.
ccGBM <- ConsensusClusterPlus(
  gbm_top,
  maxK = 6, reps = 100, pItem = .6, pFeature = .6, clusterAlg = "km"
)

# Do it yourself, line by line regression model ----
# Single example: regress the last of the 2000 columns on all the others.
zz <- 2000
cv.gg <- cv.glmnet(x = gbm_top[, -zz], y = gbm_top[, zz])
plot(cv.gg)

# Row zz of NetworkMatrix holds the coefficients (first entry of coef()
# dropped) from regressing column zz on every other column, taken at
# lambda.1se. The diagonal stays zero.
n_top <- ncol(gbm_top)
NetworkMatrix <- matrix(0, n_top, n_top)
for (zz in seq_len(n_top)) {
  cv.gg <- cv.glmnet(x = gbm_top[, -zz], y = gbm_top[, zz])
  NetworkMatrix[zz, -zz] <- coef(cv.gg, s = "lambda.1se")[-1]
  print(zz) # progress indicator
}

# Zero out coefficients with absolute value below 0.12, then plot the rows
# and columns whose absolute row sum is still positive.
Network2 <- NetworkMatrix
Network2[abs(Network2) < .12] <- 0
pp <- apply(abs(Network2), 1, sum)
qgraph(
  Network2[pp > 0, pp > 0],
  layout = "spring", directed = TRUE, labels = FALSE
)
# Plot the network built from the leading 250 x 250 block of NetworkMatrix,
# without arrowheads and without isolated nodes.
# NOTE(review): network() is not attached in this section; presumably the
# package providing it is loaded earlier in the file -- confirm.
plot(
  network(NetworkMatrix[1:250, 1:250]),
  usearrows = FALSE, displayisolates = FALSE
)

# Biclustering ----
# heatmap() requires a numeric matrix, so the column subset of GBM is
# converted explicitly (a data frame would be rejected).
heatmap(as.matrix(GBM[, ds[1:500]]))
heatmap(basis(fit(NMFgbm)))
# Supports that there are many groups of genes, but only some that divide
# the cancers into 2-3 subgroups.
# NOTE(review): "of genes" is inferred; the original comment was missing the
# noun after "groups of" -- confirm.

# Display row 104 of CATSnDOGS, columns taken in reverse order, as a 64 x 64
# grayscale image filled by row.
# NOTE(review): CATSnDOGS is not defined in this section and looks unrelated
# to the GBM analysis -- confirm this line belongs here.
image(
  matrix(CATSnDOGS[104, 4096:1], 64, 64, byrow = TRUE),
  col = gray.colors(256, 0, 1)
)