#Review Lecture ############## GBM<-TCGA[TCGAclassstr=="GBM",] # 172 cancer tumors library(rsvd) # random SVD for fast computation ss<-rsvd(as.matrix(GBM)) library(rgl) plot3d(ss$u[,1:3]) # No clear group structure - though in reality there are several types of braincancer library(diptest) dipval<-apply(GBM,2,dip) hist(dipval) ds<-rev(sort.list(dipval)) ss<-rsvd(as.matrix(GBM[,ds[1:2000]])) plot3d(ss$u[,1:3]) # Now we can see it - groups that separate plot(ss$d) # About 50 components summarize the data library(NMF) NMFgbm<-nmf(GBM[,ds[1:1000]]-min(GBM[,ds[1:1000]]),rank=5) # takes a while (there's a new package for fast nmf - nnls - check it out) coefmap(NMFgbm) plot3d(basis(fit(NMFgbm))[,3:5]) # Are there subtypes of GBM? Let's try consensus clustering # on data library(ConsensusClusterPlus) cc<-ConsensusClusterPlus(as.matrix(t(GBM[,ds[1:500]])),maxK=5,reps=100,pItem=.6,pFeature=.6,clusterAlg="km") # on leading svds cc<-ConsensusClusterPlus(as.matrix(t(ss$u[,1:10])),maxK=5,reps=100,pItem=.6,pFeature=.6,clusterAlg="km") # on NFM cc<-ConsensusClusterPlus(as.matrix(t(basis(fit(NMFgbm)))),maxK=5,reps=100,pItem=.6,pFeature=.6,clusterAlg="km") # Try different clustering methods # 2 clusters are strongly supported (there are 3-4 "known") - perhaps a different filtering will allow you to spot this... ##### modelbased clustering library(mclust) library(clustvarsel) mm<-Mclust(ss$u[,1:10]) # takes a while print(summary(mm)) # found 4 clusters! plot(mm,c(6:10),what="classification") plot3d(ss$u[,1:3],col=mm$classification) #### ######## # clustering genes # Using regression models to construct clusters or networks library(glasso) ha<-glasso(cor(GBM[,ds[1:2000]]),approx=T,rho=.5) # takes a while library(network) plot(network(ha$wi),usearrow=F,displayisolates=F) # fast version - huge package library(huge) hh<-huge(cor(GBM[,ds[1:2000]])) # runs an approximative estimateion of the sparse precision matrix print(hh$sparsity) # plot(network(hh$path[[2]]),displayisolates=F,usearrow=F) plot(network(hh$path[[4]]),displayisolates=F,usearrow=F) ####### ccGBM<-ConsensusClusterPlus(as.matrix((GBM[,ds[1:2000]])),maxK=6,reps=100,pItem=.6,pFeature=.6,clusterAlg="km") ### You can do network modeling yourself, line by line it's approximated by a regression model zz<-2000 # number of genes library(glmnet) cv.gg<-cv.glmnet(x=as.matrix(GBM[,ds[1:2000]])[,-zz],y=GBM[,ds[1:2000]][,zz]) # run regression of one of the other plot(cv.gg) # NetworkMatrix<-matrix(0,2000,2000) for (zz in (1:2000)) { cv.gg<-cv.glmnet(x=as.matrix(GBM[,ds[1:2000]])[,-zz],y=GBM[,ds[1:2000]][,zz]) NetworkMatrix[zz,-zz]<-coef(cv.gg,s="lambda.1se")[-1] print(zz) # takes a while to run all 2000 with CV - can also run for a fixed lambda to speed this up } # Network2<-NetworkMatrix Network2[abs(Network2)<.12]<-0 plot(network(Network2), displayisolates=F,usearrows=F) # Biclustering - both tumors and genes library(ggplot2) heatmap(GBM[,ds[1:500]]) heatmap(basis(fit(NMFgbm)))