# Spectral clusering demo # # Simulated data with clear block-diagonal structure # to illustrate the method A<-matrix(rep(1,9),3,3) A<-rbind(cbind(A,0*A),cbind(0*A,A)) rno<-matrix(rnorm(36),6,6)*.25 rno<-t(rno)%*%rno A2<-A+rno # noisy blockmatrix # image(A) # what the perfect block-diagonal looks like print(eigen(A)) # 2 non-zero plot(eigen(A)$values) # 2 blocks - 2 nonzero eigvenvalues image(A2) print(eigen(A2)) plot(eigen(A2)$values) # 2 large eigenvalues and the rest non-zero # ### # Code from http://www.di.fc.ul.pt/~jpn/r/spectralclustering/spectralclustering.html ## s <- function(x1, x2, alpha=1) { exp(- alpha * norm(as.matrix(x1-x2), type="F")) } # computing the distance between observations - gaussian kernel make.similarity <- function(my.data, similarity) { N <- nrow(my.data) S <- matrix(rep(NA,N^2), ncol=N) for(i in 1:N) { for(j in 1:N) { S[i,j] <- similarity(my.data[i,], my.data[j,]) } } S } # computing similarity matrix make.affinity <- function(S, n.neighboors=2) { N <- length(S[,1]) if (n.neighboors >= N) { # fully connected A <- S } else { A <- matrix(rep(0,N^2), ncol=N) for(i in 1:N) { # for each line # only connect to those points with larger similarity best.similarities <- sort(S[i,], decreasing=TRUE)[1:n.neighboors] for (s in best.similarities) { j <- which(S[i,] == s) A[i,j] <- S[i,j] A[j,i] <- S[i,j] # to make an undirected graph, ie, the matrix becomes symmetric } } } A } ######### # ### apply the above to data library(kernlab) data(spirals) ## plot(spirals) # "weird data" # S <- make.similarity(spirals, s) # compute similarity image(S) # some are close, some are far... # A <- make.affinity(S, 4) D <- diag(apply(A, 1, sum)) U <- D - A # Computing the graph (affinity matrix) and centering it compared to the node-degree # k <- 2 # ask for 2 components evL <- eigen(U, symmetric=TRUE) # the eigendecomposition # Z <- evL$vectors[,(ncol(evL$vectors)-k+1):ncol(evL$vectors)] km <- kmeans(Z, centers=k, nstart=5) # cluster using the leading k components plot(spirals, col=km$cluster) # # #### illustration of graph similarity par(mfrow=c(1,1)) data(spirals) plot(spirals) p<-locator() dd<-exp(-1*as.matrix(dist(spirals))/.25) image(dd) # library(network) dd<-exp(-1*as.matrix(dist(spirals))/.25) dd[dd<.5]<-0 pn<-network(dd) plot(pn) # Illustration of the linked observations in the affinity matrix # it picked up essneiually two components as we saw above ##### S <- make.similarity(iris[,1:4], s) image(S) # nice and blocky - but a bit of an overlap between the two blocks # A <- make.affinity(S, 4) D <- diag(apply(A, 1, sum)) U <- D - A # k <- 3 evL <- eigen(U, symmetric=TRUE) Z <- evL$vectors[,(ncol(evL$vectors)-k+1):ncol(evL$vectors)] km <- kmeans(Z, centers=k, nstart=5) plot3d(iris[,2:4], col=km$cluster) # ##### kernlab function library(kernlab) kk1<-kmeans(spirals,2) plot(spirals,col=kk1$cluster,main="k-means on data") # kmeans doesn't work p<-locator() kk2<-specc(spirals,centers=2) plot(spirals,col=kk2,main="k-means on eigen vectors")