# Spectral clusering # # Simulated data with clear block-diagonal structure # to illustrate the method A<-matrix(rep(1,9),3,3) A<-rbind(cbind(A,0*A),cbind(0*A,A)) rno<-matrix(rnorm(36),6,6)*.25 rno<-t(rno)%*%rno A2<-A+rno # noisy blockmatrix image(A) print(eigen(A)) plot(eigen(A)\$values) # 2 blocks - 2 nonzero eigvenvalues image(A2) print(eigen(A2)) plot(eigen(A2)\$values) ### # Code from http://www.di.fc.ul.pt/~jpn/r/spectralclustering/spectralclustering.html ## s <- function(x1, x2, alpha=1) { exp(- alpha * norm(as.matrix(x1-x2), type="F")) } make.similarity <- function(my.data, similarity) { N <- nrow(my.data) S <- matrix(rep(NA,N^2), ncol=N) for(i in 1:N) { for(j in 1:N) { S[i,j] <- similarity(my.data[i,], my.data[j,]) } } S } make.affinity <- function(S, n.neighboors=2) { N <- length(S[,1]) if (n.neighboors >= N) { # fully connected A <- S } else { A <- matrix(rep(0,N^2), ncol=N) for(i in 1:N) { # for each line # only connect to those points with larger similarity best.similarities <- sort(S[i,], decreasing=TRUE)[1:n.neighboors] for (s in best.similarities) { j <- which(S[i,] == s) A[i,j] <- S[i,j] A[j,i] <- S[i,j] # to make an undirected graph, ie, the matrix becomes symmetric } } } A } ######### ### apply to data library(kernlab) data(spirals) ## plot(spirals) # S <- make.similarity(spirals, s) image(S) A <- make.affinity(S, 4) D <- diag(apply(A, 1, sum)) U <- D - A k <- 2 evL <- eigen(U, symmetric=TRUE) Z <- evL\$vectors[,(ncol(evL\$vectors)-k+1):ncol(evL\$vectors)] km <- kmeans(Z, centers=k, nstart=5) plot(spirals, col=km\$cluster) #### illustration of graph similarity par(mfrow=c(1,1)) data(spirals) plot(spirals) p<-locator() dd<-exp(-1*as.matrix(dist(spirals))/.25) image(dd) # library(network) dd<-exp(-1*as.matrix(dist(spirals))/.25) dd[dd<.5]<-0 pn<-network(dd) plot(pn) ##### S <- make.similarity(iris[,1:4], s) image(S) A <- make.affinity(S, 4) D <- diag(apply(A, 1, sum)) U <- D - A k <- 3 evL <- eigen(U, symmetric=TRUE) Z <- evL\$vectors[,(ncol(evL\$vectors)-k+1):ncol(evL\$vectors)] km <- kmeans(Z, centers=k, nstart=5) plot3d(iris[,2:4], col=km\$cluster) ##### kernlab function library(kernlab) kk1<-kmeans(spirals,2) plot(spirals,col=kk1\$cluster,main="k-means on data") # kmeans doesn't work p<-locator() kk2<-specc(spirals,centers=2) plot(spirals,col=kk2,main="k-means on eigen vectors")