# Demo script: cluster simulated 2-D Gaussian data with D-Stream micro-clusters
# reclustered by k-means, and compare against a batch kmeans() on the full data.

# library() stops immediately when a package is missing; require() would only
# warn and let the script fail later with a less obvious error.
library(stream)
library(streamMOA)
library(MASS)
library(tictoc)

# Draw `n_per_cluster` points from each unit-covariance 2-D Gaussian centred at
# the given means, stack them and return the rows in random order.
simulate_clusters <- function(n_per_cluster,
                              centers = list(c(0, 0), c(3, 3), c(0, 5))) {
  parts <- lapply(
    centers,
    function(mu) mvrnorm(n_per_cluster, mu = mu, Sigma = diag(2))
  )
  pts <- do.call(rbind, parts)
  pts[sample(nrow(pts)), ]
}

#### Built-in generator ----
stream <- DSD_Gaussians(k = 3, d = 2)
dstream <- DSC_DStream(gridsize = 0.1)
update(dstream, stream, n = 1500)
km <- DSC_Kmeans(k = 3)
recluster(km, dstream)
plot(km, stream, type = "both")

#### Data from scratch ----
xx <- simulate_clusters(500000)

dstream <- DSC_DStream(gridsize = 0.1)
update(dstream, xx[1:500, ])
km <- DSC_Kmeans(k = 3)
recluster(km, dstream)
plot(km, type = "both")
points(xx[1:500, ], col = rgb(0, 0, 0, alpha = 0.1))

# NOTE(review): rows 501-599 are never fed to the clusterer -- confirm whether
# the second batch was meant to start at 501.
update(dstream, xx[600:1000, ])
recluster(km, dstream)
plot(km, type = "both")
points(xx[600:1000, ], col = rgb(0, 0, 0, alpha = 0.1))

#### Using DSD_Memory: data with drift ----
xx <- simulate_clusters(1500000)
# Add sorted Gaussian noise to the first coordinate: the offset grows
# monotonically with the row index, so the data drifts along the stream.
xx[, 1] <- xx[, 1] + sort(rnorm(nrow(xx)))

# Baseline timing: batch k-means on the complete data set.
tic()
kk <- kmeans(xx, 3)
toc()

stream <- DSD_Memory(xx)
batch_size <- 50000

# Timed streaming run: initialise on the first batch, then keep updating.
tic()
# lambda is a fading factor to handle concept drift.
dstream <- DSC_DStream(gridsize = 0.1, lambda = 0.01)
update(dstream, stream, n = batch_size)
km <- DSC_Kmeans(k = 3)
recluster(km, dstream)
plot(km, stream, xlim = c(-6, 6), ylim = c(-3, 6), type = "both")

zz <- 0
# Continue while a full batch is still available in the in-memory stream.
while ((stream$state$counter + batch_size) <= nrow(xx)) {
  zz <- zz + 1
  update(dstream, stream, n = batch_size)
  recluster(km, dstream)
  # Redraw only on every 10th batch.
  if (zz %% 10 == 0) {
    plot(km, stream, xlim = c(-6, 6), ylim = c(-3, 6), type = "both")
  }
}
recluster(km, dstream)
toc()

# Overlay the batch k-means centres on the most recent stream plot.
# NOTE(review): this refits kmeans() on the full data although `kk` was already
# computed above; kept because a new random start may give different centres.
kk <- kmeans(xx, 3)
points(kk$centers, pch = 6, cex = 5, col = 3)