Iris example in ddmatrix

This is a ddmatrix version for the iris example. This intends to perform parallel computing on block-cyclic data.

It should be run by four processors by Rscript as

SHELL> mpiexec -np 4 Rscript iris_dmat.r

A file Rplot.pdf or a similar file will be created. The plot contains visualization of clustering results on the first two principal components.


ddmatrix Code: (iris_dmat.r)

# File name: iris_dmat.r
# Run: mpiexec -np 4 Rscript iris_dmat.r

rm(list = ls())                                       # Clean environment
library(pbdDMAT, quiet = TRUE)                        # Load library
init.grid()
if(comm.size() != 4)
  comm.stop("4 processors are required.")

### Load data
X <- as.matrix(iris[, -5])                            # Dimension 150 by 4
X.cid <- as.numeric(iris[, 5])                        # True id

### Convert to ddmatrix
X.dmat <- as.ddmatrix(X)

### Standardized
X.std <- scale(X.dmat)
mu <- as.matrix(colMeans(X.std))
cov <- as.matrix(cov(X.std))
comm.print(mu)
comm.print(cov)

### SVD
X.svd <- svd(X.std)

### Project on column space of singular vectors
A <- X.svd$u %*% as.ddmatrix(diag(X.svd$d))
B <- X.std %*% X.svd$v                                # A ~ B
X.prj <- as.matrix(A[, 1:2])                          # Only useful for plot

### Clustering
library(pmclust, quiet = TRUE)
comm.set.seed(123, diff = TRUE)

X.dmat <- X.std
PARAM.org <- set.global.dmat(K = 3)                   # Preset storage
.pmclustEnv$CONTROL$debug <- 0                        # Disable debug messages
PARAM.org <- initial.center.dmat(PARAM.org)
PARAM.kms <- kmeans.step.dmat(PARAM.org)              # K-means
X.kms.cid <- as.vector(.pmclustEnv$CLASS.dmat)

### Validation
X.kms.adjR <- EMCluster::RRand(X.cid, X.kms.cid)$adjRand
comm.print(X.kms.adjR)

### Swap classification id
tmp <- X.kms.cid
X.kms.cid[tmp == 1] <- 3
X.kms.cid[tmp == 2] <- 1
X.kms.cid[tmp == 3] <- 2

### Display on first 2 components
if(comm.rank() == 0){
  par(mfrow = c(2, 2))
  plot(X.prj, col = X.cid + 1, pch = X.cid,
       main = "iris (true)", xlab = "PC1", ylab = "PC2")
  plot(X.prj, col = X.kms.cid + 1, pch = X.kms.cid,
       main = paste("iris (kmeans)", sprintf("%.4f", X.kms.adjR)),
       xlab = "PC1", ylab = "PC2")
}

### Finish
finalize()