require(mva)
require(cclust)
clustindex <- function( clres, x, index="all" )
  {
###needed measures 
    ##withins <- withinss1(x, centers, cluster)
    ## varwithins <- varwithinss(x, clres$centers, clres$cluster)
    zgss <- gss(x,clres$size, clres$withins)
    ## zvargss <- vargss(x, clres$size,varwithins)
    zttw <- ttww(x, clres$size, clres$cluster)
    ## minmaxd <-maxmindist(clres$size,distdata)
    ## distdata <- countdist(x)
    
    
###indexes calculations
    index <- pmatch(index, c("calinski", "cindex", "db", "hartigan",
                          "ratkovsky", "scott", "marriot", "ball",
                          "trcovw", "tracew", "friedman",
                          "rubin","ssi","likelihood","all"))

    if (is.na(index)) 
      stop("invalid clustering index")
    if (index == -1) 
      stop("ambiguous index")

    vecallindex <- numeric(14)

    if ((index==1) || (index==15))
      vecallindex[1] <- calinski(zgss,clres$size)
    if ((index==2) || (index==15))
      {
        if (length(unique(x))==2)
          {
            distdata <- countdist(x)
            minmaxd <- maxmindist(clres$size,distdata)
            vecallindex[2] <- cindex(clres$withins, minmaxd, clres$size)
          }
        else  vecallindex[2] <- NA
      }
    if ((index==3) || (index==15))
      vecallindex[3] <- db(clres$withins, clres$centers, clres$cluster)
    if ((index==4) || (index==15))
      vecallindex[4] <- hartigan(zgss)
    if ((index==5) || (index==15))
      {
        varwithins <- varwithinss(x, clres$centers, clres$cluster)
        zvargss <- vargss(x, clres$size,varwithins)
        vecallindex[5] <-ratkowsky(zvargss, clres$size)
      }
    if ((index==6) || (index==15))
       vecallindex[6] <- scott(zttw, clres$size)
    if ((index==7) || (index==15))
       vecallindex[7] <- marriot(zttw,clres$size)
    if ((index==8) || (index==15))
       vecallindex[8] <- ball(clres$withins, clres$size)
    if ((index==9) || (index==15))
       vecallindex[9] <- tracecovw(zttw)
    if ((index==10) || (index==15))
       vecallindex[10] <- tracew(zttw)
    if ((index==11) || (index==15))
       vecallindex[11] <- friedman(zttw)
    if ((index==12) || (index==15))
       vecallindex[12] <- rubin(zttw)
    if ((index==13) || (index==15))
       vecallindex[13] <- ssi(clres$centers, clres$size)$ssiw 
    if ((index==14) || (index==15))
       vecallindex[14] <- likelihood(x,clres$centers, clres$cluster)
    
    names(vecallindex) <- c("calinski", "cindex", "db", "hartigan",
                              "ratkovsky", "scott", "marriot", "ball",
                              "trcovw", "tracew", "friedman",
                              "rubin","ssi","likelihood")

    if (index < 15)
      vecallindex <- vecallindex[index]

    return(vecallindex)
  }









cclust <- function (x, centers, iter.max = 100, verbose = FALSE, dist = "euclidean", 
    method = "kmeans", rate.method = "polynomial", rate.par = NULL) 
{
    xrows <- dim(x)[1]
    xcols <- dim(x)[2]
    xold <- x
    perm <- sample(xrows)
    x <- x[perm, ]
    # initial values are given
    if (is.matrix(centers)) 
        ncenters <- dim(centers)[1]
    else {
        # take centers random vectors as initial values
        ncenters <- centers
        centers <- x[rank(runif(xrows))[1:ncenters], ]
    }
    dist <- pmatch(dist, c("euclidean", "manhattan"))
    if (is.na(dist)) 
        stop("invalid distance")
    if (dist == -1) 
      stop("ambiguous distance")
    method <- pmatch(method, c("kmeans", "hardcl", "neuralgas"))
    if (is.na(method)) 
        stop("invalid clustering method")
    if (method == -1) 
        stop("ambiguous clustering method")
    rate.method <- pmatch(rate.method, c("polynomial", "exponentially.decaying"))
    if (is.na(rate.method)) 
        stop("invalid learning rate method")
    if (rate.method == -1) 
        stop("ambiguous learning rate method")
    if (method == 2) {
        if (rate.method == 1 && missing(rate.par)) {
            rate.par <- c(1e-00, 0e-00)
        }
        else if (rate.method == 2 && missing(rate.par)) {
            rate.par <- c(0.1, 1e-04)
        }
    }
    if (method == 3 && missing(rate.par)) {
        rate.par <- c(0.5, 0.005, 10, 0.01)
    }
    initcenters <- centers
   # dist <- matrix(0, xrows, ncenters)
    # necessary for empty clusters
    pos <- as.factor(1:ncenters)
    rownames(centers) <- pos
    iter <- integer(1)
    changes <- integer(iter.max)
    cluster <- integer(xrows)
    clustersize <- integer(ncenters)
    if (method == 1) {
        retval <- .C("kmeans", xrows = as.integer(xrows),
                     xcols = as.integer(xcols), 
            x = as.double(x), ncenters = as.integer(ncenters), 
            centers = as.double(centers), cluster = as.integer(cluster), 
            clustnew = integer(xrows), xk=double(xrows),
            iter.max = as.integer(iter.max), iter = as.integer(iter), 
            changes = as.integer(changes), clustersize = as.integer(clustersize), 
            verbose = as.integer(verbose), dist = as.integer(dist-1))
    }
    else if (method == 2) {
        retval <- .C("hardcl", xrows = as.integer(xrows), xcols = as.integer(xcols), 
            x = as.double(x), ncenters = as.integer(ncenters), 
            centers = as.double(centers), cluster = as.integer(cluster), 

            t = integer(ncenters),
            iter.max = as.integer(iter.max), iter = as.integer(iter), 
            clustersize = as.integer(clustersize), verbose = as.integer(verbose), 
            dist = as.integer(dist-1), methrate = as.integer(rate.method-1), 
            par = as.double(rate.par))
    }
    else if (method == 3) {
        retval <- .C("neuralgas", xrows = as.integer(xrows), 
            xcols = as.integer(xcols), x = as.double(x), ncenters = as.integer(ncenters), 
            centers = as.double(centers), cluster = as.integer(cluster), 
            iter.max = as.integer(iter.max), iter = as.integer(iter), 
            clustersize = as.integer(clustersize),
                     verbose = as.integer(verbose), 
            dist = as.integer(dist-1), par = as.double(rate.par),
            dista=double(ncenters), ordd=integer(ncenters))
    }
    centers <- matrix(retval$centers, ncol = xcols, dimnames = dimnames(initcenters))
    cluster <- retval$cluster + 1
    cluster <- cluster[order(perm)]
    if (method == 1) {
        methrate <- NA
        par <- NA
    }
  if (method == 3) {
        methrate <- NA
    }
    within <- withinss(list(centers = centers, cluster = cluster), xold)
    
    retval <- list(centers = centers, initcenters = initcenters, 
        ncenters = ncenters, cluster = cluster, size = retval$clustersize, 
        iter = retval$iter - 1, changes = retval$changes, dist = dist, 
        method = method, rate.method = rate.method, rate.par = rate.par, 
        call = match.call(), withinss = within)
    class(retval) <- c("cclust")
    return(retval)
  }



print.cclust <- function (clobj)
  {
  
    if (!is.null(clobj$iter))
      cat("\n                            Clustering on Training Set\n\n\n")
    else
      cat("\n                              Clustering on Test Set\n\n\n")
    
    cat("Number of Clusters: ", clobj$ncenters, "\n")
    cat("Sizes  of Clusters: ", clobj$size, "\n\n")
    if (clobj$method!=1)
      cat("Learning Parameters:",clobj$rate.par,"\n\n")
    
  if (clobj$method==1)
    {
  if (!is.null(clobj$iter))
      {
        if (clobj$iter < length(clobj$changes))
          cat("Algorithm converged after", clobj$iter, "iterations.\n")
        else
          cat("Algorithm did not converge after", clobj$iter, "iterations.\n")
        cat("Changes:", clobj$changes[1:clobj$iter], "\n\n")
      }
    }
 
  }

plot.cclust <- function(clobj, x, centers=TRUE, initcenters=TRUE,
                         color=rainbow(clobj$ncenters),...){
  
  x <- as.matrix(x)
  
  cl <- predict(clobj, x)

  
  if(dim(x)[2]>2){
    pairs(x, col=color[cl$cluster], ...)
    }
  else{
    plot(x, col=color[cl$cluster], ...)
    if(centers)
      points(cl$centers, pch=4,col=color,cex=2)
    if(initcenters)
      points(clobj$initcenters, pch=3,col=color,cex=2)
  }
}


predict.cclust <- function(clobj, x){

  xrows<-dim(x)[1]
  xcols<-dim(x)[2]
  ncenters <- clobj$ncenters
  cluster <- integer(xrows)
  clustersize <- integer(ncenters)
  

  if(dim(clobj$centers)[2] != xcols){
    stop("Number of variables in cluster object and x are not the same!")
  }

  
  retval <- .C("assign",
               xrows = as.integer(xrows),
               xcols = as.integer(xcols),
               x = as.double(x),
               ncenters = as.integer(ncenters),
               centers = as.double(clobj$centers),
               cluster = as.integer(cluster),
               clustersize = as.integer(clustersize),
               dist = as.integer(clobj$dist-1))

  
     

  clobj$initcenters <- NULL
  clobj$iter <- NULL
  clobj$changes <- NULL
  clobj$cluster <- retval$cluster+1
  clobj$size <- retval$clustersize

  return(clobj)
}



countdist <- function(x)
  {
    nr <- nrow(x)
    nc <- ncol(x)
    d <- numeric(nc+1)

    for (i in 1:(nr-2))
      {
        y <- sweep(x[(i+1):nr,],2,x[i,],"-")
        z <- apply(abs(y), 1, sum)
        for (j in 0:nc)
          d[j+1] <- d[j+1] + sum(z==j)
      }

    ## last pair
    z <- sum(abs(x[nr,]-x[nr-1,]))
    d[z+1] <- d[z+1] + 1
    
    names(d) <- 0:nc
    return(d)
  }

countdist <- function(x)
  {
    nr <- nrow(x)
    nc <- ncol(x)
    d <- numeric(nc+1)

    for (i in 1:(nr-2))
      {
        y <- matrix(0,nr-i,nc)
        for (j in 1:nc)
          y[,j] <- x[(i+1):nr,j] - x[i,j]
        y <- abs(y)
        z <- y[,1]
        for (j in 2:nc)
          z <- z + y[,j]
        for (j in 0:nc)
          d[j+1] <- d[j+1] + sum(z==j)
      }

    ## last pair
    z <- sum(abs(x[nr,]-x[nr-1,]))
    d[z+1] <- d[z+1] + 1
    
    names(d) <- 0:nc
    return(d)
  }

      
        
gss <- function(x, clsize, withins)
  {
    n <- sum(clsize)
    k <- length(clsize)
    allmean <- apply(x,2,mean)
    dmean <- sweep(x,2,allmean,"-")
    allmeandist <- sum(dmean^2)
    wgss <- sum(withins)
    bgss <- allmeandist - wgss
    
    zgss <- list(wgss=wgss, bgss=bgss)
    return(zgss)
  }





calinski <- function(zgss, clsize)
  {
    n <- sum(clsize)
    k <- length(clsize)
    vrc <- (zgss$bgss/(k-1))/(zgss$wgss/(n-k))
    return(vrc=vrc)
  }

cindex <- function (withins, minmaxd, clsize)
  {
    dw <- sum(withins*clsize)    
    ##c-index
    cindex <- (dw -minmaxd$mindw)/(minmaxd$maxdw - minmaxd$mindw)
    return(cindex)
  }



db <- function(withins, centers, cluster)
  {
    mse <- withins/table(cluster)
    r <- outer(mse, mse, "+") / as.matrix(dist(centers, diag=TRUE))
    diag(r) <- 0
    db <- mean(apply(r,1,max))
    return(db)
  }


hartigan <- function(zgss)
  {
    hart <- log(zgss$bgss/zgss$wgss)
    return(hart)
  }


ratkowsky <- function(zvargss, clsize)
  {
    k <- length(clsize)
    ratio<-mean(sqrt(zvargss$varbgss/zvargss$vartss))
    rat <- ratio/sqrt(k)
    return(rat)
  }


scott <- function(zttw, clsize)
  {
    n <- sum(clsize)
    dettt<-prod(eigen(zttw$tt)$values)
    detw<-prod(eigen(zttw$w)$values)
    scott <- n * log(dettt/detw)
    return(scott)
  }

marriot <- function(zttw, clsize)
  {
    k <- length(clsize)
    detw<-prod(eigen(zttw$w)$values)
    mar <- (k**2) * detw
    return(mar)
  }


ball <- function(withins, clsize)
  {
    ball <- sum(withins)/length(clsize)
  }


tracecovw <- function(zttw)
  {
    trcovw <- sum(diag(cov(zttw$w)))
    return(trcovw)
  }


tracew <- function(zttw)
  {
    tracew <- sum(diag(zttw$w))
    return(tracew)
  }


friedman <- function(zttw)
  {
    b <- zttw$tt-zttw$w
    fried <- sum(diag(solve(zttw$w)%*%b))
    return(fried)
  }


rubin <- function(zttw)
  {
    dettt<-prod(eigen(zttw$tt)$values)
    detw<-prod(eigen(zttw$w)$values)
    friedm <- dettt/detw
    return(friedm)
  }


ssi <- function (centers, clsize)
  {
    ncl <- dim(centers)[1]
    nvar <- dim(centers)[2]
    n <- sum(clsize)

    cmax <- apply(centers, 2, max)
    cmin <- apply(centers, 2, min)
    cord <- apply(centers, 2, order)
    cmaxi <- cord[ncl,]
    cmini <- cord[1,]

    meanmean <- mean(centers)
    absmdif <- abs(apply(centers, 2, mean) - meanmean)
    span <- cmax - cmin
    csizemax <- clsize[cmaxi]
    csizemin <- clsize[cmini]

    hiest <- nvar
    hiestw <- hiest * max(max(csizemax), max(csizemin)) * exp(-min(absmdif))

    sist <- sum(span)/hiest

    sistw <- (span * exp(-absmdif)) %*% sqrt(csizemax*csizemin) / hiestw

    return(list(ssi=sist, ssiw=sistw))
  }

likelihood <- function (x, centers, cluster)
  {
    n <- nrow(x)
    l <- 0

    for (i in 1:n)
      l <- l - log(prod(x[i,]*centers[cluster[i],] +
                    (1-x[i,])*(1-centers[cluster[i],])))
    
    return(l)
  }
maxmindist <-  function (clsize, distscen)
  {
    ##only for binary data

     ncl <- length(clsize)
       
    npairs <- 0
    for (i in 1:ncl)
      npairs <- npairs + clsize[i]*(clsize[i]-1)/2
   
    ##minimum distance
    mindw <- 0
    nfound <- distscen[1]
    i <- 1
    while (nfound < npairs)
      {
        if ((nfound+distscen[i+1]) < npairs)
          {
            mindw <- mindw + i*distscen[i+1]
            nfound <- nfound+distscen[i+1]
          }
        else
          {
            mindw <- mindw + i*(npairs-nfound)
            nfound <- nfound+distscen[i+1]
          }
        i <- i+1
      }
    
    ##maximum distance
    maxdw <- 0
    nfound <- 0
    i <- length(distscen) - 1
    while (nfound < npairs)
      {
        if ((nfound+distscen[i+1]) < npairs)
          {
            maxdw <- maxdw + i*distscen[i+1]
            nfound <- nfound+distscen[i+1]
          }
        else
          {
            maxdw <- maxdw + i*(npairs-nfound)
            nfound <- nfound+distscen[i+1]
          }
        i <- i-1
      }
    
    minmaxd <-  list(mindw=mindw, maxdw=maxdw)
    
    return(minmaxd)
  }
ttww <- function(x, clsize, cluster)
  {
    n <- sum(clsize)
    k <- length(clsize)
    w<-0
    tt <- cov(x)*n
    for (l in 1:k)
      w<- w+cov(x[cluster==l,])*clsize[l]
    zttw <- list(tt=tt, w=w)
    return(zttw)
  }

vargss <- function(x, clsize, varwithins)
  {
    nvar<-dim(x)[2]
    n <- sum(clsize)
    k <- length(clsize)
    varallmean<-rep(0,nvar)
    varallmeandist<-rep(0,nvar)
    varwgss<-rep(0,nvar)
    for (l in 1:nvar)
      varallmean[l] <- mean(x[,l])
    vardmean <- sweep(x,2,varallmean,"-")
    for (l in 1:nvar)
      {
        varallmeandist[l] <- sum((vardmean[,l])^2)
        varwgss[l] <- sum(varwithins[,l])
      }
    varbgss <- varallmeandist - varwgss
    vartss<-varbgss+varwgss
    zvargss <- list(vartss=vartss, varbgss=varbgss)
    return(zvargss)
  }
varwithinss <- function(x, centers, cluster)
{
  nrow<-dim(centers)[1]
  nvar<-dim(x)[2]
  varwithins <- matrix(0, nrow,nvar)
  x <- (x - centers[cluster, ])^2
  for(l in 1:nvar){
    for(k in 1:nrow){
      varwithins[k,l] <- sum(x[cluster==k,l])
    }
  }
  return(varwithins)
}





withinss <- function(clobj, x){
  
    retval <- rep(0, nrow(clobj$centers))
    x <- (x - clobj$centers[clobj$cluster, ])^2
    for(k in 1:nrow(clobj$centers)){
        retval[k] <- sum(x[clobj$cluster==k,])
    }
    retval
}



.First.lib <- function(lib, pkg) library.dynam("cclust", pkg, lib)
