## file biplot.R
## copyright (C) 1998 W. N. Venables and B. D. Ripley
##
## Generic biplot: dispatch on the class of `x` (default and princomp
## methods are defined below).
biplot <- function(x, ...) UseMethod("biplot")

biplot.default <-
    function(x, y, var.axes = TRUE, col, cex = rep(par("cex"), 2),
	     xlabs = NULL, ylabs = NULL, expand=1, xlim = NULL, ylim = NULL,
	     arrow.len = 0.1, ...)
{
    ## Draw a biplot: the observations `x` as text labels on the
    ## bottom/left axes, and the variables `y` as text labels (plus,
    ## when var.axes, arrows from the origin) on rescaled top/right axes.
    ##   cex, col    - length-2: [1] for observations, [2] for variables
    ##   expand      - extra scaling applied to the variable axes
    ##   arrow.len   - arrowhead length passed to arrows()
    n <- nrow(x)
    p <- nrow(y)
    if(missing(xlabs)) {
        xlabs <- dimnames(x)[[1]]
        if(is.null(xlabs)) xlabs <- 1:n
    }
    xlabs <- as.character(xlabs)
    dimnames(x) <- list(xlabs, dimnames(x)[[2]])
    if(missing(ylabs)) {
        ylabs <- dimnames(y)[[1]]
        if(is.null(ylabs)) ylabs <- paste("Var", 1:p)
    }
    ylabs <- as.character(ylabs)
    dimnames(y) <- list(ylabs, dimnames(y)[[2]])

    ## recycle a single cex/col to one value per layer
    if(length(cex) == 1) cex <- c(cex, cex)
    if(missing(col)) {
        col <- par("col")
        if (!is.numeric(col)) col <- match(col, palette())
        col <- c(col, col + 1)
    }
    else if(length(col) == 1) col <- c(col, col)

    ## ranges symmetric about 0 so the origin stays central
    unsigned.range <- function(x) c(-abs(min(x)), abs(max(x)))
    rangx1 <- unsigned.range(x[, 1])
    rangx2 <- unsigned.range(x[, 2])
    rangy1 <- unsigned.range(y[, 1])
    rangy2 <- unsigned.range(y[, 2])

    if(missing(xlim) && missing(ylim))
        xlim <- ylim <- rangx1 <- rangx2 <- range(rangx1, rangx2)
    else if(missing(xlim)) xlim <- rangx1
    ## BUG FIX: was a plain `else`, which overwrote a user-supplied ylim
    ## whenever xlim was also supplied
    else if(missing(ylim)) ylim <- rangx2
    ## factor mapping the variable coordinates onto the observation axes
    ratio <- max(rangy1/rangx1, rangy2/rangx2)/expand
    ## set square plotting region; restore the old par on exit
    oldpar <- par(pty = "s")
    on.exit(par(oldpar))
    plot(x, type = "n", xlim = xlim, ylim = ylim, col = col[1], ...)
    text(x, xlabs, cex = cex[1], col = col[1], ...)
    ## overlay the variables on rescaled axes, drawn at top and right
    par(new = TRUE)
    plot(y, axes = FALSE, type = "n", xlim = xlim*ratio, ylim = ylim*ratio,
	 xlab = "", ylab = "", col = col[1], ...)
    axis(3, col = col[2])
    axis(4, col = col[2])
    box(col = col[1])
    text(y, labels=ylabs, cex = cex[2], col = col[2], ...)
    if(var.axes)
        arrows(0, 0, y[,1] * 0.8, y[,2] * 0.8, col = col[2], length=arrow.len)
    invisible()
}

biplot.princomp <- function(x, choices = 1:2, scale = 1, pc.biplot=FALSE, ...)
{
    ## Biplot of a "princomp" fit: scores are divided and loadings
    ## multiplied by lam = (sdev * sqrt(n))^scale, so `scale` in [0, 1]
    ## distributes the scaling between observations and variables.
    ## pc.biplot removes the sqrt(n) factor (Gabriel-style scaling).
    if(length(choices) != 2) stop("length of choices must be 2")
    if(!length(scores <- x$scores))
	stop(paste("object", deparse(substitute(x)), "has no scores"))
    lam <- x$sdev[choices]
    if(is.null(n <- x$n.obs)) n <- 1
    lam <- lam * sqrt(n)
    if(scale < 0 || scale > 1) warning("scale is outside [0, 1]")
    ## BUG FIX: lam^0 is 1, so scale = 0 must leave the coordinates
    ## unscaled; previously the full factor lam*sqrt(n) was applied
    lam <- if(scale != 0) lam^scale else 1
    if(pc.biplot) lam <- lam / sqrt(n)
    biplot.default(t(t(scores[, choices]) / lam),
		   t(t(x$loadings[, choices]) * lam), ...)
    invisible()
}
## Seber pages 506-507, after a Golub original

cancor <- function(x, y, xcenter=TRUE, ycenter=TRUE)
{
    ## Canonical correlation analysis of two data matrices with the same
    ## number of rows.  Returns the canonical correlations, the
    ## coefficient matrices for each block, and the centering vectors.
    x <- as.matrix(x)
    y <- as.matrix(y)
    nr <- nrow(x)
    if(nr != nrow(y)) stop("unequal number of rows in cancor")

    ## Resolve one block's centering request: TRUE -> column means,
    ## FALSE -> zeros, numeric -> recycled to one value per column.
    ## Returns the centered matrix together with the centers used.
    recenter <- function(m, ctr) {
        nc <- ncol(m)
        if(is.logical(ctr))
            ctr <- if(ctr) apply(m, 2, mean) else rep(0, nc)
        else
            ctr <- rep(ctr, length = nc)
        list(m = m - rep(ctr, rep(nr, nc)), center = ctr)
    }

    xc <- recenter(x, xcenter)
    yc <- recenter(y, ycenter)

    qx <- qr(xc$m)
    qy <- qr(yc$m)
    dx <- qx$rank
    dy <- qy$rank
    ## the singular values of Qx' Qy are the canonical correlations
    z <- svd(qr.qty(qx, qr.qy(qy, diag(1, nr, dy)))[1:dx, , drop = FALSE],
	     dx, dy)
    list(cor = z$d,
	 xcoef = backsolve((qx$qr)[1:dx, 1:dx, drop = FALSE], z$u),
	 ycoef = backsolve((qy$qr)[1:dy, 1:dy, drop = FALSE], z$v),
	 xcenter = xc$center,
	 ycenter = yc$center)
}
cmdscale <- function (d, k = 2, eig = FALSE) {
    ## Classical (metric) multidimensional scaling: embed the
    ## dissimilarities `d` (a "dist" object or square matrix) in k
    ## dimensions via the eigendecomposition of the double-centred
    ## squared distances.
    if (any(is.na(d)))
	stop("NA values not allowed in d")
    if (is.null(n <- attr(d, "Size"))) {
        ## plain matrix input: must be square
	x <- as.matrix(d^2)
	if ((n <- nrow(x)) != ncol(x))
	    stop("Distances must be result of dist or a square matrix")
        rn <- rownames(x)
    }
    else {
        ## "dist" object: unpack the lower triangle into a full matrix.
        ## BUG FIX: point labels live in attr(d, "Labels") for a dist
        ## object -- dimnames(d)[[1]] (used previously) is always NULL
        rn <- attr(d, "Labels")
	x <- matrix(0, n, n)
	x[row(x) > col(x)] <- d^2
	x <- x + t(x)
    }
    storage.mode(x) <- "double"
    ## double-centre via the compiled helper from this package
    Tmat <- -0.5 * .C("dblcen", x, as.integer(n), PACKAGE="mva")[[1]]
    e <- eigen(Tmat, symmetric = TRUE)
    ev <- e$values[1:k]
    ## NOTE(review): negative eigenvalues among the first k give NaN
    ## coordinates here (sqrt of a negative) -- unchanged behaviour
    points <- e$vectors[, 1:k] %*% diag(sqrt(ev))
    dimnames(points) <- list(rn, NULL)
    if (eig) list(points = points, eig = ev)
    else points
}
dist <- function(x, method="euclidean", diag=FALSE, upper=FALSE)
{
    ## Distances between the rows of `x`, returned as a packed
    ## lower-triangle "dist" object of length N*(N-1)/2.
    ## account for possible spellings of euclid?an
    if(!is.na(pmatch(method, "euclidian")))
	method <- "euclidean"

    method <- pmatch(method, c("euclidean", "maximum",
			       "manhattan", "canberra", "binary"))
    if(is.na(method))
	stop("invalid distance method")
    if(method == -1)
	stop("ambiguous distance method")

    N <- nrow(x <- as.matrix(x))
    ## BUG FIX: the compiled `distance` routine is in this package's
    ## shared library, like every other native call in this file --
    ## PACKAGE was previously (inconsistently) "base"
    d <- .C("distance",
	    x = as.double(x),
	    nr= N,
	    nc= ncol(x),
	    d = double(N*(N - 1)/2),
	    diag  = as.integer(FALSE),
	    method= as.integer(method),
	    DUP = FALSE, PACKAGE="mva")$d
    attr(d, "Size") <- N
    attr(d, "Labels") <- dimnames(x)[[1]]
    attr(d, "Diag") <- diag
    attr(d, "Upper") <- upper
    class(d) <- "dist"
    return(d)
}

names.dist <- function(d)
{
    ## the point labels of a "dist" object are kept in attr(, "Labels")
    attr(d, "Labels")
}

"names<-.dist" <- function(d, n)
{
    ## Replacement method for the labels of a "dist" object; one label
    ## per point is required.
    size <- attr(d, "Size")
    if(length(n) != size)
	stop("invalid names for dist object")
    attr(d, "Labels") <- n
    d
}

as.matrix.dist <- function(obj)
{
    ## Expand the packed lower triangle of a "dist" object into the full
    ## symmetric distance matrix (zero diagonal), labelled with the
    ## stored Labels or 1..Size when absent.
    size <- attr(obj, "Size")
    m <- matrix(0, size, size)
    m[row(m) > col(m)] <- obj
    m <- m + t(m)
    lab <- attr(obj, "Labels")
    if(is.null(lab)) lab <- 1:size
    dimnames(m) <- list(lab, lab)
    m
}


as.dist <- function(m, diag = FALSE, upper=FALSE)
{
    ## Pack the strict lower triangle of a square matrix into a "dist"
    ## object, carrying row (or, failing that, column) names as Labels.
    m <- as.matrix(m)
    d <- m[row(m) > col(m)]
    attributes(d) <- NULL
    lab <- rownames(m)
    if(is.null(lab)) lab <- colnames(m)
    if(!is.null(lab))
        attr(d, "Labels") <- lab
    attr(d, "Size") <- nrow(m)
    attr(d, "Diag") <- diag
    attr(d, "Upper") <- upper
    class(d) <- "dist"
    d
}


print.dist <- function(obj, diag=NULL, upper=NULL)
{
    ## Print a "dist" object as its lower triangle by default; `diag`
    ## and `upper` fall back to the stored "Diag"/"Upper" attributes
    ## (FALSE when absent).
    if(is.null(diag)) {
        diag <- attr(obj, "Diag")
        if(is.null(diag)) diag <- FALSE
    }
    if(is.null(upper)) {
        upper <- attr(obj, "Upper")
        if(is.null(upper)) upper <- FALSE
    }
    size <- attr(obj, "Size")
    m <- as.matrix.dist(obj)
    ## blank the parts we are not asked to show
    if(!upper) m[row(m) < col(m)] <- NA
    if(!diag)  m[row(m) == col(m)] <- NA
    ## without diagonal and upper triangle, the first row and last
    ## column are entirely blank, so drop them
    print(if(diag || upper) m else m[-1, -size], na = "")
    invisible(obj)
}


## Hierarchical clustering, on raw input data; we will use Euclidean
## distance.  A range of criteria are supported; also there is a
## storage-economic option.
##
## We use the general routine, `hc', which caters for 7 criteria,
## using a half dissimilarity matrix; (BTW, this uses the very efficient
## nearest neighbor chain algorithm, which makes this algorithm of
## O(n^2) computational time, and differentiates it from the less
## efficient -- i.e. O(n^3) -- implementations in all commercial
## statistical packages -- as far as I am aware -- except Clustan.)
##
## Clustering Methods:
##
## 1. Ward's minimum variance or error sum of squares method.
## 2. single linkage or nearest neighbor method.
## 3. complete linkage or diameter.
## 4. average linkage, group average, or UPGMA method.
## 5. McQuitty's or WPGMA method.
## 6. median, Gower's or WPGMC method.
## 7. centroid or UPGMC method.
##
## Original author: F. Murtagh, May 1992
## R Modifications: Ross Ihaka, Dec 1996
##		    Friedrich Leisch, Apr 1998

hclust <- function(d, method="complete")
{
    ## Hierarchical agglomerative clustering of the dissimilarities in
    ## `d` (a "dist" object); `method` selects one of the seven merge
    ## criteria listed in the comments above.
    method <-  pmatch(method, c("ward", "single",
				"complete", "average", "mcquitty",
				"median", "centroid"))
    if(is.na(method))
	stop("invalid clustering method")
    ## NOTE(review): pmatch() returns NA (not -1) for ambiguous matches,
    ## so this branch looks unreachable -- confirm before relying on it
    if(method == -1)
	stop("ambiguous clustering method")

    n <- attr(d, "Size")
    if(is.null(n))
	stop("invalid dissimilarities")
    ## NOTE(review): `labels` is assigned but never used; the result
    ## below re-reads attr(d, "Labels") directly
    labels <- attr(d, "Labels")

    len <- n*(n-1)/2
    ## 1st step: the Fortran workhorse performs the agglomeration;
    ## ia/ib hold the pair merged at each step, crit the merge heights
    hcl <- .Fortran("hclust",
		    n = as.integer(n),
		    len = as.integer(len),
		    method = as.integer(method),
		    ia = integer(n),
		    ib = integer(n),
		    crit = double(n),
		    membr = double(n),
		    nn = integer(n),
		    disnn = double(n),
		    flag = logical(n),
		    diss = as.double(d), PACKAGE="mva")

    ## 2nd step: interpret the information that we now have
    ## as merge, height, and order lists.

    hcass <- .Fortran("hcass2",
		      n = as.integer(n),
		      ia = as.integer(hcl$ia),
		      ib = as.integer(hcl$ib),
		      order = integer(n),
		      iia = integer(n),
		      iib = integer(n), PACKAGE="mva")

    ## assemble the classed result: n objects yield n-1 merges
    tree <- list(
		 merge=cbind(hcass$iia[1:(n-1)], hcass$iib[1:(n-1)]),
		 height=hcl$crit[1:(n-1)],
		 order=hcass$order,
		 labels=attr(d, "Labels"))
    class(tree) <- "hclust"
    tree
}

plot.hclust <-
    function (tree, hang = 0.1, labels=NULL, ...)
{
    ## Dendrogram plot of an "hclust" tree.  `hang` controls how far
    ## below a merge the leaf labels are drawn; `labels` overrides the
    ## leaf labels (FALSE suppresses them).
    merge <- tree$merge
    if (!is.matrix(merge) || ncol(merge) != 2)
	stop("invalid dendrogram")
    n <- nrow(merge)
    height <- as.double(tree$height)
    order <- as.double(order(tree$order))

    ## default labels: the tree's own, or "1".."n+1" when it has none
    labels <-
	if(missing(labels)){
	    if (is.null(tree$labels))
		paste(1:(n+1))
	    else
		as.character(tree$labels)
	} else {
	    ## NOTE(review): `labels==FALSE` misbehaves when a character
	    ## vector is supplied (vectorised comparison in `if`);
	    ## is.logical(labels) && !labels would be safer -- confirm
	    if(labels==FALSE)
		character(n+1)
	    else
		as.character(labels)
	}

    ## the drawing itself is done by internal C code
    plot.new()
    .Internal(dend.window(n, merge, height, order, hang, labels, ...))
    .Internal(dend(n, merge, height, order, hang, labels, ...))
    axis(2, at=pretty(range(height)))
    invisible()
}

## Generic: convert other clustering results to class "hclust".
as.hclust <- function(x, ...) UseMethod("as.hclust")

as.hclust.twins <- function(x)
{
    ## Convert a "twins" object to class "hclust": same merge and order,
    ## heights sorted increasingly, labels taken from the data rownames.
    h <- list(merge  = x$merge,
              height = sort(x$height),
              order  = x$order,
              labels = rownames(x$data))
    class(h) <- "hclust"
    h
}

kmeans <- function(x, centers, iter.max = 10)
{
    ## K-means clustering of the rows of `x` via the compiled "kmns"
    ## routine.  `centers` is either the number of clusters (starting
    ## centers are sampled from the rows of x) or a matrix of starting
    ## centers.
    x <- as.matrix(x)
    m <- nrow(x)
    if(missing(centers))
	stop("centers must be a number or a matrix")
    if(length(centers) == 1) {
	k <- centers
	if(m < k)
	    stop("more cluster centers than data points.")
	## sample k distinct rows as the initial centers
	centers <- x[sample(1:m, k), ]
    } else {
	centers <- as.matrix(centers)
	k <- nrow(centers)
    }
    if(iter.max < 1) stop("iter.max must be positive.")
    ## NOTE(review): duplicates the m < k test above for the numeric
    ## branch; it only matters when `centers` was given as a matrix
    if(m < k)
	stop("more cluster centers than data points.")
    if(ncol(x) != ncol(centers))
	stop("must have same number of columns in x and centers.")
    Z <- .Fortran("kmns",
		  as.double(x),
		  as.integer(m),
		  as.integer(ncol(x)),
		  centers = as.double(centers),
		  as.integer(k),
		  c1 = integer(m),	# cluster assignment per point
		  integer(m),
		  nc =integer(k),	# cluster sizes
		  double(k),
		  double(k),
		  integer(k),
		  double(m),
		  integer(k),
		  integer(k),
		  as.integer(iter.max),
		  wss = double(k),	# within-cluster sums of squares
		  ifault = as.integer(0), PACKAGE="mva")
    ## ifault 0 = success (switch does nothing); 1/2/3 map to the
    ## conditions below
    switch(Z$ifault,
	   stop("empty cluster: try a better set of initial centers"),
	   warning("did not converge in iter.max iterations"),
	   stop("number of cluster centres must lie between 1 and nrow(x)")
	   )
    centers <- matrix(Z$centers, k)
    dimnames(centers) <- list(1:k, dimnames(x)[[2]])
    list(cluster = Z$c1, centers = centers, withinss = Z$wss, size = Z$nc)
}

plot.prcomp <- function(x, ...)
{
    ## the plot method for "prcomp" objects is a screeplot of variances
    screeplot(x, ...)
}

prcomp <- function(x, retx = TRUE, center = TRUE, scale. = FALSE,
                   tol = NULL) {
    ## Principal components via SVD of the centred (and optionally
    ## scaled) data matrix.
    ##   retx - also return the rotated data (scores) as component `x`
    ##   tol  - drop components whose sdev is <= tol times the largest
    x <- as.matrix(x)
    ## BUG FIX: keep the processed matrix -- the scores below must be
    ## computed from the centred/scaled data, not the raw input
    x <- scale(x, center = center, scale = scale.)
    s <- svd(x, nu = 0)
    ## convert singular values to sample standard deviations
    s$d <- s$d / sqrt(max(1, nrow(x) - 1))
    if (!is.null(tol)) {
        rank <- sum(s$d > (s$d[1] * tol))
        if (rank < ncol(x)) {
            ## BUG FIX: truncate d together with v so sdev and rotation
            ## stay consistent in length
            s$v <- s$v[, 1:rank, drop = FALSE]
            s$d <- s$d[1:rank]
        }
    }
    dimnames(s$v) <-
        list(colnames(x), paste("PC", seq_len(ncol(s$v)), sep = ""))
    r <- list(sdev = s$d, rotation = s$v)
    if (retx) r$x <- x %*% s$v
    class(r) <- "prcomp"
    r
}

print.prcomp <- function(x, print.x = FALSE, ...) {
    ## Show the component standard deviations and the rotation matrix;
    ## with print.x, also the rotated data (when present).
    cat("Standard deviations:\n")
    print(x$sdev)
    cat("\nRotation:\n")
    print(x$rotation)
    if (print.x && length(x$x) > 0) {
        cat("\nRotated variables:\n")
        print(x$x)
    }
    invisible(x)
}

summary.prcomp <- function(object) {
    ## Augment a prcomp fit with an "importance" table: standard
    ## deviation, proportion of variance, and cumulative proportion.
    prop <- object$sdev^2
    prop <- prop / sum(prop)
    tab <- rbind("Standard deviation" = object$sdev,
                 "Proportion of Variance" = round(prop, 5),
                 "Cumulative Proportion" = round(cumsum(prop), 5))
    colnames(tab) <- colnames(object$rotation)
    object$importance <- tab
    class(object) <- "summary.prcomp"
    object
}

print.summary.prcomp <- function(x, digits = min(3, .Options$digits-3),
                                 ...) {
    ## Display the importance table built by summary.prcomp().
    imp <- x$importance
    cat("Importance of components:\n")
    print(imp, digits = digits)
    invisible(x)
}
## copyright (C) 1998 W. N. Venables and B. D. Ripley
##
predict.princomp <- function(object, newdata, ...) {
    ## Without new data, return the stored scores; otherwise centre and
    ## scale the new observations as the fit did and project them onto
    ## the loadings.
    if (missing(newdata))
        object$scores
    else
        scale(newdata, object$center, object$scale) %*% object$loadings
}

summary.princomp <-
function(object, loadings = FALSE, cutoff = 0.1, digits = 3, ...) {
    ## Print the importance of each component and, on request, the
    ## loadings with entries smaller than `cutoff` blanked out.
    prop <- object$sdev^2
    prop <- prop / sum(prop)
    cat("Importance of components:\n")
    print(rbind("Standard deviation" = object$sdev,
                "Proportion of Variance" = prop,
                "Cumulative Proportion" = cumsum(prop)))
    if(loadings) {
        cat("\nLoadings:\n")
        fmt <- format(round(object$loadings, digits = digits))
        ## blank small loadings while keeping the column width
        fmt[abs(object$loadings) < cutoff] <-
            substring("       ", 1, nchar(fmt[1,1]))
        print(fmt, quote = FALSE, ...)
    }
    invisible(object)
}

plot.princomp <- function(x, ...)
{
    ## the plot method for "princomp" objects is a screeplot of variances
    screeplot(x, ...)
}

screeplot <-
function(x, npcs = min(10, length(x$sdev)),
         type = c("barplot", "lines"),
         main = deparse(substitute(x)), ...) {
    ## Plot the variances of the leading `npcs` components, either as a
    ## bar chart (default) or a line plot.
    eval(main)                          # force `main` before plotting
    type <- match.arg(type)
    vars <- x$sdev^2
    idx <- seq(length = npcs)
    switch(type,
           barplot = barplot(vars[idx], names = names(vars[idx]),
                             main = main, ylab = "Variances", ...),
           lines = {
               plot(idx, vars[idx], type = "b", axes = FALSE, main = main,
                    xlab = "", ylab = "Variances", ...)
               axis(2)
               axis(1, at = idx, labels = names(vars[idx]))
           })
    invisible()
}

loadings <- function(x)
{
    ## extractor for the loadings component of princomp (and similar) fits
    x$loadings
}
princomp <- function(x, cor = FALSE, scores = TRUE,
                     subset = rep(TRUE, nrow(as.matrix(x)))) {
    ## Principal components analysis via the eigendecomposition of the
    ## covariance (or, with cor = TRUE, correlation) matrix of
    ## x[subset, ].  Returns a classed list with sdev, loadings, center,
    ## scale, n.obs, scores and the matched call.
    ## it is tempting to add use="all.obs" which could be passed to cov
    ## or cor but then the calculation of N is complicated.
    z<- as.matrix(x)[subset, , drop = FALSE]
    N <- nrow(z)
    if (cor)
        ## NOTE(review): get("cor", envir = .GlobalEnv) appears to work
        ## around the argument `cor` shadowing the function of the same
        ## name (inherits = TRUE finds it up the search path) -- confirm
        cv <- get("cor", envir = .GlobalEnv)(z)
    else
        cv <- cov(z)
    ## (svd can be used but gives different signs for some vectors)
    edc <- eigen(cv)
    cn <- paste("Comp.", 1:ncol(cv), sep = "")
    names(edc$values) <- cn
    dimnames(edc$vectors) <- list(dimnames(x)[[2]], cn)
    scr <- NULL
    if (cor) {
        ## correlation case: sdevs come straight from the eigenvalues;
        ## `scale` is the per-column sd with divisor N (not N-1)
        sdev <- sqrt(edc$values)
        sc <- (apply(z, 2, var)*(N-1)/N)^0.5
        if (scores)
            scr<- (scale(z, center = TRUE, scale = TRUE)
                   %*% edc$vectors)*sqrt(N/(N-1))
    } else {
        ## covariance case: rescale the eigenvalues to divisor-N form
        sdev <- sqrt(edc$values*(N-1)/N)
        sc <- rep(1, ncol(z))
        if (scores)
            scr<- (scale(z, center = TRUE, scale = FALSE)
                   %*% edc$vectors)
   }
    names(sc) <- dimnames(x)[[2]]
    edc <-list(sdev = sdev, loadings = edc$vectors,
               center = apply(z, 2, mean), scale = sc, n.obs = N,
               scores = scr, call = match.call())
    ## The Splus function also return list elements factor.sdev,
    ## correlations and coef, but these are not documented in the help.
    ## coef seems to equal load.  The Splus function also returns list
    ## element terms which is not supported here.
    class(edc) <- "princomp"
    edc
}

print.princomp <- function(x) {
    ## Brief display of a "princomp" fit: the call, the component
    ## standard deviations, and the data dimensions.
    cat("Call:\n")
    dput(x$call)
    cat("\nStandard deviations:\n")
    print(x$sdev)
    nvar <- length(x$scale)
    cat("\n", nvar, " variables and ", x$n.obs, "observations.\n")
    invisible(x)
}
.First.lib <- function(lib, pkg) {
    ## Load hook run when the package is attached: load the shared
    ## library holding the compiled routines used by dist/hclust/kmeans
    ## and cmdscale.
    library.dynam("mva", pkg, lib)
    ## NOTE(review): provide() is very old R API, removed in later
    ## versions -- confirm the target R version still supports it
    provide(mva)
}
