# file biplot.R
# copyright (C) 1998 W. N. Venables and B. D. Ripley
#
# `biplot' generic: dispatches on the class of the first argument.
# Methods defined below: biplot.default and biplot.princomp.
biplot <- function(x, ...) UseMethod("biplot")

biplot.default <-
function(x, y, var.axes = TRUE, col, cex = rep(par("cex"), 2),
	xlabs = NULL, ylabs = NULL, expand=1, xlim = NULL, ylim = NULL,
         arrow.len = 0.1, ...)
{
  ## Draw a biplot: the rows of `x' (observations) are plotted against
  ## the bottom/left axes, the rows of `y' (variables) against the
  ## top/right axes, on a common square plotting region.  If var.axes
  ## is TRUE, arrows are drawn from the origin towards each variable.
  ## expand rescales the variable axes relative to the observations.
  n <- nrow(x)
  p <- nrow(y)
  if(missing(xlabs)) {
    xlabs <- dimnames(x)[[1]]
    if(is.null(xlabs)) xlabs <- 1:n
  }
  xlabs <- as.character(xlabs)
  dimnames(x) <- list(xlabs, dimnames(x)[[2]])
  if(missing(ylabs)) {
    ylabs <- dimnames(y)[[1]]
    if(is.null(ylabs)) ylabs <- paste("Var", 1:p)
  }
  ylabs <- as.character(ylabs)
  dimnames(y) <- list(ylabs, dimnames(y)[[2]])

  ## Recycle scalar cex/col so element 1 styles observations and
  ## element 2 styles variables.
  if(length(cex) == 1) cex <- c(cex, cex)
  if(missing(col)) {
    col <- par("col")
    if (!is.numeric(col)) col <- match(col, palette())
    col <- c(col, col + 1)
  }
  else if(length(col) == 1) col <- c(col, col)

  ## Symmetric-about-zero range, so the origin stays inside the plot.
  unsigned.range <- function(x) c(-abs(min(x)), abs(max(x)))
  rangx1 <- unsigned.range(x[, 1])
  rangx2 <- unsigned.range(x[, 2])
  rangy1 <- unsigned.range(y[, 1])
  rangy2 <- unsigned.range(y[, 2])

  if(missing(xlim) && missing(ylim))
    xlim <- ylim <- rangx1 <- rangx2 <- range(rangx1, rangx2)
  else if(missing(xlim)) xlim <- rangx1
  ## BUG FIX: only default ylim when it was not supplied.  The original
  ## unconditional `else ylim <- rangx2` clobbered a user-supplied ylim
  ## whenever xlim was also given.
  else if(missing(ylim)) ylim <- rangx2
  ratio <- max(rangy1/rangx1, rangy2/rangx2)/expand
  ## BUG FIX: save the par state *before* registering the restore;
  ## the original registered on.exit(par(oldpar)) one line before
  ## oldpar was defined, so a failure in par() would have triggered an
  ## error about a missing object during exit.
  oldpar <- par(pty = "s")
  on.exit(par(oldpar))
  plot(x, type = "n", xlim = xlim, ylim = ylim, col = col[1], ...)
  text(x, xlabs, cex = cex[1], col = col[1], ...)
  ## Overlay the variable coordinates on their own (scaled) axes.
  par(new = TRUE)
  plot(y, axes = FALSE, type = "n", xlim = xlim*ratio, ylim = ylim*ratio,
       xlab = "", ylab = "", col = col[1], ...)
  axis(3, col = col[2])
  axis(4, col = col[2])
  box(col = col[1])
  text(y, labels=ylabs, cex = cex[2], col = col[2], ...)
  if(var.axes)
      ## Arrows stop at 80% of each variable point so labels stay legible.
      arrows(0, 0, y[,1] * 0.8, y[,2] * 0.8, col = col[2], length=arrow.len)
  invisible()
}

biplot.princomp <- function(x, choices = 1:2, scale = 1, pc.biplot=FALSE, ...)
{
  ## Biplot of a "princomp" fit.  The scores and loadings for the two
  ## `choices' components are rescaled by lam^scale and lam^-scale
  ## respectively (Gabriel's family of biplots); pc.biplot = TRUE gives
  ## Gabriel's "principal component biplot" (scores divided by
  ## sqrt(n)).  Uses FALSE rather than the reassignable shorthand F.
  if(length(choices) != 2) stop("length of choices must be 2")
  if(!length(scores <- x$scores))
    stop(paste("object", deparse(substitute(x)), "has no scores"))
  lam <- x$sdev[choices]
  ## n.obs may be absent on hand-built objects; fall back to 1.
  if(is.null(n <- x$n.obs)) n <- 1
  lam <- lam * sqrt(n)
  if(scale < 0 || scale > 1) warning("scale is outside [0, 1]")
  if(scale != 0) lam <- lam^scale
  if(pc.biplot) lam <- lam / sqrt(n)
  biplot.default(t(t(scores[, choices]) / lam),
                 t(t(x$loadings[, choices]) * lam), ...)
  invisible()
}
# Seber pages 506-507, after a Golub original

cancor <-
function(x, y, xcenter=TRUE, ycenter=TRUE)
{
	## Canonical correlation analysis via QR decompositions and an SVD
	## (Seber, pages 506-507, after a Golub original).
	##
	## x, y:             data matrices with the same number of rows.
	## xcenter, ycenter: TRUE to centre columns on their means, FALSE
	##                   to leave them alone, or a numeric vector of
	##                   values to subtract (recycled to the number of
	##                   columns).
	## Returns the canonical correlations, the x/y coefficient
	## matrices, and the centering values actually used.
	x <- as.matrix(x)
	y <- as.matrix(y)
	if(nrow(x) != nrow(y)) stop("unequal number of rows in cancor")
	nr <- nrow(x)
	ncx <- ncol(x)
	ncy <- ncol(y)
	## Shared column-centering helper (the original duplicated this
	## logic for x and y).  `center' is a logical flag or a numeric
	## vector; returns the centred matrix and the subtracted values.
	center.cols <- function(m, center, nc) {
		if(is.logical(center))
			center <- if(center) colMeans(m) else rep(0, nc)
		else
			center <- rep(center, length = nc)
		list(m = m - rep(center, rep(nr, nc)), center = center)
	}
	cx <- center.cols(x, xcenter, ncx)
	x <- cx$m
	cy <- center.cols(y, ycenter, ncy)
	y <- cy$m
	qx <- qr(x)
	qy <- qr(y)
	dx <- qx$rank
	dy <- qy$rank
	## The singular values of Qx'Qy are the canonical correlations;
	## drop = FALSE (not the shorthand F) keeps a 1-row result a matrix.
	z <- svd(qr.qty(qx, qr.qy(qy, diag(1, nr, dy)))[1:dx,, drop = FALSE],
		dx, dy)
	## Back-substitute through the R factors to express the canonical
	## variates in terms of the original (centred) variables.
	list(cor = z$d,
		xcoef = backsolve((qx$qr)[1:dx, 1:dx, drop = FALSE], z$u),
		ycoef = backsolve((qy$qr)[1:dy, 1:dy, drop = FALSE], z$v),
		xcenter = cx$center,
		ycenter = cy$center)
}
cmdscale <- function (d, k = 2, eig = FALSE) {
  ## Classical (metric) multidimensional scaling: embed a dissimilarity
  ## into k dimensions via the eigendecomposition of the double-centred
  ## squared-distance matrix.
  ##
  ## d:   a "dist" object (carries a "Size" attribute) or a square
  ##      matrix of distances.
  ## k:   number of dimensions required.
  ## eig: if TRUE, also return the k largest eigenvalues.
  if (any(is.na(d)))
    stop("NA values not allowed in d")
  if (is.null(n <- attr(d, "Size"))) {
    x <- as.matrix(d^2)
    if ((n <- nrow(x)) != ncol(x))
      stop("Distances must be result of dist or a square matrix")
  }
  else {
    ## Expand the lower-triangle "dist" vector into a full symmetric
    ## matrix of squared distances.
    x <- matrix(0, n, n)
    x[row(x) > col(x)] <- d^2
    x <- x + t(x)
  }
  storage.mode(x) <- "double"
  ## Double-centre in compiled code, then eigendecompose.
  Tmat <- -0.5 * .C("dblcen", x, as.integer(n))[[1]]
  e <- eigen(Tmat, symmetric = TRUE)
  ev <- e$values[seq_len(k)]
  ## BUG FIX: diag(v) on a length-one vector builds an identity matrix
  ## of that (truncated) size, so k = 1 used to fail; diag(v, k) always
  ## builds the intended k x k diagonal matrix.  drop = FALSE keeps the
  ## eigenvector block a matrix for the same reason.
  ## NOTE(review): negative eigenvalues (non-Euclidean d) still give
  ## NaN through sqrt(), exactly as in the original.
  points <- e$vectors[, seq_len(k), drop = FALSE] %*% diag(sqrt(ev), k)
  dimnames(points) <- list(dimnames(d)[[1]], NULL)
  if (eig) list(points = points, eig = ev)
  else points
}
dist <-
function(x, method="euclidian", diag=FALSE, upper=FALSE)
{
	## Compute the distances between the rows of x, returned as a
	## lower-triangle "dist" vector with Size/Labels/Diag/Upper
	## attributes (consumed by as.matrix.dist and print.dist).
	##
	## NOTE(review): "euclidian" is a long-standing misspelling of
	## "euclidean".  It is kept verbatim because the table position is
	## the method code passed to the C routine and existing callers may
	## spell it this way; pmatch() still accepts any unambiguous prefix
	## such as "euclid".
	method <-  pmatch(method, c("euclidian", "maximum",
		"manhattan", "canberra", "binary"))
	if(is.na(method))
		stop("invalid distance method")
	## NOTE(review): retained from the S original; pmatch() in R
	## signals ambiguity with NA, so this branch looks unreachable.
	if(method == -1)
		stop("ambiguous distance method")

	x <- as.matrix(x)
	len <- nrow(x)*(nrow(x) - 1)/2

	## Explicit coercions guarantee the storage modes the compiled
	## routine expects (the original passed nrow/ncol uncoerced).
	d <- .C("dist",
		as.double(x),
		as.integer(nrow(x)),
		as.integer(ncol(x)),
		double(len),
		as.integer(method))[[4]]
	attr(d, "Size") <- nrow(x)
	attr(d, "Labels") <- dimnames(x)[[1]]
	attr(d, "Diag") <- diag
	attr(d, "Upper") <- upper
	class(d) <- "dist"
	return(d)
}

names.dist <- function(d)
{
  ## The element names of a "dist" object are its row labels, stored in
  ## the "Labels" attribute (NULL when the input had no dimnames).
  attr(d, "Labels")
}

"names<-.dist" <- function(d, n)
{
  ## Replace the row labels of a "dist" object; exactly one label per
  ## observation (the "Size" attribute) is required.
  size <- attr(d, "Size")
  if(length(n) != size)
    stop("invalid names for dist object")
  attr(d, "Labels") <- n
  d
}

as.matrix.dist <- function(obj)
{
  ## Expand a lower-triangle "dist" vector into the full symmetric
  ## distance matrix (zero diagonal), labelled from the "Labels"
  ## attribute or with 1..Size when no labels are stored.
  n <- attr(obj, "Size")
  full <- matrix(0, n, n)
  full[row(full) > col(full)] <- obj
  full <- full + t(full)
  lab <- attr(obj, "Labels")
  if(is.null(lab))
    lab <- 1:n
  dimnames(full) <- list(lab, lab)
  full
}

print.dist <-
function(obj, diag=NULL, upper=NULL)
{
  ## Print a "dist" object as a matrix.  The diag/upper arguments
  ## control whether the diagonal and upper triangle are shown;
  ## when not supplied, the object's stored "Diag"/"Upper" attributes
  ## are used, defaulting to FALSE.
  if(missing(diag)) {
    diag <- attr(obj, "Diag")
    if(is.null(diag))
      diag <- FALSE
  }
  if(missing(upper)) {
    upper <- attr(obj, "Upper")
    if(is.null(upper))
      upper <- FALSE
  }

  size <- attr(obj, "Size")
  df <- as.matrix(obj)

  ## Blank the suppressed cells; NA prints as "" below.
  if(!upper)
    df[row(df) < col(df)] <- NA
  if(!diag)
    df[row(df) == col(df)] <- NA

  if(diag || upper)
    print(df, na="")
  else
    ## With both suppressed, the first row and last column are empty,
    ## so drop them for a compact triangular display.
    print(df[-1,-size], na="")
}


# Hierarchical clustering, operating on a dissimilarity object such as
# the output of dist() (commonly Euclidean distance).  A range of
# criteria are supported; the half (lower-triangle) dissimilarity
# representation keeps storage economical.
#
# We use the general routine, `hc', which caters for 7 criteria,
# using a half dissimilarity matrix; (BTW, this uses the very efficient
# nearest neighbor chain algorithm, which makes this algorithm of
# O(n^2) computational time, and differentiates it from the less
# efficient -- i.e. O(n^3) -- implementations in all commercial
# statistical packages -- as far as I am aware -- except Clustan.)
#
# Clustering Methods:
#
# 1. Ward's minimum variance or error sum of squares method.
# 2. single linkage or nearest neighbor method.
# 3. complete linkage or diameter.
# 4. average linkage, group average, or UPGMA method.
# 5. McQuitty's or WPGMA method.
# 6. median, Gower's or WPGMC method.
# 7. centroid or UPGMC method.
#
# Original author: F. Murtagh, May 1992
# R Modifications: Ross Ihaka, Dec 1996
#                  Friedrich Leisch, Apr 1998

hclust <-
function(d, method="complete")
{
	## Agglomerative hierarchical clustering of a dissimilarity object
	## (e.g. the output of dist()).  `method' selects one of the 7
	## criteria listed in the header comment; its table position is the
	## code passed to the Fortran routine.
	method <-  pmatch(method, c("ward", "single",
			"complete", "average", "mcquitty",
			"median", "centroid"))
	if(is.na(method))
		stop("invalid clustering method")
	## NOTE(review): retained from the S original; pmatch() in R
	## signals ambiguity with NA, so this branch looks unreachable.
	if(method == -1)
		stop("ambiguous clustering method")

	n <- attr(d, "Size")
	if(is.null(n))
		stop("invalid dissimilarities")
	labels <- attr(d, "Labels")

	## First step: nearest-neighbour-chain agglomeration in Fortran.
	len <- n*(n-1)/2
	hcl <- .Fortran("hclust",
		n = as.integer(n),
		len = as.integer(len),
		method = as.integer(method),
		ia = integer(n),
		ib = integer(n),
		crit = double(n),
		membr = double(n),
		nn = integer(n),
		disnn = double(n),
		flag = logical(n),
		diss = as.double(d))

	# 2nd step: interpret the information that we now have
	# as merge, height, and order lists.

	hcass <- .Fortran("hcass2",
		n = as.integer(n),
		ia = as.integer(hcl$ia),
		ib = as.integer(hcl$ib),
		order = integer(n),
		iia = integer(n),
		iib = integer(n))

	## seq_len(n - 1) instead of 1:(n-1); also reuse the `labels'
	## local, which the original fetched but never used.
	tree <- list(
		merge=cbind(hcass$iia[seq_len(n - 1)], hcass$iib[seq_len(n - 1)]),
		height=hcl$crit[seq_len(n - 1)],
		order=hcass$order,
		labels=labels)
	class(tree) <- "hclust"
	tree
}

plot.hclust <-
function (tree, hang = 0.1, labels=NULL, ...)
{
  ## Plot the dendrogram of an "hclust" tree via the internal dend
  ## routines.
  ## hang:   fraction by which leaf labels hang below their merge height.
  ## labels: FALSE suppresses leaf labels, a character vector supplies
  ##         them; by default the tree's own labels (or 1..n+1) are used.
  merge <- tree$merge
  if (!is.matrix(merge) || ncol(merge) != 2)
    stop("invalid dendrogram")
  n <- nrow(merge)
  height <- as.double(tree$height)
  order <- as.double(order(tree$order))

  if(missing(labels)){
    if (is.null(tree$labels))
      labels <- paste(1:(n+1))
    else
      labels <- as.character(tree$labels)
  }
  else{
    ## BUG FIX: the original `labels == FALSE` compared elementwise --
    ## for a character vector it coerced FALSE to "FALSE" and compared
    ## strings; identical() tests the one intended case of a literal
    ## FALSE.
    if(identical(labels, FALSE))
      labels <- character(n+1)
    else
      labels <- as.character(labels)
  }

  plot.new()
  .Internal(dend.window(n, merge, height, order, hang,
                        labels, ...))
  .Internal(dend(n, merge, height, order, hang, labels,
                 ...))
  axis(2, at=pretty(range(height)))
  invisible()
}
  
prcomp <- function(x, scale=FALSE, use="all.obs") {
	## Principal components via the SVD of the covariance matrix
	## (or of the correlation matrix when scale = TRUE).  `use' is
	## passed through to cov()/cor() for missing-value handling.
	## Returns a "prcomp" object with component variances and loadings.
	mat <- as.matrix(x)
	cv <- if(scale) cor(mat, use=use) else cov(mat, use=use)
	decomp <- svd(cv)
	comp.names <- paste("Comp.", 1:ncol(cv), sep="")
	variances <- decomp$d
	names(variances) <- comp.names
	load <- decomp$u
	dimnames(load) <- list(dimnames(x)[[2]], comp.names)
	structure(list(var=variances, load=load, scale=scale),
		class="prcomp")
}

print.prcomp <- function(x, ...) {
	## Print method for "prcomp" objects: report which dispersion
	## matrix was decomposed, then the component variances and the
	## loadings.  Adds `...' and an invisible return of x for
	## consistency with print.princomp in this file.
	cat("\nPrincipal Components:", if(x$scale) "Correlation" else "Covariance",
		"matrix\n\n")
	cat("Component Variances:\n")
	print(x$var)
	cat("\nLoadings:\n")
	print(x$load)
	cat("\n")
	invisible(x)
}

plot.prcomp <- function(x, main="Scree Plot", ylab="Variance",
		xlab="Component", ...) {
	## Scree plot for a "prcomp" object: the component variances in
	## order of extraction.
	variances <- x$var
	plot(variances, main=main, xlab=xlab, ylab=ylab, ...)
}
# file princomp-add.R
# copyright (C) 1998 W. N. Venables and B. D. Ripley
#
predict.princomp <- function(object, newdata,...)
{
  ## Scores for a "princomp" fit.  Without new data, the scores
  ## computed at fit time are returned; otherwise newdata is centred
  ## and scaled with the fit's parameters and projected onto the
  ## loadings.
  if (missing(newdata)) return(object$scores)
  standardized <- scale(newdata, object$center, object$scale)
  standardized %*% object$loadings
}

print.princomp <- function(x, ...)
{
  ## Print method for "princomp" fits: show the component standard
  ## deviations and return the object invisibly (so it pipes/assigns
  ## cleanly).
  sdevs <- x$sdev
  cat("Standard deviations:\n")
  print(sdevs, ...)
  invisible(x)
}

summary.princomp <- 
function(object, loadings = FALSE, cutoff = 0.1, digits=3, ...)
{
  ## Summarize a "princomp" fit: per-component standard deviation,
  ## proportion of variance, and cumulative proportion.  When
  ## loadings = TRUE also print the loadings, blanking entries whose
  ## absolute value is below `cutoff'.  Uses TRUE/FALSE instead of the
  ## reassignable shorthands T/F.
  vars <- object$sdev^2
  vars <- vars/sum(vars)
  cat("Importance of components:\n")
  print(rbind("Standard deviation" = object$sdev,
              "Proportion of Variance" = vars,
              "Cumulative Proportion" = cumsum(vars)))
  if(loadings) {
    cat("\nLoadings:\n")
    cx <- format(round(object$loadings, digits=digits))
    ## Replace small loadings with spaces of the same width so the
    ## printed columns stay aligned.
    cx[abs(object$loadings) < cutoff] <-
      substring("       ", 1, nchar(cx[1,1]))
    print(cx, quote = FALSE, ...)
  }
  invisible(object)
}

# For "princomp" objects, plot() draws a scree plot; all arguments are
# forwarded unchanged to screeplot().
plot.princomp <- function(...) screeplot(...)

screeplot <-
function(x, npcs=min(10, length(x$sdev)), type=c("barplot", "lines"),
         main = deparse(substitute(x)), ...)
{
  ## Scree plot of the first `npcs' component variances of a
  ## "princomp"-style fit, as bars (default) or a line plot.
  ## Force `main' immediately, while substitute(x) still deparses to
  ## the caller's expression (same effect as the original eval(main)).
  eval(main)
  type <- match.arg(type)
  pcs <- x$sdev^2
  ## seq_len() instead of seq(length=); identical for npcs >= 0.
  xp <- seq_len(npcs)
  if(type=="barplot") barplot(pcs[xp], names = names(pcs[xp]), 
       main = main, ylab = "Variances", ...)
  else {
    ## FALSE rather than the reassignable shorthand F.
    plot(xp, pcs[xp], type = "b", axes = FALSE, main = main, xlab="",
            ylab = "Variances", ...)
    axis(2)
    axis(1, at = xp, labels = names(pcs[xp]))
  }
  invisible()
}

loadings <- function(x) {
  ## Extract the loadings component of a fitted object (e.g. the
  ## result of princomp()); NULL when the object has none.
  x$loadings
}

princomp <- function(x, cor=FALSE, scores=TRUE,
                     subset=rep(TRUE, nrow(as.matrix(x)))) {
  ## Principal components analysis by eigendecomposition of the
  ## covariance (or, when cor = TRUE, correlation) matrix, using the
  ## population divisor N rather than N - 1, as in S-PLUS.
  ## Uses TRUE/FALSE instead of the reassignable shorthands T/F.
  z<- as.matrix(x)[subset,, drop=FALSE]
  N <- nrow(z)
  if(cor)
    ## NOTE(review): fetched through the global environment rather than
    ## called directly -- the effect is the same as cor(z) unless a
    ## caller has redefined `cor` globally; confirm the intent before
    ## simplifying.
    cv <- get("cor",envir=.GlobalEnv)(z)
  else
    cv <- cov(z)
  ##  (svd can be used but gives different signs for some vectors)
  edc <- eigen(cv)
  cn <- paste("Comp.", 1:ncol(cv), sep="")
  names(edc$values) <- cn
  dimnames(edc$vectors) <- list(dimnames(x)[[2]], cn)
  scr<- NULL
  if (cor) {
    sdev <- sqrt(edc$values)
    ## Population (divisor N) standard deviations of the columns.
    sc <- (apply(z,2,var)*(N-1)/N)^0.5
    if (scores)
      ## Rescale so the scores match the divisor-N convention.
      scr<-(scale(z,center=TRUE,scale=TRUE) %*% edc$vectors)*sqrt(N/(N-1))
  } else {
    ## Convert the divisor-(N-1) eigenvalues of cov() to divisor N.
    sdev <- sqrt(edc$values*(N-1)/N)
    sc <- rep(1, ncol(z))
    if (scores)
      scr<- (scale(z,center=TRUE,scale=FALSE) %*% edc$vectors)
  }
  names(sc) <- dimnames(x)[[2]]
  edc <-list(sdev=sdev, loadings=edc$vectors,
             center=apply(z,2,mean), scale=sc, n.obs=N, scores=scr)
  ## The splus function also return list elements factor.sdev,
  ## correlations and coef, but these are not documented in the
  ## help. coef seems to equal load.  The splus function also return
  ## list elements call and terms which are not supported here.
  class(edc) <- "princomp"
  edc
}
# Load the compiled routines ("dist", "dblcen", "hclust", "hcass2")
# used by the functions above, then announce the package.
# NOTE(review): library.dynam() with an explicit ".so" suffix and
# provide() are conventions of very old R versions -- confirm against
# the target R before reuse.
library.dynam("mva.so")
provide(mva)
