|
| 1 | +#' Jackknife Statistics |
| 2 | +#' |
| 3 | +#' Compute selected jackknife statistics for a rating-curve load-estimation model. |
| 4 | +#' |
| 5 | +#' @param fit an object of class "loadReg"---output from \code{loadReg}. Can also |
| 6 | +#'be an object of class "censReg." |
| 7 | +#' @param which a character string indicating the "load" or |
| 8 | +#'"concentration" model for an object of class "loadReg" or "censReg" for |
| 9 | +#'an object of class "censReg." |
| 10 | +#' @return An object of class "jackStats" containing these components: |
| 11 | +#'coef, the table of coefficient estimates, the jackknife bias and standard errors\cr |
| 12 | +#'coefficients, the jackknifed coefficients\cr |
| 13 | +#'pctcens, the percentage of left-censored values. \cr |
| 14 | +#'The PRESS statistic and individual jackknife differences are also returned |
| 15 | +#'when the percentage of censoring is 0. |
| 16 | +#' @note The \code{jackStats} function can only be used when the analysis is AMLE. |
| 17 | +#' |
| 18 | +#'Abdi and Williams (2010) describe the jackknife as refering to two related techniques: the first |
| 19 | +#'estimates the parameters, their bias and standard errors and the second evaluates the |
| 20 | +#'predictive performance of the model. The second technique is the PRESS statistic (Helsel |
| 21 | +#'and Hirsch, 2002), but can only be used on uncensored data; it is computed by \code{jackStats} |
| 22 | +#'when no data are censored. The first technique can be used to assess the coefficients of the |
| 23 | +#'regression---the bias should be small and the jackknife standard errors should not be much |
| 24 | +#'different from the standard errors reported for the regression. Efron and Tibshirani (1993) |
| 25 | +#'suggest that the bias is small if the relative bias (biuas divided by the jackknife standard |
| 26 | +#'error) is less than 0.25. |
| 27 | +#' @seealso \code{\link{loadReg}} |
| 28 | +#' @keywords utilities |
| 29 | +#' @references |
| 30 | +#' Abdi, H. and Williams, L.J., 2010, Jackknife, in encyclopedia of research design, |
| 31 | +#'Salkind, N.J., editor: Thousand Oaks, Calif., SAGE Publications, 1719 p. |
| 32 | +#' |
| 33 | +#'Efron, B. and Tibshirani, R.J., 1993, An introduction to the bootstrap: Boca Raton, |
| 34 | +#'Fla., Chapman and Hall/CRC, 436 p. |
| 35 | +#' |
| 36 | +#' Helsel, D.R. and Hirsch, R.M., 2002, Statistical methods in water resources: |
| 37 | +#'U.S. Geological Survey Techniques of Water-Resources Investigations, book 4, |
| 38 | +#'chap. A3, 522 p. |
| 39 | +#'Salkind, |
| 40 | +#' @examples |
| 41 | +#'# From application 1 in the vignettes |
| 42 | +#'data(app1.calib) |
| 43 | +#'app1.lr <- loadReg(Phosphorus ~ model(1), data = app1.calib, |
| 44 | +#' flow = "FLOW", dates = "DATES", conc.units="mg/L", |
| 45 | +#' station="Illinois River at Marseilles, Ill.") |
| 46 | +#'jackStats(app1.lr) |
| 47 | +#' @export |
| 48 | +jackStats <- function(fit, which="load") { |
| 49 | + ## Compute some stats |
| 50 | + which <- match.arg(which, c("load", "concentration", "censReg")) |
| 51 | + if(which == "load") { |
| 52 | + # Verify AMLE |
| 53 | + if(fit$method != "AMLE") { |
| 54 | + stop("The analysis method must be AMLE") |
| 55 | + } |
| 56 | + # initial stuff |
| 57 | + NPAR <- fit$lfit$NPAR |
| 58 | + NOBS <- fit$lfit$NOBSC |
| 59 | + # Get the repsonse, X, and Yeff |
| 60 | + Y <- as.lcens(exp(fit$lfit$YLCAL), exp(fit$lfit$YD), fit$lfit$CENSFLAG) |
| 61 | + X <- fit$lfit$XLCAL |
| 62 | + Yeff <- fit$lfit$YLCAL |
| 63 | + ## The code below computes an effective value for left-censored values |
| 64 | + ## by computing the expected value from the prediction |
| 65 | + ## provided if the method is ever published. No plans for now |
| 66 | + #Yeff[fit$lfit$CENSFLAG] <- fit$lfit$XLCAL[fit$lfit$CENSFLAG, ,drop=FALSE] %*% |
| 67 | + # fit$lfit$PARAML[seq(NPAR)] + fit$lfit$RESID[fit$lfit$CENSFLAG] |
| 68 | + # |
| 69 | + # Other Info |
| 70 | + dist <- "lognormal" |
| 71 | + parms <- fit$lfit$PARAML[seq(fit$lfit$NPAR)] |
| 72 | + parnames <- colnames(fit$lfit$XLCAL) |
| 73 | + } else if(which == "concentration") { |
| 74 | + # Verify AMLE |
| 75 | + if(fit$method != "AMLE") { |
| 76 | + stop("The analysis method must be AMLE") |
| 77 | + } |
| 78 | + # initial stuff |
| 79 | + NPAR <- fit$cfit$NPAR |
| 80 | + NOBS <- fit$cfit$NOBSC |
| 81 | + # Get the repsonse, X, and Yeff |
| 82 | + Y <- as.lcens(exp(fit$cfit$YLCAL), exp(fit$cfit$YD), fit$cfit$CENSFLAG) |
| 83 | + X <- fit$cfit$XLCAL |
| 84 | + Yeff <- fit$cfit$YLCAL |
| 85 | + ## See comment above |
| 86 | + #Yeff[fit$cfit$CENSFLAG] <- fit$cfit$XLCAL[fit$cfit$CENSFLAG, ,drop=FALSE] %*% |
| 87 | + # fit$cfit$PARAML[seq(NPAR)] + fit$cfit$RESID[fit$cfit$CENSFLAG] |
| 88 | + # |
| 89 | + # Other Info |
| 90 | + dist <- "lognormal" |
| 91 | + parms <- fit$cfit$PARAML[seq(fit$cfit$NPAR)] |
| 92 | + parnames <- colnames(fit$cfit$XLCAL) |
| 93 | + } else { # must be censReg |
| 94 | + # Verify AMLE |
| 95 | + if(fit$method != "AMLE") { |
| 96 | + stop("The analysis method must be AMLE") |
| 97 | + } |
| 98 | + # initial stuff |
| 99 | + NPAR <- fit$NPAR |
| 100 | + NOBS <- fit$NOBSC |
| 101 | + dist <- fit$dist |
| 102 | + # Get the repsonse, X, and Yeff |
| 103 | + if(dist == "lognormal") { |
| 104 | + Y <- as.lcens(exp(fit$YLCAL), exp(fit$YD), fit$CENSFLAG) |
| 105 | + } else { |
| 106 | + Y <- as.lcens(fit$YLCAL, fit$YD, fit$CENSFLAG) |
| 107 | + } |
| 108 | + X <- fit$XLCAL |
| 109 | + Yeff <- fit$YLCAL |
| 110 | + ## See comment above |
| 111 | + #Yeff[fit$CENSFLAG] <- fit$XLCAL[fit$CENSFLAG, ,drop=FALSE] %*% |
| 112 | + # fit$PARAML[seq(NPAR)] + fit$RESID[fit$CENSFLAG] |
| 113 | + # |
| 114 | + # Other Info |
| 115 | + parms <- fit$PARAML[seq(fit$NPAR)] |
| 116 | + parnames <- colnames(fit$XLCAL) |
| 117 | + } |
| 118 | + # do it |
| 119 | + # set up res and coeff storage |
| 120 | + pre <- numeric(NOBS) |
| 121 | + coeff <- matrix(0, nrow=NOBS, ncol=NPAR) |
| 122 | + for(i in seq(NOBS)) { |
| 123 | + tmp <- censReg_AMLE.fit(Y[-i], X[-i,], dist) |
| 124 | + coeff[i,] <- tmp$PARAML[-(NPAR + 1L)] |
| 125 | + pre[i] <- Yeff[i] - coeff[i,, drop=FALSE] %*% t(X[i,,drop=FALSE]) |
| 126 | + } |
| 127 | + # Compute the jackknife bias and variance of coeffs |
| 128 | + out <- (NOBS - 1)*(rep(parms, each=NOBS) - coeff) |
| 129 | + bias <- -1/NOBS*colSums(out) |
| 130 | + var <- 1/(NOBS*(NOBS-1)) * (colSums(out^2) - NOBS * bias^2) |
| 131 | + coef <- cbind(est=parms, bias=bias, stderr=sqrt(var)) |
| 132 | + rownames(coef) <- parnames |
| 133 | + retval <- list(coef=coef, coefficients=coeff, |
| 134 | + pctcens=pctCens(Y)) |
| 135 | + # Include press if no censoring |
| 136 | + if(retval$pctcens == 0) { |
| 137 | + retval$press <- sum(pre^2) |
| 138 | + retval$pre <- pre |
| 139 | + } |
| 140 | + class(retval) <- "jackStats" |
| 141 | + return(retval) |
| 142 | +} |
0 commit comments