From f1c7bfd28bdd52ceb9ee2151aca60a55cde08144 Mon Sep 17 00:00:00 2001
From: DankerMu
Date: Mon, 9 Feb 2026 15:23:47 +0800
Subject: [PATCH] docs: add environment setup + R env helper scripts

---
 .gitignore | 1 +
 docs/environment.md | 112 +++++++++++++++++++++++++++
 tools/r/check_env.R | 154 ++++++++++++++++++++++++++++++++++++++
 tools/r/install_deps.R | 166 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 433 insertions(+)
 create mode 100644 docs/environment.md
 create mode 100644 tools/r/check_env.R
 create mode 100644 tools/r/install_deps.R

diff --git a/.gitignore b/.gitignore
index e8aea32..2febf8b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@
 **/.Rproj.user/
 **/.RData
 **/.Ruserdata
+.Rlib/
 
 # Python
 **/__pycache__/
diff --git a/docs/environment.md b/docs/environment.md
new file mode 100644
index 0000000..6785957
--- /dev/null
+++ b/docs/environment.md
@@ -0,0 +1,112 @@
+# Environment setup (macOS/Linux)
+
+This repo uses both **R** (AutoSHUD / rSHUD tooling) and **Python** (repo orchestration via `tools/shudnc.py`).
+
+## System dependencies (GDAL/GEOS/PROJ, udunits, netcdf)
+
+You need GDAL/GEOS/PROJ for `sf`/`terra`, plus udunits/netcdf for common geospatial + NetCDF workflows.
+
+### macOS (Homebrew)
+
+```bash
+brew install r pkg-config gdal geos proj udunits netcdf
+```
+
+If you don’t have a compiler toolchain yet:
+
+```bash
+xcode-select --install
+```
+
+### Ubuntu / Debian
+
+```bash
+sudo apt-get update
+sudo apt-get install -y \
+  r-base r-base-dev build-essential pkg-config \
+  gdal-bin libgdal-dev libgeos-dev libproj-dev proj-data proj-bin \
+  libudunits2-dev \
+  libnetcdf-dev netcdf-bin
+```
+
+## R (project-local `.Rlib/`)
+
+We install R packages into a **project-local** library directory: `<repo>/.Rlib/`.
+
+1) Make sure submodules exist (for local `rSHUD/` install):
+
+```bash
+git submodule update --init --recursive
+```
+
+2) Install dependencies into `.Rlib/`:
+
+```bash
+Rscript tools/r/install_deps.R
+```
+
+If you haven't migrated `rSHUD` away from legacy dependencies yet (or if `rSHUD` install fails), you can still set up the core stack first:
+
+```bash
+Rscript tools/r/install_deps.R --skip-rshud
+```
+
+3) Check the environment (exits non-zero if any required package is missing or fails to load):
+
+```bash
+Rscript tools/r/check_env.R
+```
+
+Core-only check (skip `rSHUD`):
+
+```bash
+Rscript tools/r/check_env.R --skip-rshud
+```
+
+For reproducible runs, you can force R to use the project library:
+
+```bash
+R_LIBS_USER="$PWD/.Rlib" Rscript tools/r/check_env.R
+```
+
+## Python (`.venv/`)
+
+`tools/shudnc.py` is the entrypoint for running/validating projects (e.g. `projects/qhh/shud.yaml`).
+
+Create a local venv:
+
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+python -m pip install -U pip
+```
+
+Install the minimal Python deps:
+
+```bash
+python -m pip install pyyaml
+```
+
+Optional (used by `tools/compare_forcing.py` / `tools/compare_output.py`):
+
+```bash
+python -m pip install netCDF4 numpy
+```
+
+## Troubleshooting
+
+### `sf`/`terra` build problems
+
+- **macOS**: make sure Homebrew deps are installed and `pkg-config` is available.
+- **Linux**: ensure you installed the `-dev` packages (`libgdal-dev`, `libgeos-dev`, `libproj-dev`, `libudunits2-dev`, `libnetcdf-dev`).
+
+If you see errors about PROJ resources (e.g. `proj.db`), confirm your PROJ installation is complete and on PATH. On Homebrew systems this may require a new shell session after installing `proj`.
+
+### zsh history expansion gotcha (`!`)
+
+In zsh, `!` triggers history expansion. This often breaks commands that contain version constraints like `pkg!=1.2.3`.
+
+- For R one-liners, this can also break expressions like `pkgs[!vapply(...)]`.
+- Use single quotes: `python -m pip install 'pkg!=1.2.3'`
+- Use single quotes: `Rscript -e 'miss<-pkgs[!vapply(pkgs, requireNamespace, logical(1), quietly=TRUE)]'`
+- Or disable history expansion for the session: `setopt NO_BANG_HIST` (zsh) or `set +H` (bash)
diff --git a/tools/r/check_env.R b/tools/r/check_env.R
new file mode 100644
index 0000000..a5d30c9
--- /dev/null
+++ b/tools/r/check_env.R
@@ -0,0 +1,174 @@
+#!/usr/bin/env Rscript
+
+# Check that the R packages this repo relies on can be loaded.
+# Prints the library search path and package versions, and exits with
+# status 0 when every required package loads, 1 otherwise.
+
+options(warn = 1)
+
+# Turn the raw command-line arguments into a named list of settings.
+# Accepts -h/--help, --skip-rshud, --skip-lwgeom and --lib <dir>; stops with
+# an error on any other argument or when --lib is the last argument.
+parse_args <- function(args) {
+  out <- list(
+    lib = NULL,
+    skip_rshud = FALSE,
+    skip_lwgeom = FALSE,
+    help = FALSE
+  )
+
+  i <- 1
+  while (i <= length(args)) {
+    a <- args[[i]]
+    if (a %in% c("-h", "--help")) {
+      out$help <- TRUE
+      i <- i + 1
+      next
+    }
+    if (a == "--skip-rshud") {
+      out$skip_rshud <- TRUE
+      i <- i + 1
+      next
+    }
+    if (a == "--skip-lwgeom") {
+      out$skip_lwgeom <- TRUE
+      i <- i + 1
+      next
+    }
+    if (a == "--lib") {
+      if (i == length(args)) {
+        stop("Missing value for --lib")
+      }
+      out$lib <- args[[i + 1]]
+      i <- i + 2
+      next
+    }
+    stop(paste0("Unknown argument: ", a))
+  }
+  out
+}
+
+# Absolute path of the running script, taken from Rscript's --file= argument.
+# Returns NULL when no --file= argument is present.
+script_path <- function() {
+  cmd <- commandArgs(trailingOnly = FALSE)
+  file_arg <- grep("^--file=", cmd, value = TRUE)
+  if (length(file_arg) == 0) return(NULL)
+  normalizePath(sub("^--file=", "", file_arg[[1]]), winslash = "/", mustWork = FALSE)
+}
+
+# Write the concatenated arguments to stdout, followed by a newline.
+info <- function(...) {
+  cat(..., "\n", sep = "")
+}
+
+# Write a message to stderr and terminate R with the given exit status.
+die <- function(..., status = 1) {
+  message(...)
+  quit(status = status, save = "no")
+}
+
+# Try to load the namespace of `pkg`.
+# Returns list(ok = TRUE, err = NULL) on success, or
+# list(ok = FALSE, err = <error message>) when loading fails.
+check_pkg <- function(pkg) {
+  tryCatch(
+    {
+      loadNamespace(pkg)
+      list(ok = TRUE, err = NULL)
+    },
+    error = function(e) list(ok = FALSE, err = conditionMessage(e))
+  )
+}
+
+args <- parse_args(commandArgs(trailingOnly = TRUE))
+if (isTRUE(args$help)) {
+  info("Usage: Rscript tools/r/check_env.R [--lib <dir>] [--skip-rshud] [--skip-lwgeom]")
+  info("")
+  info("Checks required R packages and exits non-zero if missing.")
+  quit(status = 0, save = "no")
+}
+
+# When the script path is unknown, fall back to the working directory.
+sp <- script_path()
+script_dir <- if (is.null(sp)) getwd() else dirname(sp)
+repo_root <- normalizePath(file.path(script_dir, "..", ".."), winslash = "/", mustWork = FALSE)
+default_lib_dir <- normalizePath(file.path(repo_root, ".Rlib"), winslash = "/", mustWork = FALSE)
+
+# Each .libPaths() call below prepends, so the directory added last is
+# searched first. Register the default project library first and an explicit
+# --lib afterwards, so the library the user asked for takes precedence.
+if (dir.exists(default_lib_dir) && !(default_lib_dir %in% .libPaths())) {
+  .libPaths(c(default_lib_dir, .libPaths()))
+}
+
+if (!is.null(args$lib)) {
+  lib_dir <- args$lib
+  # Relative paths (neither "/..." nor "C:...") are resolved against the repo root.
+  if (!grepl("^/", lib_dir) && !grepl("^[A-Za-z]:", lib_dir)) {
+    lib_dir <- file.path(repo_root, lib_dir)
+  }
+  lib_dir <- normalizePath(lib_dir, winslash = "/", mustWork = FALSE)
+  if (dir.exists(lib_dir)) {
+    # unique() keeps the first occurrence, which moves lib_dir to the front
+    # even if it was already somewhere on the search path.
+    .libPaths(unique(c(lib_dir, .libPaths())))
+  } else {
+    message("[check_env] Warning: --lib directory not found, ignoring: ", lib_dir)
+  }
+}
+
+required <- c("sf", "terra", "ncdf4", "units")
+if (!isTRUE(args$skip_lwgeom)) required <- c(required, "lwgeom")
+if (!isTRUE(args$skip_rshud)) required <- c(required, "rSHUD")
+
+# Package name -> load error message, for every package that failed to load.
+broken <- list()
+for (pkg in required) {
+  res <- check_pkg(pkg)
+  if (isTRUE(res$ok)) next
+  # Assigning NULL would drop the entry, so store "" when there is no message.
+  broken[[pkg]] <- if (is.null(res$err)) "" else res$err
+}
+
+if (length(broken) > 0) {
+  info("[check_env] R: ", R.version.string)
+  info("[check_env] .libPaths():")
+  for (p in .libPaths()) info(" - ", p)
+  info("")
+  info("[check_env] Missing / broken packages:")
+  for (pkg in names(broken)) {
+    reason <- broken[[pkg]]
+    if (is.null(reason) || !nzchar(reason)) {
+      info(" - ", pkg)
+    } else {
+      info(" - ", pkg, ": ", reason)
+    }
+  }
+  info("")
+  info("[check_env] Fix:")
+  install_cmd <- "Rscript tools/r/install_deps.R"
+  check_cmd <- "Rscript tools/r/check_env.R"
+  if (isTRUE(args$skip_rshud)) {
+    install_cmd <- paste(install_cmd, "--skip-rshud")
+    check_cmd <- paste(check_cmd, "--skip-rshud")
+  }
+  if (isTRUE(args$skip_lwgeom)) {
+    install_cmd <- paste(install_cmd, "--skip-lwgeom")
+    check_cmd <- paste(check_cmd, "--skip-lwgeom")
+  }
+  info(" ", install_cmd)
+  info(" R_LIBS_USER=\"$PWD/.Rlib\" ", check_cmd)
+  quit(status = 1, save = "no")
+}
+
+info("[check_env] OK")
+info("[check_env] R: ", R.version.string)
+info("[check_env] .libPaths():")
+for (p in .libPaths()) info(" - ", p)
+info("")
+for (pkg in required) {
+  ver <- as.character(utils::packageVersion(pkg))
+  info("[check_env] ", pkg, ": ", ver)
+}
+quit(status = 0, save = "no")
diff --git a/tools/r/install_deps.R b/tools/r/install_deps.R
new file mode 100644
index 0000000..f2526d8
--- /dev/null
+++ b/tools/r/install_deps.R
@@ -0,0 +1,185 @@
+#!/usr/bin/env Rscript
+
+# Install the R packages this repo needs into a project-local library
+# (default: <repo>/.Rlib), then optionally install the local rSHUD checkout.
+# Exits with status 0 on success, 1 when anything is still missing.
+
+options(warn = 1)
+
+# Turn the raw command-line arguments into a named list of settings.
+# Accepts -h/--help, --skip-rshud, --skip-lwgeom, --lib <dir> and
+# --rshud <path>; stops with an error on any other argument or when a
+# value-taking flag is the last argument.
+parse_args <- function(args) {
+  out <- list(
+    lib = NULL,
+    rshud = NULL,
+    skip_rshud = FALSE,
+    skip_lwgeom = FALSE,
+    help = FALSE
+  )
+
+  i <- 1
+  while (i <= length(args)) {
+    a <- args[[i]]
+    if (a %in% c("-h", "--help")) {
+      out$help <- TRUE
+      i <- i + 1
+      next
+    }
+    if (a == "--skip-rshud") {
+      out$skip_rshud <- TRUE
+      i <- i + 1
+      next
+    }
+    if (a == "--skip-lwgeom") {
+      out$skip_lwgeom <- TRUE
+      i <- i + 1
+      next
+    }
+    if (a %in% c("--lib", "--rshud")) {
+      if (i == length(args)) {
+        stop(paste0("Missing value for ", a))
+      }
+      val <- args[[i + 1]]
+      if (a == "--lib") out$lib <- val
+      if (a == "--rshud") out$rshud <- val
+      i <- i + 2
+      next
+    }
+    stop(paste0("Unknown argument: ", a))
+  }
+  out
+}
+
+# Absolute path of the running script, taken from Rscript's --file= argument.
+# Returns NULL when no --file= argument is present.
+script_path <- function() {
+  cmd <- commandArgs(trailingOnly = FALSE)
+  file_arg <- grep("^--file=", cmd, value = TRUE)
+  if (length(file_arg) == 0) return(NULL)
+  normalizePath(sub("^--file=", "", file_arg[[1]]), winslash = "/", mustWork = FALSE)
+}
+
+# Write a message to stderr and terminate R with the given exit status.
+die <- function(..., status = 1) {
+  message(...)
+  quit(status = status, save = "no")
+}
+
+# Write the concatenated arguments to stdout, followed by a newline.
+info <- function(...) {
+  cat(..., "\n", sep = "")
+}
+
+args <- parse_args(commandArgs(trailingOnly = TRUE))
+if (isTRUE(args$help)) {
+  info("Usage: Rscript tools/r/install_deps.R [--lib <dir>] [--rshud <path>] [--skip-rshud] [--skip-lwgeom]")
+  info("")
+  info("Installs required R packages into a project-local library (default: <repo>/.Rlib).")
+  info("Also installs local rSHUD from the git submodule by default.")
+  quit(status = 0, save = "no")
+}
+
+# When the script path is unknown, fall back to the working directory.
+sp <- script_path()
+script_dir <- if (is.null(sp)) getwd() else dirname(sp)
+repo_root <- normalizePath(file.path(script_dir, "..", ".."), winslash = "/", mustWork = FALSE)
+
+lib_dir <- if (!is.null(args$lib)) args$lib else file.path(repo_root, ".Rlib")
+# Relative paths (neither "/..." nor "C:...") are resolved against the repo root.
+if (!grepl("^/", lib_dir) && !grepl("^[A-Za-z]:", lib_dir)) {
+  lib_dir <- file.path(repo_root, lib_dir)
+}
+lib_dir <- normalizePath(lib_dir, winslash = "/", mustWork = FALSE)
+
+dir.create(lib_dir, recursive = TRUE, showWarnings = FALSE)
+.libPaths(c(lib_dir, .libPaths()))
+
+# SHUDNC_CRAN_REPO overrides the CRAN mirror; unset or empty uses the default.
+cran_repo <- Sys.getenv("SHUDNC_CRAN_REPO")
+if (!nzchar(cran_repo)) cran_repo <- "https://cloud.r-project.org"
+options(repos = c(CRAN = cran_repo))
+
+info("[install_deps] repo_root: ", repo_root)
+info("[install_deps] lib_dir: ", lib_dir)
+info("[install_deps] CRAN: ", getOption("repos")[["CRAN"]])
+info("")
+
+cran_pkgs <- c("sf", "terra", "ncdf4", "units")
+if (!isTRUE(args$skip_lwgeom)) cran_pkgs <- c(cran_pkgs, "lwgeom")
+
+# Packages that still cannot be loaded after an install attempt.
+still_missing <- character(0)
+for (pkg in cran_pkgs) {
+  if (requireNamespace(pkg, quietly = TRUE)) {
+    info("[install_deps] OK: ", pkg)
+    next
+  }
+  info("[install_deps] Installing: ", pkg)
+  tryCatch(
+    utils::install.packages(pkg, lib = lib_dir, dependencies = c("Depends", "Imports", "LinkingTo")),
+    error = function(e) die("[install_deps] Failed to install ", pkg, ": ", conditionMessage(e))
+  )
+  # install.packages() can report failure as a warning rather than an error,
+  # so confirm the result by loading the package.
+  if (!requireNamespace(pkg, quietly = TRUE)) {
+    still_missing <- c(still_missing, pkg)
+  }
+}
+
+if (!isTRUE(args$skip_rshud)) {
+  rshud_path <- if (!is.null(args$rshud)) args$rshud else file.path(repo_root, "rSHUD")
+  if (!grepl("^/", rshud_path) && !grepl("^[A-Za-z]:", rshud_path)) {
+    rshud_path <- file.path(repo_root, rshud_path)
+  }
+  rshud_path <- normalizePath(rshud_path, winslash = "/", mustWork = FALSE)
+
+  if (!dir.exists(rshud_path)) {
+    die(
+      "[install_deps] rSHUD path not found: ", rshud_path, "\n",
+      "[install_deps] Did you init submodules?\n",
+      " git submodule update --init --recursive\n",
+      "[install_deps] Or re-run with --skip-rshud."
+    )
+  }
+
+  if (requireNamespace("rSHUD", quietly = TRUE)) {
+    info("[install_deps] OK: rSHUD (already installed)")
+  } else {
+    info("[install_deps] Installing local rSHUD: ", rshud_path)
+    r_bin <- file.path(R.home("bin"), "R")
+    if (!file.exists(r_bin)) r_bin <- "R"
+
+    # system2() pastes the arguments into one command line, so quote the
+    # paths; otherwise a repo location containing spaces breaks the install.
+    cmd_args <- c(
+      "CMD",
+      "INSTALL",
+      paste0("--library=", shQuote(lib_dir)),
+      shQuote(rshud_path)
+    )
+    status <- system2(r_bin, args = cmd_args)
+    if (!identical(status, 0L)) {
+      die(
+        "[install_deps] R CMD INSTALL rSHUD failed (exit code ", status, ").\n",
+        "[install_deps] Note: current rSHUD may still depend on legacy GIS packages.\n",
+        "[install_deps] If you are in the middle of the sf/terra migration, re-run with:\n",
+        " Rscript tools/r/install_deps.R --skip-rshud\n"
+      )
+    }
+    if (!requireNamespace("rSHUD", quietly = TRUE)) {
+      still_missing <- c(still_missing, "rSHUD")
+    }
+  }
+}
+
+if (length(still_missing) > 0) {
+  die(
+    "[install_deps] Some packages are still missing after install:\n - ",
+    paste(still_missing, collapse = "\n - ")
+  )
+}
+
+info("")
+info("[install_deps] Done.")
+quit(status = 0, save = "no")