From 18b66373f42f6d9b3eede830f5bcc81c696eea40 Mon Sep 17 00:00:00 2001 From: Nirmal Gupta Date: Tue, 2 Jun 2026 12:06:51 -0500 Subject: [PATCH 1/5] feat(skill): package secscan as a Claude Code skill; config in a directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes that go together: 1. Config files moved into a single config/ directory so the host-side bind mount is one volume instead of a per-file mount. This makes packaging the scanner as a Claude Code skill clean — every per-deployment file (main config, 1Password env file) lives in one place that maps to /config inside the container. Moved (git tracked, history preserved via rename): config.example.yaml -> config/config.example.yaml .env.1password.tpl.example -> config/.env.1password.tpl.example And (gitignored, plain move): config.yaml -> config/config.yaml .env.1password.tpl -> config/.env.1password.tpl .gitignore updated. secscan.sh now defaults to config/ as the config directory, accepts --config-dir as a new flag, mounts the directory at /config:ro inside the container, and resolves env_file relative to that dir. SECSCAN_CONFIG_DIR env var added for skill use ("scan project foo uses this config dir, project bar uses that one"). Dockerfile unchanged — it already expected /config/config.yaml. 2. New `skill/` directory ships a Claude Code skill bundle: skill/SKILL.md agent instructions (frontmatter + body) skill/references/README.md high-level reference skill/references/CONFIG.md full config schema + 1Password walkthrough skill/references/RUN.md runbook, flags, exit codes, recovery skill/install.sh copies skill into ~/.claude/skills/secscan/ The skill is small — it's just instructions pointing at $SECSCAN_HOME for the actual scanner. Users clone this repo once, export SECSCAN_HOME, run skill/install.sh once. Updates to the scanner don't require re-installing the skill. The skill's SKILL.md documents the hard rules (never --no-dry-run without confirmation, never expose secrets, project board is source of truth). README updated with a "Claude Code skill" section and all config paths adjusted. 221 tests pass; `./secscan.sh check` reports green against the new layout. Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 6 +- README.md | 40 +++- .../.env.1password.tpl.example | 0 .../config.example.yaml | 0 secscan.sh | 67 ++++-- skill/SKILL.md | 94 ++++++++ skill/install.sh | 38 +++ skill/references/CONFIG.md | 216 ++++++++++++++++++ skill/references/README.md | 81 +++++++ skill/references/RUN.md | 156 +++++++++++++ 10 files changed, 669 insertions(+), 29 deletions(-) rename .env.1password.tpl.example => config/.env.1password.tpl.example (100%) rename config.example.yaml => config/config.example.yaml (100%) create mode 100644 skill/SKILL.md create mode 100755 skill/install.sh create mode 100644 skill/references/CONFIG.md create mode 100644 skill/references/README.md create mode 100644 skill/references/RUN.md diff --git a/.gitignore b/.gitignore index a9cbaed..e2b11e4 100644 --- a/.gitignore +++ b/.gitignore @@ -11,11 +11,11 @@ htmlcov/ work/ /tmp_* -# Per-deployment config (use config.example.yaml as the template; keep secrets out of git) -config.yaml +# Per-deployment config (use config/config.example.yaml as the template; keep secrets out of git) +config/config.yaml # Personal 1Password reference template (paths to your vault items) -.env.1password.tpl +config/.env.1password.tpl # IDE .idea/ diff --git a/README.md b/README.md index 22943cd..8f0e814 100644 --- a/README.md +++ b/README.md @@ -22,8 +22,8 @@ Closing/fixing findings is out of scope — another system owns that. # - Category (dependency, secret, sast, iac, license) # 2. Copy the example config -cp config.example.yaml config.yaml -$EDITOR config.yaml # set repo, ref, project.owner, project.number +cp config/config.example.yaml config/config.yaml +$EDITOR config/config.yaml # set repo, ref, project.owner, project.number # 3. Set up secrets — pick ONE of the two paths in the next section @@ -86,8 +86,8 @@ brew install 1password-cli op signin # Copy the template and edit the vault/item paths to point at your own entries -cp .env.1password.tpl.example .env.1password.tpl -$EDITOR .env.1password.tpl +cp config/.env.1password.tpl.example config/.env.1password.tpl +$EDITOR config/.env.1password.tpl ``` `.env.1password.tpl` then looks like: @@ -157,10 +157,10 @@ Common failure modes and what `check` says: | Symptom | Fix | |---|---| -| `config not found` | `cp config.example.yaml config.yaml` | +| `config not found` | `cp config/config.example.yaml config/config.yaml` | | `GITHUB_TOKEN unset` (env source) | `export GITHUB_TOKEN=…` or switch to `secrets.source: "1password"` | | `op not installed` (1Password source) | `brew install 1password-cli && op signin` | -| `.env.1password.tpl missing` | `cp .env.1password.tpl.example .env.1password.tpl && $EDITOR …` | +| `.env.1password.tpl missing` | `cp config/.env.1password.tpl.example config/.env.1password.tpl && $EDITOR …` | | `SLACK_… unset` (slack.enabled=true) | Either export the var, add it to the 1Password env file, or set `slack.enabled: false` | | `image not built yet` | `./secscan.sh build` | | `docker daemon not reachable` | Start Docker Desktop | @@ -180,6 +180,34 @@ real binaries, run via `./secscan.sh run`. --- +## Claude Code skill + +This repo ships a [Claude Code](https://claude.com/claude-code) skill at +[`skill/`](skill/). It lets the agent drive secscan for you ("scan this repo +for security issues", "/secscan", etc.). + +Install once: + +```bash +git clone https://github.com/leverj/security-scanner.git ~/code/security-scanner +echo 'export SECSCAN_HOME=~/code/security-scanner' >> ~/.zshrc # or .bashrc +source ~/.zshrc + +~/code/security-scanner/skill/install.sh # copies skill to ~/.claude/skills/secscan/ +``` + +The skill bundle contains: + +- `skill/SKILL.md` — what the agent sees: when to invoke, operating procedure, hard rules. +- `skill/references/README.md` — high-level reference. +- `skill/references/CONFIG.md` — full config schema + 1Password walkthrough. +- `skill/references/RUN.md` — runbook, flags, exit codes, failure recovery. + +The scanner repo stays the single source of truth — the skill is just +instructions pointing at `$SECSCAN_HOME`. Update secscan by `git pull`-ing +in `$SECSCAN_HOME`; no need to re-run `install.sh` unless the skill +content itself changes. + ## Spec See [secscan-spec.md](secscan-spec.md) for the full design. diff --git a/.env.1password.tpl.example b/config/.env.1password.tpl.example similarity index 100% rename from .env.1password.tpl.example rename to config/.env.1password.tpl.example diff --git a/config.example.yaml b/config/config.example.yaml similarity index 100% rename from config.example.yaml rename to config/config.example.yaml diff --git a/secscan.sh b/secscan.sh index 42b88bd..8d458fc 100755 --- a/secscan.sh +++ b/secscan.sh @@ -18,13 +18,24 @@ # Required env (only when slack.enabled=true AND secrets.source=env): # the var named by slack.webhook_url_env (or BOTH channel_id_env and bot_token_env) # -# Config: defaults to ./config.yaml; override with `--config /path/to/cfg.yaml` before -# any other args, or set SECSCAN_CONFIG=... in env. +# Config layout (bind-mounted as a single directory into the container): +# +# config/config.yaml # required — main settings +# config/.env.1password.tpl # optional — only when secrets.source=1password +# +# Default config directory: ./config/. Override with one of: +# --config /path/to/cfg.yaml # explicit file path (its parent dir is mounted) +# SECSCAN_CONFIG=... # same thing via env var +# SECSCAN_CONFIG_DIR=... # mount this dir instead; expects config.yaml inside +# +# When the skill packages secscan, point SECSCAN_CONFIG_DIR at the per-project +# config the agent maintains for the user. set -euo pipefail IMAGE="${SECSCAN_IMAGE:-secscan:latest}" HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +DEFAULT_CONFIG_DIR="$HERE/config" die() { echo "error: $*" >&2; exit 1; } warn() { echo "warning: $*" >&2; } @@ -112,14 +123,15 @@ cmd_build() { } cmd_check() { - local config="${SECSCAN_CONFIG:-$HERE/config.yaml}" + local config_dir="${SECSCAN_CONFIG_DIR:-$DEFAULT_CONFIG_DIR}" + local config="${SECSCAN_CONFIG:-$config_dir/config.yaml}" local ok=1 echo "== config ==" if [[ -f "$config" ]]; then echo " ✓ $config" else - echo " ✗ $config (cp config.example.yaml config.yaml)" + echo " ✗ $config (cp config/config.example.yaml config/config.yaml)" ok=0 fi @@ -166,10 +178,12 @@ cmd_check() { ;; 1password|1Password|op) local ef; ef="$(read_config_field "$config" "secrets.env_file" ".env.1password.tpl")" - [[ "$ef" = /* ]] || ef="$HERE/$ef" + # Resolve env_file relative to the config directory (so the whole config/ + # dir is the unit of bind-mount). + [[ "$ef" = /* ]] || ef="$config_dir/$ef" if command -v op >/dev/null; then echo " ✓ op (1Password CLI) installed"; else echo " ✗ op not installed (brew install 1password-cli)"; ok=0; fi if op account list >/dev/null 2>&1; then echo " ✓ op signed in"; else echo " ⚠ op not signed in (run: op signin)"; fi - if [[ -f "$ef" ]]; then echo " ✓ $ef present"; else echo " ✗ $ef missing (cp .env.1password.tpl.example .env.1password.tpl)"; ok=0; fi + if [[ -f "$ef" ]]; then echo " ✓ $ef present"; else echo " ✗ $ef missing (cp config/.env.1password.tpl.example config/.env.1password.tpl)"; ok=0; fi ;; *) echo " ✗ secrets.source must be 'env' or '1password', got: $secrets_source" @@ -198,19 +212,24 @@ cmd_check() { cmd_run() { command -v docker >/dev/null || die "docker not on PATH" - local config="${SECSCAN_CONFIG:-$HERE/config.yaml}" + local config_dir="${SECSCAN_CONFIG_DIR:-$DEFAULT_CONFIG_DIR}" + local config="${SECSCAN_CONFIG:-$config_dir/config.yaml}" local extra_args=() local have_dry_run=0 while [[ $# -gt 0 ]]; do case "$1" in - --config) config="$2"; shift 2 ;; - --config=*) config="${1#--config=}"; shift ;; + --config) config="$2"; config_dir="$(dirname "$2")"; shift 2 ;; + --config=*) config="${1#--config=}"; config_dir="$(dirname "$config")"; shift ;; + --config-dir) config_dir="$2"; config="$2/config.yaml"; shift 2 ;; + --config-dir=*) config_dir="${1#--config-dir=}"; config="$config_dir/config.yaml"; shift ;; --dry-run) have_dry_run=1; extra_args+=("$1"); shift ;; --) shift; extra_args+=("$@"); break ;; *) extra_args+=("$1"); shift ;; esac done + # Canonicalize. + config_dir="$(cd "$config_dir" 2>/dev/null && pwd || echo "$config_dir")" # Default to --dry-run unless the caller asked for the real path. Build a new # array so `--no-dry-run` is removed cleanly (rather than replaced with ""). @@ -235,10 +254,11 @@ cmd_run() { error: config not found at $config To set up: - cp config.example.yaml config.yaml - \$EDITOR config.yaml # set repo, ref, parent_issue, secrets.source + cp config/config.example.yaml config/config.yaml + \$EDITOR config/config.yaml # set repo, ref, project, secrets.source See README.md ("Setup: secrets") for env-vs-1Password choice. +Or set SECSCAN_CONFIG_DIR=/path/to/your-config-dir to use a different directory. EOF exit 1 fi @@ -303,9 +323,9 @@ Run \`./secscan.sh check\` to see your full setup status. EOF exit 1 fi - echo "secrets: env (shell exports) slack: $SLACK_MODE" >&2 + echo "secrets: env (shell exports) slack: $SLACK_MODE config-dir: $config_dir" >&2 exec docker run --rm \ - -v "$config":/config/config.yaml:ro \ + -v "$config_dir":/config:ro \ "${env_args[@]}" \ "$IMAGE" "${extra_args[@]+"${extra_args[@]}"}" ;; @@ -323,14 +343,14 @@ EOF exit 1 fi local ef="$env_file" - [[ "$ef" = /* ]] || ef="$HERE/$ef" + [[ "$ef" = /* ]] || ef="$config_dir/$ef" if [[ ! -f "$ef" ]]; then cat >&2 <// paths + cp config/.env.1password.tpl.example config/.env.1password.tpl + \$EDITOR config/.env.1password.tpl # set op://// paths The template lists every env var secscan understands. EOF @@ -342,9 +362,9 @@ EOF grep -qE "^\s*${v}\s*=" "$ef" || warn "$v not referenced in $ef but slack.enabled=true; add 'op://...' line or set slack.enabled: false" done fi - echo "secrets: 1password ($ef) slack: $SLACK_MODE" >&2 + echo "secrets: 1password ($ef) slack: $SLACK_MODE config-dir: $config_dir" >&2 exec op run --env-file="$ef" -- docker run --rm \ - -v "$config":/config/config.yaml:ro \ + -v "$config_dir":/config:ro \ "${env_args[@]}" \ "$IMAGE" "${extra_args[@]+"${extra_args[@]}"}" ;; @@ -364,14 +384,21 @@ secscan.sh — build/run the secscan container usage: ./secscan.sh build - ./secscan.sh run [--config path/to/config.yaml] [--dry-run|--no-dry-run] [extra secscan args...] + ./secscan.sh run [--config path/to/config.yaml] + [--config-dir path/to/config_dir] + [--dry-run|--no-dry-run] + [extra secscan args...] ./secscan.sh check defaults: --dry-run is added unless you pass --no-dry-run - --config defaults to ./config.yaml (override with SECSCAN_CONFIG env) + --config-dir defaults to ./config/ (override with SECSCAN_CONFIG_DIR env) + --config defaults to /config.yaml (override with SECSCAN_CONFIG env) image tag defaults to "secscan:latest" (override with SECSCAN_IMAGE env) +The whole --config-dir is bind-mounted read-only at /config inside the container, +so any related files (the 1Password env template, etc.) ride along. + secrets (driven by config.yaml): secrets.source: env -> use already-exported shell variables secrets.source: 1password -> auto-wrap with \`op run --env-file=\` diff --git a/skill/SKILL.md b/skill/SKILL.md new file mode 100644 index 0000000..9d95313 --- /dev/null +++ b/skill/SKILL.md @@ -0,0 +1,94 @@ +--- +name: secscan +description: | + Run the secscan security scanner on a repo. Detects the stack, runs OSV-Scanner, + Gitleaks, Semgrep, Trivy, Trufflehog, and optionally Codex+Gemma LLM SAST with + cross-validation. Files each finding as an issue into a GitHub Projects v2 board. + Trigger when the user asks to "scan", "run secscan", "check security", "audit + dependencies / secrets / code", or types /secscan. +--- + +# secscan skill + +You're the agent operating the secscan security scanner. Your job is to invoke +it correctly, monitor its output, and report back to the user. + +## When to invoke + +Trigger on requests like: +- "scan this repo for security issues" +- "run secscan" +- "check for secrets / CVEs / SAST issues" +- "/secscan" +- "audit dependencies" + +If the user just says "scan" without context, ask which repo (current dir? a +different one?) and which ref (default: `main`). + +## What secscan needs + +- The **security-scanner repo** cloned somewhere (the tooling itself). Default + location: `$SECSCAN_HOME` env var; fall back to `~/code/security-scanner` if + unset. If neither exists, tell the user to clone + `https://github.com/leverj/security-scanner` and set `SECSCAN_HOME`. +- A **config directory** with `config.yaml` (and optionally `.env.1password.tpl`). + Default: `$SECSCAN_HOME/config/`. Override via `--config-dir` or + `SECSCAN_CONFIG_DIR`. +- Docker running (or another container runtime); secscan is delivered as a + container image. +- A **GitHub Projects v2 board** for findings, configured under `project:` in + the config. PAT must have `repo` + `project` scopes. + +## Operating procedure + +1. **Locate the tooling.** Check `$SECSCAN_HOME`, then `~/code/security-scanner`. + If neither exists, surface the README installation steps (see + `references/README.md`) and stop. + +2. **Check config.** Run `$SECSCAN_HOME/secscan.sh check`. If it reports any + `✗`, walk the user through the fix using `references/CONFIG.md` as the + source of truth. Common failure modes: + - missing `config/config.yaml` → copy from `config.example.yaml` + - missing GITHUB_TOKEN or 1Password setup → see `references/CONFIG.md` + - docker not running → tell the user to start Docker Desktop + +3. **Pick the right run mode.** + - **Default to `--dry-run`** for the first run; surface what would be filed. + - Only run `--no-dry-run` after the user explicitly confirms. + - Codex + Gemma LLM SAST and cross-validation are **off by default in + `config.yaml`**; flip them on only when the user asks for "deep" or "LLM" + scanning, and warn them about subscription cost (Codex) + Ollama + prerequisites (Gemma). + +4. **Run it.** From the security-scanner repo: + ```bash + $SECSCAN_HOME/secscan.sh run --dry-run + ``` + For LLM-on runs against a non-default config dir: + ```bash + $SECSCAN_HOME/secscan.sh run --config-dir /path/to/cfg --no-dry-run + ``` + See `references/RUN.md` for the full set of flags and exit codes. + +5. **Report.** Quote the summary line secscan emits (`summary: created=N + dup-skipped=N ...`), surface any per-scanner failures, and link to the + GitHub project board for triage. + +## Hard rules + +- **Never run with `--no-dry-run`** unless the user explicitly confirmed in the + current turn — the dry-run default is there to prevent surprise issue + creation. +- **Never expose secrets.** GITHUB_TOKEN, 1Password env contents, Slack + webhooks must never appear in your messages back to the user. secscan + scrubs these from its own logs; you must too. +- **Don't edit `config/config.yaml` silently.** If a value needs changing, + show the proposed diff and ask first. +- **Honor the project board as the source of truth.** Don't try to dedup + findings yourself; that's secscan's job (deterministic fingerprints). + +## Where to look for deeper info + +- `references/README.md` — installation, prerequisites, how dedup works +- `references/CONFIG.md` — full config schema + 1Password setup walkthrough +- `references/RUN.md` — runbook, flags, exit codes, troubleshooting diff --git a/skill/install.sh b/skill/install.sh new file mode 100755 index 0000000..78726b8 --- /dev/null +++ b/skill/install.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Install (or update) the secscan Claude Code skill. +# +# Copies this directory to ~/.claude/skills/secscan/ — or to $CLAUDE_SKILLS_DIR +# if that env var is set (CC honors a couple of locations; consult `cc --help` +# if your install puts skills elsewhere). +# +# Re-running this is safe: existing files are overwritten with the latest copy +# from the security-scanner repo. The user's $SECSCAN_HOME (where the scanner +# tooling itself lives) is unaffected. + +set -euo pipefail + +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +DEST_BASE="${CLAUDE_SKILLS_DIR:-$HOME/.claude/skills}" +DEST="$DEST_BASE/secscan" + +mkdir -p "$DEST_BASE" +if [[ -d "$DEST" ]]; then + echo "updating existing skill at $DEST" +else + echo "installing skill to $DEST" +fi + +# rsync if available (preserves timestamps cleanly); fall back to cp -R. +if command -v rsync >/dev/null; then + rsync -a --delete --exclude install.sh "$HERE/" "$DEST/" +else + rm -rf "$DEST" + mkdir -p "$DEST" + cp -R "$HERE/SKILL.md" "$HERE/references" "$DEST/" +fi + +echo "done." +echo +echo "Make sure these env vars are set in your shell so the skill can find the scanner:" +echo " export SECSCAN_HOME=$(cd "$HERE/.." && pwd)" +echo " # optionally: export SECSCAN_CONFIG_DIR=/path/to/per-project/config" diff --git a/skill/references/CONFIG.md b/skill/references/CONFIG.md new file mode 100644 index 0000000..e937a46 --- /dev/null +++ b/skill/references/CONFIG.md @@ -0,0 +1,216 @@ +# secscan config — reference + +The agent (you) reads this when configuring secscan for the user. The config +lives in **a directory**, not a single file — the whole directory is +bind-mounted into the container, so secrets-resolution files (1Password env +file, etc.) ride along with the main `config.yaml`. + +## Layout + +``` +config/ + config.yaml # required — main settings + config.example.yaml # committed template + .env.1password.tpl # required only when secrets.source: 1password + .env.1password.tpl.example # committed template +``` + +By default secscan looks at `$SECSCAN_HOME/config/`. Override with +`--config-dir` or `SECSCAN_CONFIG_DIR=/path/to/your-config`. + +## Required top-level keys + +```yaml +repo: "owner/name" # target repo to scan (must exist on GitHub) +ref: "main" # branch / tag / SHA to scan + +project: # target Projects v2 board for findings + owner: "owner" # org or user that owns the project + number: 5 # project number from the URL: /projects/ + +github_token_env: "GITHUB_TOKEN" # env var name that holds the PAT +``` + +**PAT scopes:** `repo` (full) + `project`. Classic PAT, not fine-grained +(fine-grained doesn't yet expose Projects v2 mutations as of late 2025). + +## Scanners — flip what runs + +```yaml +scanners: + osv: true # vulnerable language packages + gitleaks: true # secret patterns (with git history) + semgrep: true # SAST patterns + trivy: true # vuln + secret + IaC + license, all in one + trufflehog: true # verified-live secrets + syft: true # SBOM artifact (no project items filed) + + # LLM-driven SAST — off by default; opt-in. + codex: false # OpenAI Codex via local `codex` CLI (subscription) + gemma: false # Local Gemma 4 via Ollama +``` + +When **both** `codex` and `gemma` are true, cross-validation kicks in +automatically. See the `cross_validate:` block below. + +## Codex (subscription) + +```yaml +codex: + binary: "codex" # auto-detected on PATH + # model: "gpt-5-codex" # omit to use codex's configured default + timeout: 1200 # seconds; LLM scans take minutes on real repos +``` + +**Auth:** `codex login` outside this tool. secscan never sees an API key. + +**Prereq:** `codex` CLI installed (`brew install codex` or per docs) AND the +user is logged in via `codex login`. The runner refuses to start otherwise +with a clear "run `codex login`" message. + +## Gemma (local Ollama) + +```yaml +gemma: + # Falls back to triage.base_url / triage.model when blank — most users only + # configure Ollama once. + # base_url: "http://host.docker.internal:11434" + # model: "gemma4:26b" + # keep_alive: "5m" + timeout: 1800 + max_files: 60 # source files batched in one prompt + max_file_bytes: 12000 # per-file content cap (truncated past this) + max_total_bytes: 200000 # total prompt cap across all files +``` + +**Prereq:** Ollama running locally (or reachable via `host.docker.internal`) +and the named model pulled (`ollama pull gemma4:26b`). + +## Cross-validation + +```yaml +cross_validate: + enabled: true + codex_timeout: 300 # per-finding budget for codex reviewing a gemma flag + gemma_timeout: 180 # per-finding budget for gemma reviewing a codex flag +``` + +Only active when **both** `scanners.codex` and `scanners.gemma` are true. +Verdicts: `real` (no change), `false_positive` (severity downgraded one +notch — `high→medium`, `medium→low`, `low→info`; **`critical` stays +critical**), or `uncertain` (annotated, no change). Findings are **never +suppressed** — humans triage on the project board. + +## Triage (optional, post-scanner) + +Gemma also runs as a per-finding reviewer / Slack-intro writer, distinct from +its scanner role: + +```yaml +triage: + enabled: true + provider: "ollama" + model: "gemma4:26b" + base_url: "http://host.docker.internal:11434" + keep_alive: "5m" + timeout: 600 + prewarm: true # load the model in a background thread at startup + intro_timeout: 120 # tight cap on the Slack intro generation + # Granular feature flags (default conservative): + intro_enabled: true # cheap: one chat call total + prose_enabled: false # expensive: one call per new finding + fuzzy_dup_enabled: false # expensive: one call per new finding +``` + +## Slack (optional) + +```yaml +slack: + enabled: true + webhook_url_env: "SLACK_WEBHOOK_URL" # OR (mutually exclusive): + # channel_id_env: "SLACK_CHANNEL_ID" + # bot_token_env: "SLACK_BOT_TOKEN" +``` + +## Other knobs + +```yaml +paths: + exclude: # fnmatch globs + trailing-slash directory prefixes + - "archive/" + - "vendor/" + - ".github/scripts/" + +severity_floor: "low" # info | low | medium | high | critical +``` + +## Secrets — two paths + +### Path A: shell env (simplest) + +```yaml +secrets: + source: "env" +``` + +```bash +export GITHUB_TOKEN=github_pat_... +export SLACK_WEBHOOK_URL=https://hooks.slack.com/services/... # only if slack.enabled +$SECSCAN_HOME/secscan.sh run +``` + +Put the `export` lines in `~/.zshrc` to persist. The script fails fast if any +required var is missing. + +### Path B: 1Password (recommended for daily use) + +```yaml +secrets: + source: "1password" + env_file: ".env.1password.tpl" # relative to the config/ dir +``` + +Setup: + +```bash +brew install 1password-cli +op signin + +cp config/.env.1password.tpl.example config/.env.1password.tpl +$EDITOR config/.env.1password.tpl +``` + +The file should look like: + +``` +GITHUB_TOKEN=op:////GITHUB_TOKEN +SLACK_WEBHOOK_URL=op:////SLACK_WEBHOOK_URL +``` + +Then just: + +```bash +$SECSCAN_HOME/secscan.sh run +``` + +`secscan.sh` auto-wraps the invocation with +`op run --env-file=config/.env.1password.tpl -- docker run ...`. Tokens are +pulled JIT into the process env, never written to disk and never on argv. + +### Path C: Docker secrets / CI + +Set `secrets.source: env` in `config.yaml` and let your orchestrator +(GitHub Actions, K8s, Docker Swarm) populate `GITHUB_TOKEN` etc. in the +container env. The script picks them up the same way. + +## Verifying + +After any config change: + +```bash +$SECSCAN_HOME/secscan.sh check +``` + +This reports the state of every prerequisite (config file, docker, image, +secrets path, Slack vars). Walk the user through any `✗` it surfaces — see +the troubleshooting table in **RUN.md**. diff --git a/skill/references/README.md b/skill/references/README.md new file mode 100644 index 0000000..027a5ec --- /dev/null +++ b/skill/references/README.md @@ -0,0 +1,81 @@ +# secscan — reference + +The agent (you) reads this when the user needs background on what secscan is, +how to install it, or what it produces. It's a condensed view; the root +`README.md` in the security-scanner repo is the authoritative source. + +## What it is + +Stateless single-repo security scanner. Detects a repo's tech stack, runs +several scanners on it, and files every finding as a deduplicated GitHub issue +attached to a Projects v2 board. + +The scanners (most run by default; LLM ones opt-in): + +| Scanner | What it finds | Default | +|---|---|---| +| OSV-Scanner | Vulnerable language packages (npm, pip, go, ...) | on | +| Gitleaks | Hardcoded secrets / keys (pattern + history) | on | +| Trufflehog | Verified-live secrets (validates against the vendor) | on | +| Semgrep | SAST patterns (eval, SQL concat, XSS, etc.) | on | +| Trivy | Vulns + secrets + IaC + license (all in one) | on | +| Syft | SBOM artifact (CycloneDX JSON) | on | +| Codex | LLM SAST via OpenAI Codex CLI (subscription) | **off** | +| Gemma | LLM SAST via local Ollama | **off** | + +When **both** Codex and Gemma are on, cross-validation runs — each tool +reviews the other's findings ("real / false_positive / uncertain"). False +positive verdicts downgrade severity one notch; critical is asymmetric (never +auto-downgrades). Findings are never suppressed. + +## Where state lives + +There is no internal database. The single source of truth is a **GitHub +Projects v2 board** the user owns. Each finding becomes an issue in the repo +plus a project item with `Severity` + `Category` single-select fields set. +Dedup is done by walking project items and reading deterministic fingerprints +embedded in issue bodies — once a finding is filed (or closed), it's never +re-filed. + +PAT scopes required: **`repo` + `project`** (classic PAT; fine-grained +doesn't yet expose Projects v2 mutations). + +## Install + +```bash +git clone https://github.com/leverj/security-scanner.git ~/code/security-scanner +export SECSCAN_HOME=~/code/security-scanner # add to .zshrc / .bashrc +cd $SECSCAN_HOME + +cp config/config.example.yaml config/config.yaml +$EDITOR config/config.yaml # see CONFIG.md + +./secscan.sh build # docker build secscan:latest +./secscan.sh check # verify everything is wired +./secscan.sh run # defaults to --dry-run +``` + +For 1Password-managed secrets and full config schema, see **CONFIG.md**. +For day-to-day operations and troubleshooting, see **RUN.md**. + +## Files inside `$SECSCAN_HOME/` + +``` +secscan/ # python package — scanners, sync, normalization +secscan.sh # wrapper around `docker run` +Dockerfile # builds secscan:latest (Python + all scanner binaries) +config/ # bind-mounted at /config inside the container + config.yaml # main settings (gitignored) + config.example.yaml # template (committed) + .env.1password.tpl # 1Password env file (gitignored) + .env.1password.tpl.example # template (committed) +skill/ # this skill bundle — what you're reading +``` + +The whole `config/` directory is the unit of bind-mount, so any file related +to secrets resolution rides along with the main config. + +## Spec + +The full design is in `$SECSCAN_HOME/secscan-spec.md` if the user wants the +deep dive (data model, fingerprint scheme, dedup rules, hostile-repo posture). diff --git a/skill/references/RUN.md b/skill/references/RUN.md new file mode 100644 index 0000000..f44c727 --- /dev/null +++ b/skill/references/RUN.md @@ -0,0 +1,156 @@ +# secscan — runbook + +How to invoke secscan, what flags exist, what exit codes mean, and how to +recover from common failures. The agent (you) reads this when actually +operating the scanner for the user. + +## The three commands + +```bash +$SECSCAN_HOME/secscan.sh build # docker build the image (once, or after upgrades) +$SECSCAN_HOME/secscan.sh check # verify prereqs — run this before each run if unsure +$SECSCAN_HOME/secscan.sh run [...] # actually scan +``` + +## `run` flags + +``` +--config Use a specific config.yaml. Its parent dir is mounted. +--config-dir Use a specific config DIR; expects config.yaml inside. +--dry-run Don't file any issues (DEFAULT — bias toward safety). +--no-dry-run Actually create issues / project items. +-- Everything after is passed verbatim to `python -m secscan`. +extra args Forwarded to the scanner CLI. +``` + +Defaults: +- `--dry-run` is added unless the caller passes `--no-dry-run`. +- `--config-dir` defaults to `$SECSCAN_HOME/config/` (override: + `SECSCAN_CONFIG_DIR` env). +- `--config` defaults to `/config.yaml` (override: + `SECSCAN_CONFIG` env). +- Image tag is `secscan:latest` (override: `SECSCAN_IMAGE`). + +## Exit codes + +| Code | Meaning | +|---|---| +| 0 | Success (findings may have been filed; existence of findings is not an error) | +| 2 | Bad config — fail before any scanner ran | +| 3 | All scanners failed — refused to report "all clear" | +| 4 | GitHub API failure (project not found, auth, etc.) | +| other non-zero | docker / shell / unexpected error | + +## Recipes + +### First-time dry-run + +```bash +$SECSCAN_HOME/secscan.sh check # are we wired? +$SECSCAN_HOME/secscan.sh run # dry-run by default +``` + +The summary line at the end looks like: + +``` +summary: created=0 dup-skipped=0 fuzzy-dup-skipped=0 below-floor=0 + total-findings=42 scanners-completed=5 scanners-failed=0 +``` + +If `created` is the number you expected to file, proceed to the real run. + +### Real run (after dry-run looks good) + +```bash +$SECSCAN_HOME/secscan.sh run --no-dry-run +``` + +**Hard rule:** never pass `--no-dry-run` unless the user explicitly confirmed +in the current turn. The default is there to prevent surprise issue creation. + +### Custom config dir (e.g. multiple projects) + +```bash +$SECSCAN_HOME/secscan.sh run --config-dir ~/.config/secscan/project-foo +``` + +The whole `~/.config/secscan/project-foo/` is bind-mounted at `/config` in +the container, so it should look exactly like the repo's `config/` layout +(at minimum: `config.yaml`). + +### Enable LLM SAST + cross-validation + +Edit `config.yaml`: + +```yaml +scanners: + codex: true + gemma: true +``` + +Then run. The first time you turn these on, do a `--dry-run` against a small +repo to calibrate signal/noise before pointing at a large codebase. + +**Prereq checks before flipping these on:** +- Codex: `command -v codex && codex doctor` shows `auth mode: chatgpt` + ("not logged in" → user must run `codex login` first). +- Gemma: `curl -sf $(grep base_url config/config.yaml | head -1 | awk '{print $2}' | tr -d '\"')/api/tags` + returns JSON (Ollama reachable) AND the model in `triage.model` / + `gemma.model` is pulled (`ollama list | grep gemma4:26b`). + +## What "completed" vs "failed" means + +A scanner that did NOT complete contributes **zero findings**. This is by +design — a crashed scanner must never read as "all clear" to downstream +tooling. The summary line distinguishes `scanners-completed` vs +`scanners-failed`. Investigate any failure before trusting a run. + +When a single scanner is failing repeatedly, you can flip it off in +`config.yaml` (`scanners.: false`) to unblock the rest while you fix it. + +## Common failures + +| Symptom | Cause | Fix | +|---|---|---| +| `config not found at ` | No `config.yaml` in config dir | `cp config/config.example.yaml config/config.yaml` and edit | +| `GITHUB_TOKEN not set` (env source) | Shell var unset | `export GITHUB_TOKEN=...` OR switch to 1Password (see CONFIG.md) | +| `op (1Password CLI) not installed` | 1Password path requires `op` | `brew install 1password-cli && op signin` | +| `op not signed in` | Signed-out 1P session | `op signin` | +| `.env.1password.tpl missing` | Per-user 1P env file not created | `cp config/.env.1password.tpl.example config/.env.1password.tpl && $EDITOR …` | +| `image not built yet` | No `secscan:latest` image | `$SECSCAN_HOME/secscan.sh build` | +| `docker daemon not reachable` | Docker Desktop not running | Start Docker Desktop | +| `GitHub API 404: project not found` | Wrong `project.owner` / `project.number`, or PAT missing `project` scope | Verify URL and PAT scopes (see CONFIG.md) | +| `scanner codex: NOT COMPLETED (auth failed — run `codex login` first)` | Codex CLI not authed | User runs `codex login` | +| `scanner gemma: NOT COMPLETED (ollama unreachable: ...)` | Ollama down or wrong URL | Start Ollama, or fix `gemma.base_url` | + +## Logs and artifacts + +- **stderr** is where secscan logs (one-line-per-event format). +- **Findings**: in the GitHub repo + the Projects v2 board configured under + `project:`. +- **SBOM** (when `scanners.syft: true`): written under `work/` inside the + container; the wrapper script wipes the container's `/work` on exit but + the SBOM path is logged via stderr (`sbom: cyclonedx -> /work/sbom-...`). +- **Slack** (when `slack.enabled: true`): per-category digest with severity + breakdown. + +## How dedup behaves + +Each finding has a deterministic fingerprint embedded in the issue body as +an HTML comment. On the next run, all project items (open AND closed) are +listed, fingerprints parsed back out, and any new finding whose fingerprint +already exists is **skipped — even if the existing issue is closed**. + +Closed = "humans triaged this; never re-file." If you need regression +re-surfacing, that's the external fixing system's concern, not secscan's. + +## Reporting back to the user + +After a run, surface: +1. The final summary line verbatim. +2. Any `scanners-failed` count above zero, with the per-scanner errors. +3. A link to the project board: `https://github.com/orgs//projects/`. +4. The dry-run / real-run mode, explicitly. + +Do **not** paste the full stderr log into your reply — it can be long. Quote +relevant excerpts only. From 1f50d075d22111f63a16d1d1967965c0eb93f996 Mon Sep 17 00:00:00 2001 From: Nirmal Gupta Date: Tue, 2 Jun 2026 12:25:07 -0500 Subject: [PATCH 2/5] feat(image): publishable docker image with self-describing manifest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shifts the skill packaging strategy: instead of bundling skill files in this repo, this repo produces a published Docker image (`leverj/security-scan`) that ships a SECSCAN-MANIFEST.yaml inside. The companion ai-skills repo hosts the actual Claude Code skill, which drives the image and reads the manifest to do informed upgrades. What's in this commit: - Drop `skill/` directory entirely — that was a wrong-repo prototype. The real skill lives in leverj/ai-skills. - `SECSCAN-MANIFEST.yaml` at repo root (baked into image at /app/SECSCAN-MANIFEST.yaml). A consumer skill reads it with: docker run --rm --entrypoint cat \\ leverj/security-scan: /app/SECSCAN-MANIFEST.yaml It declares the image version, config_schema_version, changelog, breaking_changes, and most importantly a typed list of: - config.new_fields (skill ADDs these to user's config.yaml if absent) - config.renamed_fields (skill renames in-place) - config.removed_fields (skill strips with user confirmation) This is the contract that lets the consumer skill do "image updated; here's what changes; want to upgrade?" without baking version-specific migration code into the skill itself. - `.github/workflows/publish.yml` — on a `v*` tag, builds amd64+arm64 images and pushes leverj/security-scan: + :latest to Docker Hub. A guard step verifies pyproject.toml's version + SECSCAN-MANIFEST.yaml's version match the tag before publishing. Requires repo secrets DOCKERHUB_USERNAME + DOCKERHUB_TOKEN. - Bump pyproject.toml version 0.1.0 -> 0.2.0 (matches manifest). - README: replace the (now-removed) "Claude Code skill" section with a short pointer to leverj/ai-skills. Local smoke test: docker build + `--entrypoint cat /app/SECSCAN-MANIFEST.yaml` returns the manifest verbatim. 221 tests still pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/publish.yml | 98 +++++++++++++++ Dockerfile | 4 + README.md | 36 ++---- SECSCAN-MANIFEST.yaml | 116 ++++++++++++++++++ pyproject.toml | 4 +- skill/SKILL.md | 94 --------------- skill/install.sh | 38 ------ skill/references/CONFIG.md | 216 ---------------------------------- skill/references/README.md | 81 ------------- skill/references/RUN.md | 156 ------------------------ 10 files changed, 233 insertions(+), 610 deletions(-) create mode 100644 .github/workflows/publish.yml create mode 100644 SECSCAN-MANIFEST.yaml delete mode 100644 skill/SKILL.md delete mode 100755 skill/install.sh delete mode 100644 skill/references/CONFIG.md delete mode 100644 skill/references/README.md delete mode 100644 skill/references/RUN.md diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..5454b93 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,98 @@ +name: Publish image + +# Builds and publishes the secscan image to Docker Hub on every tag named v*. +# The tag must match the [project.version] in pyproject.toml and the +# `version:` in SECSCAN-MANIFEST.yaml (a guard step verifies this). +# +# Required repository secrets: +# DOCKERHUB_USERNAME the Docker Hub user/org that owns leverj/security-scan +# DOCKERHUB_TOKEN Docker Hub access token with read+write on the repo +# +# Cut a release: +# git tag v0.2.0 && git push origin v0.2.0 +# +# The workflow tags the image with: +# leverj/security-scan:v0.2.0 (immutable per release) +# leverj/security-scan:latest (always the most recent tag) + +on: + push: + tags: ["v*"] + workflow_dispatch: + inputs: + tag: + description: "Tag to build (e.g., v0.2.0). Must match pyproject.toml + manifest version." + required: true + +permissions: + contents: read + +env: + IMAGE: leverj/security-scan + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" + +jobs: + publish: + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.tag || github.ref }} + + - name: Resolve tag + id: tag + run: | + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + tag="${{ github.event.inputs.tag }}" + else + tag="${GITHUB_REF#refs/tags/}" + fi + echo "tag=$tag" >> "$GITHUB_OUTPUT" + # Strip the leading 'v' for comparison against pyproject / manifest. + echo "version=${tag#v}" >> "$GITHUB_OUTPUT" + + - name: Verify version alignment + run: | + py_version=$(grep -E '^version\s*=' pyproject.toml | head -1 | sed -E 's/.*"([^"]+)".*/\1/') + mf_version=$(grep -E '^version:' SECSCAN-MANIFEST.yaml | head -1 | sed -E 's/.*"([^"]+)".*/\1/') + want='${{ steps.tag.outputs.version }}' + + echo "tag=$want pyproject=$py_version manifest=$mf_version" + + if [[ "$py_version" != "$want" ]]; then + echo "::error::pyproject.toml version ($py_version) != tag ($want). Bump pyproject.toml or fix the tag." >&2 + exit 1 + fi + if [[ "$mf_version" != "$want" ]]; then + echo "::error::SECSCAN-MANIFEST.yaml version ($mf_version) != tag ($want). Bump the manifest or fix the tag." >&2 + exit 1 + fi + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . + push: true + # Multi-arch — runners build amd64/arm64 in parallel. + platforms: linux/amd64,linux/arm64 + tags: | + ${{ env.IMAGE }}:${{ steps.tag.outputs.tag }} + ${{ env.IMAGE }}:latest + provenance: false + + - name: Smoke-test the published image (manifest readable) + run: | + docker run --rm --entrypoint cat \ + "${{ env.IMAGE }}:${{ steps.tag.outputs.tag }}" \ + /app/SECSCAN-MANIFEST.yaml | head -5 diff --git a/Dockerfile b/Dockerfile index a97be28..a796b96 100644 --- a/Dockerfile +++ b/Dockerfile @@ -114,6 +114,10 @@ WORKDIR /app COPY pyproject.toml /app/pyproject.toml COPY secscan /app/secscan COPY README.md /app/README.md +# Manifest the consuming skill reads to see version + needed config migrations. +# Pull it out without starting the scanner: +# docker run --rm --entrypoint cat leverj/security-scan: /app/SECSCAN-MANIFEST.yaml +COPY SECSCAN-MANIFEST.yaml /app/SECSCAN-MANIFEST.yaml RUN pip install --no-cache-dir /app # Make sure the mount points exist (no VOLUME directive — keeps `--rm` from diff --git a/README.md b/README.md index 8f0e814..2572e1e 100644 --- a/README.md +++ b/README.md @@ -180,33 +180,23 @@ real binaries, run via `./secscan.sh run`. --- -## Claude Code skill +## Use as a Claude Code skill -This repo ships a [Claude Code](https://claude.com/claude-code) skill at -[`skill/`](skill/). It lets the agent drive secscan for you ("scan this repo -for security issues", "/secscan", etc.). +The companion bundle at [`leverj/ai-skills`](https://github.com/leverj/ai-skills) +ships a `secscan` skill that drives this image directly: -Install once: - -```bash -git clone https://github.com/leverj/security-scanner.git ~/code/security-scanner -echo 'export SECSCAN_HOME=~/code/security-scanner' >> ~/.zshrc # or .bashrc -source ~/.zshrc - -~/code/security-scanner/skill/install.sh # copies skill to ~/.claude/skills/secscan/ +``` +/plugin marketplace add leverj/ai-skills +/plugin install leverj@leverj-ai-skills +# then: /leverj:secscan run ``` -The skill bundle contains: - -- `skill/SKILL.md` — what the agent sees: when to invoke, operating procedure, hard rules. -- `skill/references/README.md` — high-level reference. -- `skill/references/CONFIG.md` — full config schema + 1Password walkthrough. -- `skill/references/RUN.md` — runbook, flags, exit codes, failure recovery. - -The scanner repo stays the single source of truth — the skill is just -instructions pointing at `$SECSCAN_HOME`. Update secscan by `git pull`-ing -in `$SECSCAN_HOME`; no need to re-run `install.sh` unless the skill -content itself changes. +The skill pulls and runs the published Docker image +`leverj/security-scan:`, bind-mounts your `config/` directory at +`/config:ro`, and offers a user-confirmed upgrade flow when a newer image +version is available (the image ships a `SECSCAN-MANIFEST.yaml` describing +its version + any config fields the skill should add to your local +`config.yaml`). ## Spec diff --git a/SECSCAN-MANIFEST.yaml b/SECSCAN-MANIFEST.yaml new file mode 100644 index 0000000..7f510f5 --- /dev/null +++ b/SECSCAN-MANIFEST.yaml @@ -0,0 +1,116 @@ +# SECSCAN-MANIFEST.yaml +# +# Declarative contract between this image and any tool (skill, CI job, etc.) +# that drives it. Baked into the image at /app/SECSCAN-MANIFEST.yaml. +# +# Read it from outside the running container with: +# docker run --rm --entrypoint cat leverj/security-scan: /app/SECSCAN-MANIFEST.yaml +# +# Contract: +# - `version` matches pyproject.toml's [project.version] and the docker tag. +# - `config_schema_version` bumps only when the YAML schema changes in a way +# that needs migration (adding optional fields with defaults does NOT +# require a bump; renames and removals do). +# - `config.new_fields` lets a smart skill add missing fields to a user's +# config.yaml on upgrade WITHOUT clobbering values they've already set. +# - `config.renamed_fields` and `config.removed_fields` capture +# breaking changes for the skill to surface to the user. +# +# Skills should compare a user's local "pinned tag" against the latest +# manifest, and on upgrade: +# 1. Show `changelog` to the user. +# 2. If `breaking_changes` is non-empty, require confirmation per item. +# 3. Apply `config.renamed_fields` (rename in-place; show diff). +# 4. Add any `config.new_fields` not already present (with documented defaults). +# 5. Strip any `config.removed_fields` (show diff, confirm). +# +# Adding to this file in a new release is non-breaking. Removing fields is +# breaking — skills must tolerate older manifests missing keys. + +version: "0.2.0" +config_schema_version: 2 +docker_image: "leverj/security-scan" +released: "2026-06-02" + +# One-liners for the upgrade prompt the skill shows users. +changelog: + - "BREAKING: config moved from a single file to a directory (config/config.yaml). Bind-mount config/ at /config:ro." + - "BREAKING: replaced parent_issue (int) with project.{owner,number} — findings file into a GitHub Projects v2 board, not as sub-issues." + - "PAT now needs `project` scope in addition to `repo`." + - "Added Codex + Gemma LLM SAST scanners (off by default) with bidirectional cross-validation." + - "Added bundled Semgrep rules: XSS, SQLi, Supabase migration patterns." + +breaking_changes: + - id: "config-as-directory" + summary: "The host-side mount target is now the config DIRECTORY, not a single config.yaml file." + user_action: "Move config.yaml + .env.1password.tpl into a config/ directory and bind-mount that directory at /config:ro." + - id: "projects-v2" + summary: "parent_issue (int) is removed. Findings now file into a GitHub Projects v2 board." + user_action: "Add `project: {owner, number}` block to config.yaml; add `project` scope to your PAT; optionally bulk-add existing sub-issues to the new board for clean dedup on the first run." + +config: + # Optional fields the skill should ADD to a user's config.yaml when missing. + # `default` is the value to insert. `since` is the schema version that + # introduced the field — skills can use it to decide whether to apply. + new_fields: + - path: "project.owner" + since: 2 + required: true + default: null + note: "Org or user that owns the target Projects v2 board. Required." + - path: "project.number" + since: 2 + required: true + default: null + note: "Project number (the integer in the URL: /projects/). Required." + - path: "scanners.codex" + since: 2 + default: false + note: "Enable OpenAI Codex LLM SAST (uses your local `codex` CLI subscription; no API key)." + - path: "scanners.gemma" + since: 2 + default: false + note: "Enable local Gemma LLM SAST via Ollama." + - path: "codex" + since: 2 + default: + binary: "codex" + model: null + timeout: 1200 + note: "Codex CLI tunables. Only used when scanners.codex is true." + - path: "gemma" + since: 2 + default: + base_url: null + model: null + keep_alive: null + timeout: 1800 + max_files: 60 + max_file_bytes: 12000 + max_total_bytes: 200000 + note: "Gemma SAST tunables. base_url/model/keep_alive fall back to triage:* when null." + - path: "cross_validate" + since: 2 + default: + enabled: true + codex_timeout: 300 + gemma_timeout: 180 + note: "When both scanners.codex AND scanners.gemma are true, each reviews the other's findings. False positives downgrade severity one notch; critical never auto-downgrades; findings are never suppressed." + + # Fields that were renamed. Skill should apply the rename in the user's config. + renamed_fields: + - from: "parent_issue" + to: "project" + since: 2 + note: "parent_issue (int) -> project (mapping). secscan no longer files as sub-issues under a parent epic; findings are flat items in a Projects v2 board. The skill should drop parent_issue from the user's config and prompt for project.{owner,number}." + + removed_fields: [] + +# Files inside the image worth knowing about (for documentation purposes — +# skills don't typically need to reach in past the entrypoint). +image_paths: + manifest: "/app/SECSCAN-MANIFEST.yaml" + source: "/app/secscan/" + rules: "/app/secscan/rules/" + config_mount: "/config" + work_mount: "/work" diff --git a/pyproject.toml b/pyproject.toml index f6e65bc..efdf6f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta" [project] name = "secscan" -version = "0.1.0" -description = "Stateless single-repo security scanner that files findings as GitHub sub-issues" +version = "0.2.0" +description = "Stateless single-repo security scanner; files findings into a GitHub Projects v2 board" requires-python = ">=3.11" dependencies = [ "PyYAML>=6.0", diff --git a/skill/SKILL.md b/skill/SKILL.md deleted file mode 100644 index 9d95313..0000000 --- a/skill/SKILL.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -name: secscan -description: | - Run the secscan security scanner on a repo. Detects the stack, runs OSV-Scanner, - Gitleaks, Semgrep, Trivy, Trufflehog, and optionally Codex+Gemma LLM SAST with - cross-validation. Files each finding as an issue into a GitHub Projects v2 board. - Trigger when the user asks to "scan", "run secscan", "check security", "audit - dependencies / secrets / code", or types /secscan. ---- - -# secscan skill - -You're the agent operating the secscan security scanner. Your job is to invoke -it correctly, monitor its output, and report back to the user. - -## When to invoke - -Trigger on requests like: -- "scan this repo for security issues" -- "run secscan" -- "check for secrets / CVEs / SAST issues" -- "/secscan" -- "audit dependencies" - -If the user just says "scan" without context, ask which repo (current dir? a -different one?) and which ref (default: `main`). - -## What secscan needs - -- The **security-scanner repo** cloned somewhere (the tooling itself). Default - location: `$SECSCAN_HOME` env var; fall back to `~/code/security-scanner` if - unset. If neither exists, tell the user to clone - `https://github.com/leverj/security-scanner` and set `SECSCAN_HOME`. -- A **config directory** with `config.yaml` (and optionally `.env.1password.tpl`). - Default: `$SECSCAN_HOME/config/`. Override via `--config-dir` or - `SECSCAN_CONFIG_DIR`. -- Docker running (or another container runtime); secscan is delivered as a - container image. -- A **GitHub Projects v2 board** for findings, configured under `project:` in - the config. PAT must have `repo` + `project` scopes. - -## Operating procedure - -1. **Locate the tooling.** Check `$SECSCAN_HOME`, then `~/code/security-scanner`. - If neither exists, surface the README installation steps (see - `references/README.md`) and stop. - -2. **Check config.** Run `$SECSCAN_HOME/secscan.sh check`. If it reports any - `✗`, walk the user through the fix using `references/CONFIG.md` as the - source of truth. Common failure modes: - - missing `config/config.yaml` → copy from `config.example.yaml` - - missing GITHUB_TOKEN or 1Password setup → see `references/CONFIG.md` - - docker not running → tell the user to start Docker Desktop - -3. **Pick the right run mode.** - - **Default to `--dry-run`** for the first run; surface what would be filed. - - Only run `--no-dry-run` after the user explicitly confirms. - - Codex + Gemma LLM SAST and cross-validation are **off by default in - `config.yaml`**; flip them on only when the user asks for "deep" or "LLM" - scanning, and warn them about subscription cost (Codex) + Ollama - prerequisites (Gemma). - -4. **Run it.** From the security-scanner repo: - ```bash - $SECSCAN_HOME/secscan.sh run --dry-run - ``` - For LLM-on runs against a non-default config dir: - ```bash - $SECSCAN_HOME/secscan.sh run --config-dir /path/to/cfg --no-dry-run - ``` - See `references/RUN.md` for the full set of flags and exit codes. - -5. **Report.** Quote the summary line secscan emits (`summary: created=N - dup-skipped=N ...`), surface any per-scanner failures, and link to the - GitHub project board for triage. - -## Hard rules - -- **Never run with `--no-dry-run`** unless the user explicitly confirmed in the - current turn — the dry-run default is there to prevent surprise issue - creation. -- **Never expose secrets.** GITHUB_TOKEN, 1Password env contents, Slack - webhooks must never appear in your messages back to the user. secscan - scrubs these from its own logs; you must too. -- **Don't edit `config/config.yaml` silently.** If a value needs changing, - show the proposed diff and ask first. -- **Honor the project board as the source of truth.** Don't try to dedup - findings yourself; that's secscan's job (deterministic fingerprints). - -## Where to look for deeper info - -- `references/README.md` — installation, prerequisites, how dedup works -- `references/CONFIG.md` — full config schema + 1Password setup walkthrough -- `references/RUN.md` — runbook, flags, exit codes, troubleshooting diff --git a/skill/install.sh b/skill/install.sh deleted file mode 100755 index 78726b8..0000000 --- a/skill/install.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash -# Install (or update) the secscan Claude Code skill. -# -# Copies this directory to ~/.claude/skills/secscan/ — or to $CLAUDE_SKILLS_DIR -# if that env var is set (CC honors a couple of locations; consult `cc --help` -# if your install puts skills elsewhere). -# -# Re-running this is safe: existing files are overwritten with the latest copy -# from the security-scanner repo. The user's $SECSCAN_HOME (where the scanner -# tooling itself lives) is unaffected. - -set -euo pipefail - -HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -DEST_BASE="${CLAUDE_SKILLS_DIR:-$HOME/.claude/skills}" -DEST="$DEST_BASE/secscan" - -mkdir -p "$DEST_BASE" -if [[ -d "$DEST" ]]; then - echo "updating existing skill at $DEST" -else - echo "installing skill to $DEST" -fi - -# rsync if available (preserves timestamps cleanly); fall back to cp -R. -if command -v rsync >/dev/null; then - rsync -a --delete --exclude install.sh "$HERE/" "$DEST/" -else - rm -rf "$DEST" - mkdir -p "$DEST" - cp -R "$HERE/SKILL.md" "$HERE/references" "$DEST/" -fi - -echo "done." -echo -echo "Make sure these env vars are set in your shell so the skill can find the scanner:" -echo " export SECSCAN_HOME=$(cd "$HERE/.." && pwd)" -echo " # optionally: export SECSCAN_CONFIG_DIR=/path/to/per-project/config" diff --git a/skill/references/CONFIG.md b/skill/references/CONFIG.md deleted file mode 100644 index e937a46..0000000 --- a/skill/references/CONFIG.md +++ /dev/null @@ -1,216 +0,0 @@ -# secscan config — reference - -The agent (you) reads this when configuring secscan for the user. The config -lives in **a directory**, not a single file — the whole directory is -bind-mounted into the container, so secrets-resolution files (1Password env -file, etc.) ride along with the main `config.yaml`. - -## Layout - -``` -config/ - config.yaml # required — main settings - config.example.yaml # committed template - .env.1password.tpl # required only when secrets.source: 1password - .env.1password.tpl.example # committed template -``` - -By default secscan looks at `$SECSCAN_HOME/config/`. Override with -`--config-dir` or `SECSCAN_CONFIG_DIR=/path/to/your-config`. - -## Required top-level keys - -```yaml -repo: "owner/name" # target repo to scan (must exist on GitHub) -ref: "main" # branch / tag / SHA to scan - -project: # target Projects v2 board for findings - owner: "owner" # org or user that owns the project - number: 5 # project number from the URL: /projects/ - -github_token_env: "GITHUB_TOKEN" # env var name that holds the PAT -``` - -**PAT scopes:** `repo` (full) + `project`. Classic PAT, not fine-grained -(fine-grained doesn't yet expose Projects v2 mutations as of late 2025). - -## Scanners — flip what runs - -```yaml -scanners: - osv: true # vulnerable language packages - gitleaks: true # secret patterns (with git history) - semgrep: true # SAST patterns - trivy: true # vuln + secret + IaC + license, all in one - trufflehog: true # verified-live secrets - syft: true # SBOM artifact (no project items filed) - - # LLM-driven SAST — off by default; opt-in. - codex: false # OpenAI Codex via local `codex` CLI (subscription) - gemma: false # Local Gemma 4 via Ollama -``` - -When **both** `codex` and `gemma` are true, cross-validation kicks in -automatically. See the `cross_validate:` block below. - -## Codex (subscription) - -```yaml -codex: - binary: "codex" # auto-detected on PATH - # model: "gpt-5-codex" # omit to use codex's configured default - timeout: 1200 # seconds; LLM scans take minutes on real repos -``` - -**Auth:** `codex login` outside this tool. secscan never sees an API key. - -**Prereq:** `codex` CLI installed (`brew install codex` or per docs) AND the -user is logged in via `codex login`. The runner refuses to start otherwise -with a clear "run `codex login`" message. - -## Gemma (local Ollama) - -```yaml -gemma: - # Falls back to triage.base_url / triage.model when blank — most users only - # configure Ollama once. - # base_url: "http://host.docker.internal:11434" - # model: "gemma4:26b" - # keep_alive: "5m" - timeout: 1800 - max_files: 60 # source files batched in one prompt - max_file_bytes: 12000 # per-file content cap (truncated past this) - max_total_bytes: 200000 # total prompt cap across all files -``` - -**Prereq:** Ollama running locally (or reachable via `host.docker.internal`) -and the named model pulled (`ollama pull gemma4:26b`). - -## Cross-validation - -```yaml -cross_validate: - enabled: true - codex_timeout: 300 # per-finding budget for codex reviewing a gemma flag - gemma_timeout: 180 # per-finding budget for gemma reviewing a codex flag -``` - -Only active when **both** `scanners.codex` and `scanners.gemma` are true. -Verdicts: `real` (no change), `false_positive` (severity downgraded one -notch — `high→medium`, `medium→low`, `low→info`; **`critical` stays -critical**), or `uncertain` (annotated, no change). Findings are **never -suppressed** — humans triage on the project board. - -## Triage (optional, post-scanner) - -Gemma also runs as a per-finding reviewer / Slack-intro writer, distinct from -its scanner role: - -```yaml -triage: - enabled: true - provider: "ollama" - model: "gemma4:26b" - base_url: "http://host.docker.internal:11434" - keep_alive: "5m" - timeout: 600 - prewarm: true # load the model in a background thread at startup - intro_timeout: 120 # tight cap on the Slack intro generation - # Granular feature flags (default conservative): - intro_enabled: true # cheap: one chat call total - prose_enabled: false # expensive: one call per new finding - fuzzy_dup_enabled: false # expensive: one call per new finding -``` - -## Slack (optional) - -```yaml -slack: - enabled: true - webhook_url_env: "SLACK_WEBHOOK_URL" # OR (mutually exclusive): - # channel_id_env: "SLACK_CHANNEL_ID" - # bot_token_env: "SLACK_BOT_TOKEN" -``` - -## Other knobs - -```yaml -paths: - exclude: # fnmatch globs + trailing-slash directory prefixes - - "archive/" - - "vendor/" - - ".github/scripts/" - -severity_floor: "low" # info | low | medium | high | critical -``` - -## Secrets — two paths - -### Path A: shell env (simplest) - -```yaml -secrets: - source: "env" -``` - -```bash -export GITHUB_TOKEN=github_pat_... -export SLACK_WEBHOOK_URL=https://hooks.slack.com/services/... # only if slack.enabled -$SECSCAN_HOME/secscan.sh run -``` - -Put the `export` lines in `~/.zshrc` to persist. The script fails fast if any -required var is missing. - -### Path B: 1Password (recommended for daily use) - -```yaml -secrets: - source: "1password" - env_file: ".env.1password.tpl" # relative to the config/ dir -``` - -Setup: - -```bash -brew install 1password-cli -op signin - -cp config/.env.1password.tpl.example config/.env.1password.tpl -$EDITOR config/.env.1password.tpl -``` - -The file should look like: - -``` -GITHUB_TOKEN=op:////GITHUB_TOKEN -SLACK_WEBHOOK_URL=op:////SLACK_WEBHOOK_URL -``` - -Then just: - -```bash -$SECSCAN_HOME/secscan.sh run -``` - -`secscan.sh` auto-wraps the invocation with -`op run --env-file=config/.env.1password.tpl -- docker run ...`. Tokens are -pulled JIT into the process env, never written to disk and never on argv. - -### Path C: Docker secrets / CI - -Set `secrets.source: env` in `config.yaml` and let your orchestrator -(GitHub Actions, K8s, Docker Swarm) populate `GITHUB_TOKEN` etc. in the -container env. The script picks them up the same way. - -## Verifying - -After any config change: - -```bash -$SECSCAN_HOME/secscan.sh check -``` - -This reports the state of every prerequisite (config file, docker, image, -secrets path, Slack vars). Walk the user through any `✗` it surfaces — see -the troubleshooting table in **RUN.md**. diff --git a/skill/references/README.md b/skill/references/README.md deleted file mode 100644 index 027a5ec..0000000 --- a/skill/references/README.md +++ /dev/null @@ -1,81 +0,0 @@ -# secscan — reference - -The agent (you) reads this when the user needs background on what secscan is, -how to install it, or what it produces. It's a condensed view; the root -`README.md` in the security-scanner repo is the authoritative source. - -## What it is - -Stateless single-repo security scanner. Detects a repo's tech stack, runs -several scanners on it, and files every finding as a deduplicated GitHub issue -attached to a Projects v2 board. - -The scanners (most run by default; LLM ones opt-in): - -| Scanner | What it finds | Default | -|---|---|---| -| OSV-Scanner | Vulnerable language packages (npm, pip, go, ...) | on | -| Gitleaks | Hardcoded secrets / keys (pattern + history) | on | -| Trufflehog | Verified-live secrets (validates against the vendor) | on | -| Semgrep | SAST patterns (eval, SQL concat, XSS, etc.) | on | -| Trivy | Vulns + secrets + IaC + license (all in one) | on | -| Syft | SBOM artifact (CycloneDX JSON) | on | -| Codex | LLM SAST via OpenAI Codex CLI (subscription) | **off** | -| Gemma | LLM SAST via local Ollama | **off** | - -When **both** Codex and Gemma are on, cross-validation runs — each tool -reviews the other's findings ("real / false_positive / uncertain"). False -positive verdicts downgrade severity one notch; critical is asymmetric (never -auto-downgrades). Findings are never suppressed. - -## Where state lives - -There is no internal database. The single source of truth is a **GitHub -Projects v2 board** the user owns. Each finding becomes an issue in the repo -plus a project item with `Severity` + `Category` single-select fields set. -Dedup is done by walking project items and reading deterministic fingerprints -embedded in issue bodies — once a finding is filed (or closed), it's never -re-filed. - -PAT scopes required: **`repo` + `project`** (classic PAT; fine-grained -doesn't yet expose Projects v2 mutations). - -## Install - -```bash -git clone https://github.com/leverj/security-scanner.git ~/code/security-scanner -export SECSCAN_HOME=~/code/security-scanner # add to .zshrc / .bashrc -cd $SECSCAN_HOME - -cp config/config.example.yaml config/config.yaml -$EDITOR config/config.yaml # see CONFIG.md - -./secscan.sh build # docker build secscan:latest -./secscan.sh check # verify everything is wired -./secscan.sh run # defaults to --dry-run -``` - -For 1Password-managed secrets and full config schema, see **CONFIG.md**. -For day-to-day operations and troubleshooting, see **RUN.md**. - -## Files inside `$SECSCAN_HOME/` - -``` -secscan/ # python package — scanners, sync, normalization -secscan.sh # wrapper around `docker run` -Dockerfile # builds secscan:latest (Python + all scanner binaries) -config/ # bind-mounted at /config inside the container - config.yaml # main settings (gitignored) - config.example.yaml # template (committed) - .env.1password.tpl # 1Password env file (gitignored) - .env.1password.tpl.example # template (committed) -skill/ # this skill bundle — what you're reading -``` - -The whole `config/` directory is the unit of bind-mount, so any file related -to secrets resolution rides along with the main config. - -## Spec - -The full design is in `$SECSCAN_HOME/secscan-spec.md` if the user wants the -deep dive (data model, fingerprint scheme, dedup rules, hostile-repo posture). diff --git a/skill/references/RUN.md b/skill/references/RUN.md deleted file mode 100644 index f44c727..0000000 --- a/skill/references/RUN.md +++ /dev/null @@ -1,156 +0,0 @@ -# secscan — runbook - -How to invoke secscan, what flags exist, what exit codes mean, and how to -recover from common failures. The agent (you) reads this when actually -operating the scanner for the user. - -## The three commands - -```bash -$SECSCAN_HOME/secscan.sh build # docker build the image (once, or after upgrades) -$SECSCAN_HOME/secscan.sh check # verify prereqs — run this before each run if unsure -$SECSCAN_HOME/secscan.sh run [...] # actually scan -``` - -## `run` flags - -``` ---config Use a specific config.yaml. Its parent dir is mounted. ---config-dir Use a specific config DIR; expects config.yaml inside. ---dry-run Don't file any issues (DEFAULT — bias toward safety). ---no-dry-run Actually create issues / project items. --- Everything after is passed verbatim to `python -m secscan`. -extra args Forwarded to the scanner CLI. -``` - -Defaults: -- `--dry-run` is added unless the caller passes `--no-dry-run`. -- `--config-dir` defaults to `$SECSCAN_HOME/config/` (override: - `SECSCAN_CONFIG_DIR` env). -- `--config` defaults to `/config.yaml` (override: - `SECSCAN_CONFIG` env). -- Image tag is `secscan:latest` (override: `SECSCAN_IMAGE`). - -## Exit codes - -| Code | Meaning | -|---|---| -| 0 | Success (findings may have been filed; existence of findings is not an error) | -| 2 | Bad config — fail before any scanner ran | -| 3 | All scanners failed — refused to report "all clear" | -| 4 | GitHub API failure (project not found, auth, etc.) | -| other non-zero | docker / shell / unexpected error | - -## Recipes - -### First-time dry-run - -```bash -$SECSCAN_HOME/secscan.sh check # are we wired? -$SECSCAN_HOME/secscan.sh run # dry-run by default -``` - -The summary line at the end looks like: - -``` -summary: created=0 dup-skipped=0 fuzzy-dup-skipped=0 below-floor=0 - total-findings=42 scanners-completed=5 scanners-failed=0 -``` - -If `created` is the number you expected to file, proceed to the real run. - -### Real run (after dry-run looks good) - -```bash -$SECSCAN_HOME/secscan.sh run --no-dry-run -``` - -**Hard rule:** never pass `--no-dry-run` unless the user explicitly confirmed -in the current turn. The default is there to prevent surprise issue creation. - -### Custom config dir (e.g. multiple projects) - -```bash -$SECSCAN_HOME/secscan.sh run --config-dir ~/.config/secscan/project-foo -``` - -The whole `~/.config/secscan/project-foo/` is bind-mounted at `/config` in -the container, so it should look exactly like the repo's `config/` layout -(at minimum: `config.yaml`). - -### Enable LLM SAST + cross-validation - -Edit `config.yaml`: - -```yaml -scanners: - codex: true - gemma: true -``` - -Then run. The first time you turn these on, do a `--dry-run` against a small -repo to calibrate signal/noise before pointing at a large codebase. - -**Prereq checks before flipping these on:** -- Codex: `command -v codex && codex doctor` shows `auth mode: chatgpt` - ("not logged in" → user must run `codex login` first). -- Gemma: `curl -sf $(grep base_url config/config.yaml | head -1 | awk '{print $2}' | tr -d '\"')/api/tags` - returns JSON (Ollama reachable) AND the model in `triage.model` / - `gemma.model` is pulled (`ollama list | grep gemma4:26b`). - -## What "completed" vs "failed" means - -A scanner that did NOT complete contributes **zero findings**. This is by -design — a crashed scanner must never read as "all clear" to downstream -tooling. The summary line distinguishes `scanners-completed` vs -`scanners-failed`. Investigate any failure before trusting a run. - -When a single scanner is failing repeatedly, you can flip it off in -`config.yaml` (`scanners.: false`) to unblock the rest while you fix it. - -## Common failures - -| Symptom | Cause | Fix | -|---|---|---| -| `config not found at ` | No `config.yaml` in config dir | `cp config/config.example.yaml config/config.yaml` and edit | -| `GITHUB_TOKEN not set` (env source) | Shell var unset | `export GITHUB_TOKEN=...` OR switch to 1Password (see CONFIG.md) | -| `op (1Password CLI) not installed` | 1Password path requires `op` | `brew install 1password-cli && op signin` | -| `op not signed in` | Signed-out 1P session | `op signin` | -| `.env.1password.tpl missing` | Per-user 1P env file not created | `cp config/.env.1password.tpl.example config/.env.1password.tpl && $EDITOR …` | -| `image not built yet` | No `secscan:latest` image | `$SECSCAN_HOME/secscan.sh build` | -| `docker daemon not reachable` | Docker Desktop not running | Start Docker Desktop | -| `GitHub API 404: project not found` | Wrong `project.owner` / `project.number`, or PAT missing `project` scope | Verify URL and PAT scopes (see CONFIG.md) | -| `scanner codex: NOT COMPLETED (auth failed — run `codex login` first)` | Codex CLI not authed | User runs `codex login` | -| `scanner gemma: NOT COMPLETED (ollama unreachable: ...)` | Ollama down or wrong URL | Start Ollama, or fix `gemma.base_url` | - -## Logs and artifacts - -- **stderr** is where secscan logs (one-line-per-event format). -- **Findings**: in the GitHub repo + the Projects v2 board configured under - `project:`. -- **SBOM** (when `scanners.syft: true`): written under `work/` inside the - container; the wrapper script wipes the container's `/work` on exit but - the SBOM path is logged via stderr (`sbom: cyclonedx -> /work/sbom-...`). -- **Slack** (when `slack.enabled: true`): per-category digest with severity - breakdown. - -## How dedup behaves - -Each finding has a deterministic fingerprint embedded in the issue body as -an HTML comment. On the next run, all project items (open AND closed) are -listed, fingerprints parsed back out, and any new finding whose fingerprint -already exists is **skipped — even if the existing issue is closed**. - -Closed = "humans triaged this; never re-file." If you need regression -re-surfacing, that's the external fixing system's concern, not secscan's. - -## Reporting back to the user - -After a run, surface: -1. The final summary line verbatim. -2. Any `scanners-failed` count above zero, with the per-scanner errors. -3. A link to the project board: `https://github.com/orgs//projects/`. -4. The dry-run / real-run mode, explicitly. - -Do **not** paste the full stderr log into your reply — it can be long. Quote -relevant excerpts only. From ea8de061f1af8d71af8b146b6b67126e4fe8677d Mon Sep 17 00:00:00 2001 From: Nirmal Gupta Date: Tue, 2 Jun 2026 12:34:06 -0500 Subject: [PATCH 3/5] refactor: rename secscan -> security-scan everywhere MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cosmetic rename across the codebase, image, manifest, and config: Python module secscan/ -> security_scan/ (underscore — Python identifier) PyPI name secscan -> security-scan CLI entry point secscan -> security-scan Wrapper script secscan.sh -> security-scan.sh Manifest filename SECSCAN-… -> SECURITY-SCAN-MANIFEST.yaml Spec filename secscan-spec -> security-scan-spec.md Env vars SECSCAN_* -> SECURITY_SCAN_* All log/user-facing strings now read `security-scan:` (hyphen). Python imports use `security_scan` (underscore) since identifiers can't contain hyphens. Fingerprint marker compatibility: new issues are filed with the `security-scan:` marker; the parser also accepts legacy `secscan:` markers so dedup against existing project items continues to work without a one-time backfill. Image name was already `leverj/security-scan` so no change there. Validated: - 221 tests pass after marker regex update - `pip install -e .` produces the `security-scan` CLI - `./security-scan.sh check` reports green - `docker build` succeeds and manifest extraction returns the new file Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/ci.yml | 4 +- .github/workflows/publish.yml | 10 +-- Dockerfile | 12 +-- README.md | 34 ++++----- ...NIFEST.yaml => SECURITY-SCAN-MANIFEST.yaml | 14 ++-- config/config.example.yaml | 12 +-- pyproject.toml | 8 +- secscan/__init__.py | 3 - secscan-spec.md => security-scan-spec.md | 8 +- secscan.sh => security-scan.sh | 50 ++++++------ security_scan/__init__.py | 3 + {secscan => security_scan}/__main__.py | 2 +- {secscan => security_scan}/config.py | 4 +- {secscan => security_scan}/cross_validate.py | 2 +- {secscan => security_scan}/detect.py | 0 {secscan => security_scan}/fingerprint.py | 9 ++- {secscan => security_scan}/github.py | 32 ++++---- {secscan => security_scan}/main.py | 26 +++---- {secscan => security_scan}/models.py | 0 {secscan => security_scan}/normalize.py | 2 +- {secscan => security_scan}/notify.py | 8 +- .../rules/javascript.yaml | 0 {secscan => security_scan}/rules/python.yaml | 0 {secscan => security_scan}/rules/secrets.yaml | 0 {secscan => security_scan}/rules/sqli.yaml | 0 .../rules/supabase.yaml | 0 {secscan => security_scan}/rules/xss.yaml | 0 .../runners/__init__.py | 0 {secscan => security_scan}/runners/codex.py | 4 +- {secscan => security_scan}/runners/gemma.py | 2 +- .../runners/gitleaks.py | 0 {secscan => security_scan}/runners/osv.py | 2 +- {secscan => security_scan}/runners/semgrep.py | 0 {secscan => security_scan}/runners/syft.py | 0 {secscan => security_scan}/runners/trivy.py | 0 .../runners/trufflehog.py | 0 {secscan => security_scan}/sync.py | 16 ++-- {secscan => security_scan}/triage.py | 8 +- tests/test_codex_runner.py | 40 +++++----- tests/test_config.py | 2 +- tests/test_cross_validate.py | 54 ++++++------- tests/test_detect.py | 2 +- tests/test_e2e_dryrun.py | 54 ++++++------- tests/test_fingerprint.py | 8 +- tests/test_gemma_runner.py | 20 ++--- tests/test_github.py | 22 +++--- tests/test_main.py | 76 +++++++++---------- tests/test_models.py | 2 +- tests/test_new_scanners.py | 24 +++--- tests/test_normalize.py | 4 +- tests/test_notify.py | 28 +++---- tests/test_resolve_rules.py | 10 +-- tests/test_runners.py | 36 ++++----- tests/test_sync.py | 20 ++--- tests/test_triage.py | 14 ++-- tools/backfill_markers.py | 14 ++-- 56 files changed, 354 insertions(+), 351 deletions(-) rename SECSCAN-MANIFEST.yaml => SECURITY-SCAN-MANIFEST.yaml (90%) delete mode 100644 secscan/__init__.py rename secscan-spec.md => security-scan-spec.md (98%) rename secscan.sh => security-scan.sh (90%) create mode 100644 security_scan/__init__.py rename {secscan => security_scan}/__main__.py (52%) rename {secscan => security_scan}/config.py (98%) rename {secscan => security_scan}/cross_validate.py (99%) rename {secscan => security_scan}/detect.py (100%) rename {secscan => security_scan}/fingerprint.py (87%) rename {secscan => security_scan}/github.py (94%) rename {secscan => security_scan}/main.py (95%) rename {secscan => security_scan}/models.py (100%) rename {secscan => security_scan}/normalize.py (99%) rename {secscan => security_scan}/notify.py (97%) rename {secscan => security_scan}/rules/javascript.yaml (100%) rename {secscan => security_scan}/rules/python.yaml (100%) rename {secscan => security_scan}/rules/secrets.yaml (100%) rename {secscan => security_scan}/rules/sqli.yaml (100%) rename {secscan => security_scan}/rules/supabase.yaml (100%) rename {secscan => security_scan}/rules/xss.yaml (100%) rename {secscan => security_scan}/runners/__init__.py (100%) rename {secscan => security_scan}/runners/codex.py (98%) rename {secscan => security_scan}/runners/gemma.py (99%) rename {secscan => security_scan}/runners/gitleaks.py (100%) rename {secscan => security_scan}/runners/osv.py (93%) rename {secscan => security_scan}/runners/semgrep.py (100%) rename {secscan => security_scan}/runners/syft.py (100%) rename {secscan => security_scan}/runners/trivy.py (100%) rename {secscan => security_scan}/runners/trufflehog.py (100%) rename {secscan => security_scan}/sync.py (90%) rename {secscan => security_scan}/triage.py (98%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2cb9aaa..13d35b3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,7 @@ jobs: pip install -e ".[dev]" - name: Lint (ruff) - run: ruff check secscan/ tests/ + run: ruff check security-scan/ tests/ - name: Tests (pytest) run: pytest -q @@ -58,6 +58,6 @@ jobs: with: context: . push: false - tags: secscan:ci + tags: security-scan:ci # Disable provenance/sbom for faster CI; can re-enable when we cut a release. provenance: false diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 5454b93..a6e243e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,8 +1,8 @@ name: Publish image -# Builds and publishes the secscan image to Docker Hub on every tag named v*. +# Builds and publishes the security-scan image to Docker Hub on every tag named v*. # The tag must match the [project.version] in pyproject.toml and the -# `version:` in SECSCAN-MANIFEST.yaml (a guard step verifies this). +# `version:` in SECURITY-SCAN-MANIFEST.yaml (a guard step verifies this). # # Required repository secrets: # DOCKERHUB_USERNAME the Docker Hub user/org that owns leverj/security-scan @@ -56,7 +56,7 @@ jobs: - name: Verify version alignment run: | py_version=$(grep -E '^version\s*=' pyproject.toml | head -1 | sed -E 's/.*"([^"]+)".*/\1/') - mf_version=$(grep -E '^version:' SECSCAN-MANIFEST.yaml | head -1 | sed -E 's/.*"([^"]+)".*/\1/') + mf_version=$(grep -E '^version:' SECURITY-SCAN-MANIFEST.yaml | head -1 | sed -E 's/.*"([^"]+)".*/\1/') want='${{ steps.tag.outputs.version }}' echo "tag=$want pyproject=$py_version manifest=$mf_version" @@ -66,7 +66,7 @@ jobs: exit 1 fi if [[ "$mf_version" != "$want" ]]; then - echo "::error::SECSCAN-MANIFEST.yaml version ($mf_version) != tag ($want). Bump the manifest or fix the tag." >&2 + echo "::error::SECURITY-SCAN-MANIFEST.yaml version ($mf_version) != tag ($want). Bump the manifest or fix the tag." >&2 exit 1 fi @@ -95,4 +95,4 @@ jobs: run: | docker run --rm --entrypoint cat \ "${{ env.IMAGE }}:${{ steps.tag.outputs.tag }}" \ - /app/SECSCAN-MANIFEST.yaml | head -5 + /app/SECURITY-SCAN-MANIFEST.yaml | head -5 diff --git a/Dockerfile b/Dockerfile index a796b96..e9a6a41 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# secscan — single-repo security scanner. Stateless. State lives in GitHub Issues. +# security-scan — single-repo security scanner. Stateless. State lives in GitHub Issues. # # Mount points (bind-mount at runtime — no VOLUME directive, so anonymous volumes # never accumulate when --rm is used): @@ -109,15 +109,15 @@ RUN set -eux; \ chmod +x /usr/local/bin/syft; \ syft --version -# --- secscan itself ------------------------------------------------------- +# --- security-scan itself ------------------------------------------------------- WORKDIR /app COPY pyproject.toml /app/pyproject.toml -COPY secscan /app/secscan +COPY security_scan /app/security_scan COPY README.md /app/README.md # Manifest the consuming skill reads to see version + needed config migrations. # Pull it out without starting the scanner: -# docker run --rm --entrypoint cat leverj/security-scan: /app/SECSCAN-MANIFEST.yaml -COPY SECSCAN-MANIFEST.yaml /app/SECSCAN-MANIFEST.yaml +# docker run --rm --entrypoint cat leverj/security-scan: /app/SECURITY-SCAN-MANIFEST.yaml +COPY SECURITY-SCAN-MANIFEST.yaml /app/SECURITY-SCAN-MANIFEST.yaml RUN pip install --no-cache-dir /app # Make sure the mount points exist (no VOLUME directive — keeps `--rm` from @@ -125,5 +125,5 @@ RUN pip install --no-cache-dir /app RUN mkdir -p /config /rules /work # Default entrypoint runs the scanner against /config/config.yaml. -ENTRYPOINT ["python", "-m", "secscan", "--config", "/config/config.yaml", "--work-dir", "/work"] +ENTRYPOINT ["python", "-m", "security_scan", "--config", "/config/config.yaml", "--work-dir", "/work"] CMD [] diff --git a/README.md b/README.md index 2572e1e..a543824 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# secscan +# security-scan [![CI](https://github.com/leverj/security-scanner/actions/workflows/ci.yml/badge.svg)](https://github.com/leverj/security-scanner/actions/workflows/ci.yml) [![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](LICENSE) @@ -17,7 +17,7 @@ Closing/fixing findings is out of scope — another system owns that. ```bash # 1. Create (or pick) a GitHub Projects v2 board for security findings. # Note its number (visible in the URL: /projects/). -# On first run secscan provisions two single-select fields on the board: +# On first run security-scan provisions two single-select fields on the board: # - Severity (critical, high, medium, low, info) # - Category (dependency, secret, sast, iac, license) @@ -28,16 +28,16 @@ $EDITOR config/config.yaml # set repo, ref, project.owner, project.number # 3. Set up secrets — pick ONE of the two paths in the next section # 4. Verify your setup, then run -./secscan.sh check # green checks across the board? -./secscan.sh build -./secscan.sh run # defaults to --dry-run; add --no-dry-run to actually file issues +./security-scan.sh check # green checks across the board? +./security-scan.sh build +./security-scan.sh run # defaults to --dry-run; add --no-dry-run to actually file issues ``` --- ## Setup: secrets -secscan needs a GitHub Personal Access Token, and optionally a Slack webhook URL. +security-scan needs a GitHub Personal Access Token, and optionally a Slack webhook URL. **Secrets never go into `config.yaml`** — they come in via env vars at runtime. `config.yaml` declares which path you're using: @@ -68,7 +68,7 @@ export GITHUB_TOKEN=github_pat_... # Optional Slack — get a webhook from https://api.slack.com/apps export SLACK_WEBHOOK_URL=https://hooks.slack.com/services/... -./secscan.sh run +./security-scan.sh run ``` To persist, put the `export` lines in `~/.zshrc` or `~/.bashrc`. The script verifies @@ -105,7 +105,7 @@ secrets: ``` ```bash -./secscan.sh run # auto-wraps with: op run --env-file=.env.1password.tpl -- docker run ... +./security-scan.sh run # auto-wraps with: op run --env-file=.env.1password.tpl -- docker run ... ``` The file `.env.1password.tpl` is `.gitignore`d. The committed @@ -116,7 +116,7 @@ and never commit your filled-in copy. For container orchestrators (Docker Swarm, K8s, GitHub Actions, etc.), populate `GITHUB_TOKEN` (and friends) via your platform's secret mechanism so it appears -in the container's environment. With `secrets.source: env`, `secscan.sh` (or a +in the container's environment. With `secrets.source: env`, `security-scan.sh` (or a direct `docker run`) will pick it up. --- @@ -136,7 +136,7 @@ need re-surfacing of regressions, that's the external fixing system's concern. ## Troubleshooting -`./secscan.sh check` reports the status of every prerequisite: +`./security-scan.sh check` reports the status of every prerequisite: ``` == config == @@ -144,7 +144,7 @@ need re-surfacing of regressions, that's the external fixing system's concern. == docker == ✓ docker is running == image == - ✓ secscan:latest present # ⚠ "not built yet" if you skipped `build` + ✓ security-scan:latest present # ⚠ "not built yet" if you skipped `build` == secrets (1password) == ✓ op (1Password CLI) installed ✓ op signed in @@ -162,7 +162,7 @@ Common failure modes and what `check` says: | `op not installed` (1Password source) | `brew install 1password-cli && op signin` | | `.env.1password.tpl missing` | `cp config/.env.1password.tpl.example config/.env.1password.tpl && $EDITOR …` | | `SLACK_… unset` (slack.enabled=true) | Either export the var, add it to the 1Password env file, or set `slack.enabled: false` | -| `image not built yet` | `./secscan.sh build` | +| `image not built yet` | `./security-scan.sh build` | | `docker daemon not reachable` | Start Docker Desktop | --- @@ -176,28 +176,28 @@ python3 -m venv .venv && .venv/bin/pip install -e ".[dev]" The scanner binaries (osv-scanner, gitleaks, semgrep) live only inside the Docker image — local tests use SARIF fixtures and mocked subprocesses. To exercise the -real binaries, run via `./secscan.sh run`. +real binaries, run via `./security-scan.sh run`. --- ## Use as a Claude Code skill The companion bundle at [`leverj/ai-skills`](https://github.com/leverj/ai-skills) -ships a `secscan` skill that drives this image directly: +ships a `security-scan` skill that drives this image directly: ``` /plugin marketplace add leverj/ai-skills /plugin install leverj@leverj-ai-skills -# then: /leverj:secscan run +# then: /leverj:security-scan run ``` The skill pulls and runs the published Docker image `leverj/security-scan:`, bind-mounts your `config/` directory at `/config:ro`, and offers a user-confirmed upgrade flow when a newer image -version is available (the image ships a `SECSCAN-MANIFEST.yaml` describing +version is available (the image ships a `SECURITY-SCAN-MANIFEST.yaml` describing its version + any config fields the skill should add to your local `config.yaml`). ## Spec -See [secscan-spec.md](secscan-spec.md) for the full design. +See [security-scan-spec.md](security-scan-spec.md) for the full design. diff --git a/SECSCAN-MANIFEST.yaml b/SECURITY-SCAN-MANIFEST.yaml similarity index 90% rename from SECSCAN-MANIFEST.yaml rename to SECURITY-SCAN-MANIFEST.yaml index 7f510f5..6b80cf4 100644 --- a/SECSCAN-MANIFEST.yaml +++ b/SECURITY-SCAN-MANIFEST.yaml @@ -1,10 +1,10 @@ -# SECSCAN-MANIFEST.yaml +# SECURITY-SCAN-MANIFEST.yaml # # Declarative contract between this image and any tool (skill, CI job, etc.) -# that drives it. Baked into the image at /app/SECSCAN-MANIFEST.yaml. +# that drives it. Baked into the image at /app/SECURITY-SCAN-MANIFEST.yaml. # # Read it from outside the running container with: -# docker run --rm --entrypoint cat leverj/security-scan: /app/SECSCAN-MANIFEST.yaml +# docker run --rm --entrypoint cat leverj/security-scan: /app/SECURITY-SCAN-MANIFEST.yaml # # Contract: # - `version` matches pyproject.toml's [project.version] and the docker tag. @@ -102,15 +102,15 @@ config: - from: "parent_issue" to: "project" since: 2 - note: "parent_issue (int) -> project (mapping). secscan no longer files as sub-issues under a parent epic; findings are flat items in a Projects v2 board. The skill should drop parent_issue from the user's config and prompt for project.{owner,number}." + note: "parent_issue (int) -> project (mapping). security-scan no longer files as sub-issues under a parent epic; findings are flat items in a Projects v2 board. The skill should drop parent_issue from the user's config and prompt for project.{owner,number}." removed_fields: [] # Files inside the image worth knowing about (for documentation purposes — # skills don't typically need to reach in past the entrypoint). image_paths: - manifest: "/app/SECSCAN-MANIFEST.yaml" - source: "/app/secscan/" - rules: "/app/secscan/rules/" + manifest: "/app/SECURITY-SCAN-MANIFEST.yaml" + source: "/app/security-scan/" + rules: "/app/security-scan/rules/" config_mount: "/config" work_mount: "/work" diff --git a/config/config.example.yaml b/config/config.example.yaml index ae0e798..471201f 100644 --- a/config/config.example.yaml +++ b/config/config.example.yaml @@ -1,8 +1,8 @@ -# secscan config — mount read-only at /config/config.yaml inside the container. +# security-scan config — mount read-only at /config/config.yaml inside the container. # Secrets (token, Slack creds) come from env vars referenced by *_env keys below, # never from this file. # -# The `secrets:` block below tells ./secscan.sh how to populate those env vars +# The `secrets:` block below tells ./security-scan.sh how to populate those env vars # before invoking the container. Python code reads only os.environ, regardless. repo: "leverj/ezel" @@ -11,7 +11,7 @@ ref: "dev" # Target a GitHub Projects v2 board. Findings file as flat items here — no # parent epic / sub-issue relationship. The board's URL is # https://github.com/orgs//projects/ (or /users//...). -# secscan idempotently provisions two single-select fields on first run: +# security-scan idempotently provisions two single-select fields on first run: # - Severity (critical, high, medium, low, info) # - Category (dependency, secret, sast, iac, license) # PAT must have BOTH `repo` and `project` scopes. @@ -20,7 +20,7 @@ project: number: 5 github_token_env: "GITHUB_TOKEN" -# How ./secscan.sh sources the env vars named in *_env keys above. +# How ./security-scan.sh sources the env vars named in *_env keys above. # source: env -> assume GITHUB_TOKEN (etc.) are already exported in your shell # source: 1password -> auto-prefix with `op run --env-file=` secrets: @@ -47,7 +47,7 @@ scanners: codex: false # OpenAI Codex via local `codex` CLI (uses your subscription) gemma: false # Local Gemma 4 via Ollama -# Codex CLI tunables. Auth is via `codex login` outside this tool; secscan +# Codex CLI tunables. Auth is via `codex login` outside this tool; security-scan # never sees an API key. The CLI must be on PATH. codex: binary: "codex" @@ -91,7 +91,7 @@ triage: # Timeout for the heavy paths (fuzzy-dedup, prose). A ~17 GB model's first # call of the day can take minutes; subsequent calls are fast (keep_alive). timeout: 600 - # Start the model loading in a BACKGROUND thread when secscan starts so it + # Start the model loading in a BACKGROUND thread when security-scan starts so it # warms in parallel with the scanners. Strongly recommended for big models. prewarm: true # Slack intro is a single one-liner at the end; cap it tightly so a slow diff --git a/pyproject.toml b/pyproject.toml index efdf6f5..2de1af0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools>=68"] build-backend = "setuptools.build_meta" [project] -name = "secscan" +name = "security-scan" version = "0.2.0" description = "Stateless single-repo security scanner; files findings into a GitHub Projects v2 board" requires-python = ">=3.11" @@ -20,14 +20,14 @@ dev = [ ] [project.scripts] -secscan = "secscan.main:cli" +security-scan = "security_scan.main:cli" [tool.setuptools.packages.find] -include = ["secscan*"] +include = ["security_scan*"] exclude = ["tests*"] [tool.setuptools.package-data] -secscan = ["rules/**/*.yaml", "rules/**/*.yml"] +security_scan = ["rules/**/*.yaml", "rules/**/*.yml"] [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/secscan/__init__.py b/secscan/__init__.py deleted file mode 100644 index fefcead..0000000 --- a/secscan/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -"""secscan — stateless single-repo security scanner that files findings as GitHub sub-issues.""" - -__version__ = "0.1.0" diff --git a/secscan-spec.md b/security-scan-spec.md similarity index 98% rename from secscan-spec.md rename to security-scan-spec.md index a52be8c..6f2d164 100644 --- a/secscan-spec.md +++ b/security-scan-spec.md @@ -1,4 +1,4 @@ -# secscan — Architecture & Build Spec (v1) +# security-scan — Architecture & Build Spec (v1) A single‑repo, stateless, self‑hosted security scanner that detects a repo's tech stack, runs the right scanners, and files each finding as a deduplicated GitHub sub‑issue under a @@ -44,7 +44,7 @@ This document is written to be handed to Claude Code and built module by module. ## 3. Module breakdown ``` -secscan/ +security-scan/ config.py # load + validate config (YAML) and env (token) detect.py # stack detection (manifest walk + optional Linguist cross-check) runners/ # one module per scanner, each returns SARIF (or is normalized to it) @@ -102,7 +102,7 @@ fingerprint = "fp_" + sha256(key_basis).hexdigest()[:16] **Marker** embedded in every issue body (hidden HTML comment), so a future run can read it back: ``` - + ``` `github.py` lists **all** sub‑issues of the parent (state=all), parses these markers, and builds the set of already‑filed fingerprints. @@ -223,7 +223,7 @@ Volumes: /work (rw) -> ephemeral per-run clone + scratch (can be tmpfs) Secrets: GITHUB_TOKEN, SLACK_* via env (docker run --env-file, Docker secret, or 1Password injection) -Entrypoint: python -m secscan --config /config/config.yaml +Entrypoint: python -m security-scan --config /config/config.yaml ``` Stateless: the container holds no state between runs; everything durable is in GitHub Issues. The clone lives in `/work` and is wiped each run. Token file (if used instead of env) must be `600` and is never logged (mask in all output). diff --git a/secscan.sh b/security-scan.sh similarity index 90% rename from secscan.sh rename to security-scan.sh index 8d458fc..73a2252 100755 --- a/secscan.sh +++ b/security-scan.sh @@ -1,9 +1,9 @@ #!/usr/bin/env bash -# Convenience wrapper for building and running the secscan container. +# Convenience wrapper for building and running the security-scan container. # -# ./secscan.sh build -> docker build the image -# ./secscan.sh run [args...] -> docker run, default --dry-run, forwards extra args -# ./secscan.sh check -> validate setup (config, secrets, docker, image) +# ./security-scan.sh build -> docker build the image +# ./security-scan.sh run [args...] -> docker run, default --dry-run, forwards extra args +# ./security-scan.sh check -> validate setup (config, secrets, docker, image) # # Two things are config-driven and read from config.yaml at runtime: # @@ -25,15 +25,15 @@ # # Default config directory: ./config/. Override with one of: # --config /path/to/cfg.yaml # explicit file path (its parent dir is mounted) -# SECSCAN_CONFIG=... # same thing via env var +# SECURITY_SCAN_CONFIG=... # same thing via env var # SECSCAN_CONFIG_DIR=... # mount this dir instead; expects config.yaml inside # -# When the skill packages secscan, point SECSCAN_CONFIG_DIR at the per-project +# When the skill packages security-scan, point SECSCAN_CONFIG_DIR at the per-project # config the agent maintains for the user. set -euo pipefail -IMAGE="${SECSCAN_IMAGE:-secscan:latest}" +IMAGE="${SECURITY_SCAN_IMAGE:-security-scan:latest}" HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" DEFAULT_CONFIG_DIR="$HERE/config" @@ -124,7 +124,7 @@ cmd_build() { cmd_check() { local config_dir="${SECSCAN_CONFIG_DIR:-$DEFAULT_CONFIG_DIR}" - local config="${SECSCAN_CONFIG:-$config_dir/config.yaml}" + local config="${SECURITY_SCAN_CONFIG:-$config_dir/config.yaml}" local ok=1 echo "== config ==" @@ -152,7 +152,7 @@ cmd_check() { if docker image inspect "$IMAGE" >/dev/null 2>&1; then echo " ✓ $IMAGE present" else - echo " ⚠ $IMAGE not built yet — run: ./secscan.sh build" + echo " ⚠ $IMAGE not built yet — run: ./security-scan.sh build" fi if [[ -f "$config" ]]; then @@ -201,10 +201,10 @@ cmd_check() { echo if [[ $ok -eq 1 ]]; then - echo "all good. try: ./secscan.sh run" + echo "all good. try: ./security-scan.sh run" return 0 else - echo "fix the ✗ items above, then re-run ./secscan.sh check" + echo "fix the ✗ items above, then re-run ./security-scan.sh check" return 1 fi } @@ -213,7 +213,7 @@ cmd_run() { command -v docker >/dev/null || die "docker not on PATH" local config_dir="${SECSCAN_CONFIG_DIR:-$DEFAULT_CONFIG_DIR}" - local config="${SECSCAN_CONFIG:-$config_dir/config.yaml}" + local config="${SECURITY_SCAN_CONFIG:-$config_dir/config.yaml}" local extra_args=() local have_dry_run=0 @@ -290,7 +290,7 @@ Two ways to fix this: 1) Export it now: export GITHUB_TOKEN=github_pat_xxx # see README.md "Option A" - ./secscan.sh run + ./security-scan.sh run 2) Switch to 1Password (recommended for daily use): # in config.yaml @@ -300,9 +300,9 @@ Two ways to fix this: # then: cp .env.1password.tpl.example .env.1password.tpl \$EDITOR .env.1password.tpl # set op:// vault paths - ./secscan.sh run + ./security-scan.sh run -Run \`./secscan.sh check\` to see your full setup status. +Run \`./security-scan.sh check\` to see your full setup status. EOF exit 1 fi @@ -319,7 +319,7 @@ Either export them: …or set slack.enabled: false in $config to disable Slack for this run. -Run \`./secscan.sh check\` to see your full setup status. +Run \`./security-scan.sh check\` to see your full setup status. EOF exit 1 fi @@ -352,7 +352,7 @@ Create it from the committed template: cp config/.env.1password.tpl.example config/.env.1password.tpl \$EDITOR config/.env.1password.tpl # set op://// paths -The template lists every env var secscan understands. +The template lists every env var security-scan understands. EOF exit 1 fi @@ -380,21 +380,21 @@ case "${1:-}" in check) shift; cmd_check "$@" ;; ""|-h|--help) cat </config.yaml (override with SECSCAN_CONFIG env) - image tag defaults to "secscan:latest" (override with SECSCAN_IMAGE env) + --config defaults to /config.yaml (override with SECURITY_SCAN_CONFIG env) + image tag defaults to "security-scan:latest" (override with SECURITY_SCAN_IMAGE env) The whole --config-dir is bind-mounted read-only at /config inside the container, so any related files (the 1Password env template, etc.) ride along. @@ -412,7 +412,7 @@ slack (driven by config.yaml): slack.enabled: true with slack.channel_id_env + slack.bot_token_env -> both named vars must be set (uses chat.postMessage) -Run \`./secscan.sh check\` for a full setup status. +Run \`./security-scan.sh check\` for a full setup status. EOF ;; *) die "unknown command: $1 (try 'build', 'run', or 'check')" ;; diff --git a/security_scan/__init__.py b/security_scan/__init__.py new file mode 100644 index 0000000..33be793 --- /dev/null +++ b/security_scan/__init__.py @@ -0,0 +1,3 @@ +"""security_scan — stateless single-repo security scanner that files findings as GitHub sub-issues.""" + +__version__ = "0.1.0" diff --git a/secscan/__main__.py b/security_scan/__main__.py similarity index 52% rename from secscan/__main__.py rename to security_scan/__main__.py index 9e20d9f..2f7510e 100644 --- a/secscan/__main__.py +++ b/security_scan/__main__.py @@ -1,4 +1,4 @@ -from secscan.main import cli +from security_scan.main import cli if __name__ == "__main__": cli() diff --git a/secscan/config.py b/security_scan/config.py similarity index 98% rename from secscan/config.py rename to security_scan/config.py index 45142de..3a30b33 100644 --- a/secscan/config.py +++ b/security_scan/config.py @@ -12,7 +12,7 @@ import yaml -from secscan.models import SEVERITY_ORDER +from security_scan.models import SEVERITY_ORDER class ConfigError(ValueError): @@ -37,7 +37,7 @@ class ScannersConfig: @dataclass class CodexConfig: """Tunables for the local Codex CLI runner. Auth is via `codex login` - (ChatGPT subscription); secscan never sees an API key.""" + (ChatGPT subscription); security_scan never sees an API key.""" binary: str = "codex" model: str | None = None # None => use codex's configured default timeout: int = 1200 # seconds; LLM scans can run minutes diff --git a/secscan/cross_validate.py b/security_scan/cross_validate.py similarity index 99% rename from secscan/cross_validate.py rename to security_scan/cross_validate.py index 68aa6fd..d712567 100644 --- a/secscan/cross_validate.py +++ b/security_scan/cross_validate.py @@ -39,7 +39,7 @@ import requests -from secscan.models import SEVERITY_ORDER, Finding +from security_scan.models import SEVERITY_ORDER, Finding # Severity downgrade ladder. Critical is intentionally NOT downgraded — the # asymmetry is deliberate (worst case for FP-on-critical is one extra issue diff --git a/secscan/detect.py b/security_scan/detect.py similarity index 100% rename from secscan/detect.py rename to security_scan/detect.py diff --git a/secscan/fingerprint.py b/security_scan/fingerprint.py similarity index 87% rename from secscan/fingerprint.py rename to security_scan/fingerprint.py index afc41c6..acc40e4 100644 --- a/secscan/fingerprint.py +++ b/security_scan/fingerprint.py @@ -10,10 +10,13 @@ import hashlib import re -from secscan.models import Finding +from security_scan.models import Finding MARKER_RE = re.compile( - r"" + # Accept legacy `secscan:` marker too so issues filed by the pre-rename code + # still match for dedup. New markers are written as `security-scan:` (see + # inject_marker below). + r"" ) @@ -68,7 +71,7 @@ def resolve_fingerprint(f: Finding) -> str: def inject_marker(body: str, fp: str, f: Finding) -> str: """Append the hidden marker to an issue body. Code-owned, regardless of LLM prose.""" - marker = f"" + marker = f"" if MARKER_RE.search(body): return MARKER_RE.sub(marker, body) sep = "\n\n" if body and not body.endswith("\n") else "" diff --git a/secscan/github.py b/security_scan/github.py similarity index 94% rename from secscan/github.py rename to security_scan/github.py index dc0071a..acb6592 100644 --- a/secscan/github.py +++ b/security_scan/github.py @@ -19,11 +19,11 @@ _API = "https://api.github.com" _GRAPHQL = "https://api.github.com/graphql" -_UA = "secscan/0.1" +_UA = "security_scan/0.1" _ACCEPT = "application/vnd.github+json" _API_VERSION = "2022-11-28" -# Single-select options + colors that secscan creates on the target Project v2 if +# Single-select options + colors that security_scan creates on the target Project v2 if # the user hasn't created them already. GitHub's `ProjectV2SingleSelectFieldOptionColor` # enum accepts: GRAY, BLUE, GREEN, YELLOW, ORANGE, RED, PINK, PURPLE. _SEVERITY_OPTIONS: list[tuple[str, str]] = [ @@ -131,7 +131,7 @@ def _scrub(self, text: str) -> str: def resolve_project(self, owner: str, number: int) -> ProjectContext: """Find the Projects v2 board by (owner, number). Idempotently ensures - single-select `Severity` and `Category` fields exist with the secscan + single-select `Severity` and `Category` fields exist with the security_scan option set. Re-running is safe. """ if self.dry_run: @@ -257,18 +257,18 @@ def create_issue(self, title: str, body: str, labels: list[str] | None = None) - # Color palette per category/severity. Anything unmapped becomes mid-grey. _LABEL_COLOR = { # categories - "secscan:dependency": "5319e7", # purple — language/OS package CVEs - "secscan:secret": "d93f0b", # red — pattern-matched secret - "secscan:secret-verified": "b60205", # dark red — live/verified secret - "secscan:sast": "fbca04", # yellow — code patterns - "secscan:iac": "0e8a16", # green — IaC misconfig - "secscan:license": "1d76db", # blue — license issues + "security-scan:dependency": "5319e7", # purple — language/OS package CVEs + "security-scan:secret": "d93f0b", # red — pattern-matched secret + "security-scan:secret-verified": "b60205", # dark red — live/verified secret + "security-scan:sast": "fbca04", # yellow — code patterns + "security-scan:iac": "0e8a16", # green — IaC misconfig + "security-scan:license": "1d76db", # blue — license issues # severities - "secscan:critical": "b60205", - "secscan:high": "d93f0b", - "secscan:medium": "fbca04", - "secscan:low": "c5def5", - "secscan:info": "ededed", + "security-scan:critical": "b60205", + "security-scan:high": "d93f0b", + "security-scan:medium": "fbca04", + "security-scan:low": "c5def5", + "security-scan:info": "ededed", } _LABEL_CREATED: set[str] # populated in __init__ @@ -282,7 +282,7 @@ def _ensure_label(self, name: str) -> None: self._request( "POST", f"{_API}/repos/{self.owner}/{self.name}/labels", - json={"name": name, "color": color, "description": "secscan-managed label"}, + json={"name": name, "color": color, "description": "security_scan-managed label"}, ) except GitHubError as e: # 422 = label already exists with this name; anything else is a real problem. @@ -346,7 +346,7 @@ def _ensure_single_select_field( if missing: print( f"github: project field {name!r} is missing options {missing}; " - "secscan won't be able to set those values until you add them", + "security_scan won't be able to set those values until you add them", file=sys.stderr, ) return ProjectField(id=existing["id"], options=opts) diff --git a/secscan/main.py b/security_scan/main.py similarity index 95% rename from secscan/main.py rename to security_scan/main.py index b7655a3..dd1655c 100644 --- a/secscan/main.py +++ b/security_scan/main.py @@ -14,19 +14,19 @@ import tempfile from pathlib import Path -from secscan.config import Config, ConfigError, load_config -from secscan.detect import DetectionResult, ScannerTarget, detect_stack -from secscan.github import GitHub, GitHubError -from secscan.models import Finding -from secscan.normalize import normalize_sarif -from secscan.notify import post_digest -from secscan.runners import RunnerResult -from secscan.sync import SyncResult, sync +from security_scan.config import Config, ConfigError, load_config +from security_scan.detect import DetectionResult, ScannerTarget, detect_stack +from security_scan.github import GitHub, GitHubError +from security_scan.models import Finding +from security_scan.normalize import normalize_sarif +from security_scan.notify import post_digest +from security_scan.runners import RunnerResult +from security_scan.sync import SyncResult, sync def cli(argv: list[str] | None = None) -> int: parser = argparse.ArgumentParser( - prog="secscan", + prog="security_scan", description="Stateless single-repo security scanner; files findings into a GitHub Projects v2 board.", ) parser.add_argument("--config", required=True, help="Path to config.yaml") @@ -45,7 +45,7 @@ def cli(argv: list[str] | None = None) -> int: def run(cfg: Config, dry_run: bool = False, work_dir: str | None = None, keep_work: bool = False) -> int: - work_root = Path(work_dir) if work_dir else Path(tempfile.mkdtemp(prefix="secscan-")) + work_root = Path(work_dir) if work_dir else Path(tempfile.mkdtemp(prefix="security_scan-")) repo_dir = work_root / cfg.repo_name gh = GitHub(cfg.github_token, cfg.repo_owner, cfg.repo_name, dry_run=dry_run) @@ -83,7 +83,7 @@ def run(cfg: Config, dry_run: bool = False, work_dir: str | None = None, keep_wo if (cfg.cross_validate.enabled and "codex" in completed_scanners and "gemma" in completed_scanners): - from secscan.cross_validate import cross_validate + from security_scan.cross_validate import cross_validate before = sum(1 for f in findings if f.scanner in ("codex", "gemma")) print(f"cross-validate: reviewing {before} LLM finding(s) bidirectionally", file=sys.stderr) cross_validate( @@ -195,7 +195,7 @@ def _scan_and_normalize( def _invoke_runner(t: ScannerTarget, cfg: Config, repo_dir: Path, semgrep_rules: Path | str | None) -> RunnerResult: """Dynamically import the runner so missing optional bits never block import-time.""" - mod = importlib.import_module(f"secscan.runners.{t.scanner}") + mod = importlib.import_module(f"security_scan.runners.{t.scanner}") if t.scanner == "osv": return mod.run(t.targets[0], exclude=cfg.paths.exclude) if t.scanner == "gitleaks": @@ -300,7 +300,7 @@ def _maybe_triage(cfg: Config): return None try: # Lazy import to avoid touching `requests` when triage is off. - from secscan.triage import Triage + from security_scan.triage import Triage t = Triage(cfg.triage) # Kick off model warm-up in the background; scans run in parallel. t.start_warmup() diff --git a/secscan/models.py b/security_scan/models.py similarity index 100% rename from secscan/models.py rename to security_scan/models.py diff --git a/secscan/normalize.py b/security_scan/normalize.py similarity index 99% rename from secscan/normalize.py rename to security_scan/normalize.py index e8758ed..acc32d9 100644 --- a/secscan/normalize.py +++ b/security_scan/normalize.py @@ -11,7 +11,7 @@ import sys from fnmatch import fnmatchcase -from secscan.models import Finding, normalize_severity +from security_scan.models import Finding, normalize_severity _CATEGORY = { "osv": "dependency", diff --git a/secscan/notify.py b/security_scan/notify.py similarity index 97% rename from secscan/notify.py rename to security_scan/notify.py index c2f4176..a7d787e 100644 --- a/secscan/notify.py +++ b/security_scan/notify.py @@ -12,9 +12,9 @@ import requests -from secscan.config import SlackConfig -from secscan.models import Finding -from secscan.sync import SyncResult +from security_scan.config import SlackConfig +from security_scan.models import Finding +from security_scan.sync import SyncResult def post_digest( @@ -131,7 +131,7 @@ def _default_digest( by_sev[f.severity] = by_sev.get(f.severity, 0) + 1 lines: list[str] = [ - f":lock: *secscan* — `{repo}@{ref}` — " + f":lock: *security_scan* — `{repo}@{ref}` — " f"" ] diff --git a/secscan/rules/javascript.yaml b/security_scan/rules/javascript.yaml similarity index 100% rename from secscan/rules/javascript.yaml rename to security_scan/rules/javascript.yaml diff --git a/secscan/rules/python.yaml b/security_scan/rules/python.yaml similarity index 100% rename from secscan/rules/python.yaml rename to security_scan/rules/python.yaml diff --git a/secscan/rules/secrets.yaml b/security_scan/rules/secrets.yaml similarity index 100% rename from secscan/rules/secrets.yaml rename to security_scan/rules/secrets.yaml diff --git a/secscan/rules/sqli.yaml b/security_scan/rules/sqli.yaml similarity index 100% rename from secscan/rules/sqli.yaml rename to security_scan/rules/sqli.yaml diff --git a/secscan/rules/supabase.yaml b/security_scan/rules/supabase.yaml similarity index 100% rename from secscan/rules/supabase.yaml rename to security_scan/rules/supabase.yaml diff --git a/secscan/rules/xss.yaml b/security_scan/rules/xss.yaml similarity index 100% rename from secscan/rules/xss.yaml rename to security_scan/rules/xss.yaml diff --git a/secscan/runners/__init__.py b/security_scan/runners/__init__.py similarity index 100% rename from secscan/runners/__init__.py rename to security_scan/runners/__init__.py diff --git a/secscan/runners/codex.py b/security_scan/runners/codex.py similarity index 98% rename from secscan/runners/codex.py rename to security_scan/runners/codex.py index e4842c7..df4929b 100644 --- a/secscan/runners/codex.py +++ b/security_scan/runners/codex.py @@ -131,7 +131,7 @@ def run( if shutil.which(binary) is None: return RunnerResult("codex", None, False, f"binary not found: {binary}") - with tempfile.TemporaryDirectory(prefix="codex-secscan-") as td: + with tempfile.TemporaryDirectory(prefix="codex-security_scan-") as td: schema_path = Path(td) / "schema.json" output_path = Path(td) / "output.json" schema_path.write_text(json.dumps(_SCHEMA)) @@ -160,7 +160,7 @@ def run( text=True, timeout=timeout, check=False, - # Don't inherit secscan's env wholesale — keep CODEX_HOME etc., but + # Don't inherit security_scan's env wholesale — keep CODEX_HOME etc., but # strip anything that might confuse the agent. Codex reads its own # config from ~/.codex/. env={**os.environ}, diff --git a/secscan/runners/gemma.py b/security_scan/runners/gemma.py similarity index 99% rename from secscan/runners/gemma.py rename to security_scan/runners/gemma.py index e565b15..a486f90 100644 --- a/secscan/runners/gemma.py +++ b/security_scan/runners/gemma.py @@ -25,7 +25,7 @@ from . import RunnerResult -# Extensions worth feeding to the model. Mirrors secscan/detect._SEMGREP_EXTS with +# Extensions worth feeding to the model. Mirrors security_scan/detect._SEMGREP_EXTS with # a few SQL/HCL/TF additions since LLM reading isn't limited to semgrep's parsers. _SOURCE_EXTS = { ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs", diff --git a/secscan/runners/gitleaks.py b/security_scan/runners/gitleaks.py similarity index 100% rename from secscan/runners/gitleaks.py rename to security_scan/runners/gitleaks.py diff --git a/secscan/runners/osv.py b/security_scan/runners/osv.py similarity index 93% rename from secscan/runners/osv.py rename to security_scan/runners/osv.py index 9a9b735..de32be9 100644 --- a/secscan/runners/osv.py +++ b/security_scan/runners/osv.py @@ -11,7 +11,7 @@ def run(root: Path, exclude: list[str] | None = None, binary: str = "osv-scanner") -> RunnerResult: # NOTE: we intentionally do NOT pass --paths-to-ignore: the flag's name and # presence varies across osv-scanner versions (it's a hard error on 1.9.2). - # secscan.normalize.normalize_sarif() filters excluded paths post-hoc, so we + # security_scan.normalize.normalize_sarif() filters excluded paths post-hoc, so we # get the same effect with zero version coupling. _ = exclude # accepted for signature stability; intentionally unused here cmd = [binary, "--format", "sarif", "--skip-git", "--recursive"] diff --git a/secscan/runners/semgrep.py b/security_scan/runners/semgrep.py similarity index 100% rename from secscan/runners/semgrep.py rename to security_scan/runners/semgrep.py diff --git a/secscan/runners/syft.py b/security_scan/runners/syft.py similarity index 100% rename from secscan/runners/syft.py rename to security_scan/runners/syft.py diff --git a/secscan/runners/trivy.py b/security_scan/runners/trivy.py similarity index 100% rename from secscan/runners/trivy.py rename to security_scan/runners/trivy.py diff --git a/secscan/runners/trufflehog.py b/security_scan/runners/trufflehog.py similarity index 100% rename from secscan/runners/trufflehog.py rename to security_scan/runners/trufflehog.py diff --git a/secscan/sync.py b/security_scan/sync.py similarity index 90% rename from secscan/sync.py rename to security_scan/sync.py index cb98b77..44cd77f 100644 --- a/secscan/sync.py +++ b/security_scan/sync.py @@ -10,9 +10,9 @@ from dataclasses import dataclass, field from typing import Protocol -from secscan.fingerprint import inject_marker, parse_marker, resolve_fingerprint -from secscan.github import GitHub, ProjectContext -from secscan.models import Finding +from security_scan.fingerprint import inject_marker, parse_marker, resolve_fingerprint +from security_scan.github import GitHub, ProjectContext +from security_scan.models import Finding class Triage(Protocol): @@ -131,13 +131,13 @@ def sync( def _labels_for(f: Finding) -> list[str]: """The label set applied to a sub-issue. - `security` is the existing umbrella label. `secscan:` lets you + `security` is the existing umbrella label. `security_scan:` lets you filter the parent's sub-issue list by category in the GitHub UI. - `secscan:` lets you triage by severity. All labels are namespaced - under `secscan:` so they're easy to clean up if you ever drop the tool. + `security_scan:` lets you triage by severity. All labels are namespaced + under `security_scan:` so they're easy to clean up if you ever drop the tool. """ return [ "security", - f"secscan:{f.category}", - f"secscan:{f.severity}", + f"security-scan:{f.category}", + f"security-scan:{f.severity}", ] diff --git a/secscan/triage.py b/security_scan/triage.py similarity index 98% rename from secscan/triage.py rename to security_scan/triage.py index 8a33234..cbb9605 100644 --- a/secscan/triage.py +++ b/security_scan/triage.py @@ -20,10 +20,10 @@ import requests -from secscan.config import TriageConfig -from secscan.fingerprint import parse_marker -from secscan.models import Finding -from secscan.sync import SyncResult, default_issue +from security_scan.config import TriageConfig +from security_scan.fingerprint import parse_marker +from security_scan.models import Finding +from security_scan.sync import SyncResult, default_issue class Triage: diff --git a/tests/test_codex_runner.py b/tests/test_codex_runner.py index 5f1d764..e33951f 100644 --- a/tests/test_codex_runner.py +++ b/tests/test_codex_runner.py @@ -6,7 +6,7 @@ from pathlib import Path from unittest.mock import patch -from secscan.runners import codex as codex_runner +from security_scan.runners import codex as codex_runner def _fake_completed(rc=0, stdout="", stderr=""): @@ -39,8 +39,8 @@ def _fake_run(cmd, **kw): ])) return _fake_completed(0) - with patch("secscan.runners.codex.shutil.which", return_value="/usr/bin/codex"), \ - patch("secscan.runners.codex.subprocess.run", side_effect=_fake_run): + with patch("security_scan.runners.codex.shutil.which", return_value="/usr/bin/codex"), \ + patch("security_scan.runners.codex.subprocess.run", side_effect=_fake_run): result = codex_runner.run(tmp_path) assert result.completed is True @@ -78,8 +78,8 @@ def _fake_run(cmd, **kw): ])) return _fake_completed(0) - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", side_effect=_fake_run): + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", side_effect=_fake_run): result = codex_runner.run(tmp_path) rule_ids = [r["ruleId"] for r in result.sarif["runs"][0]["results"]] assert "codex.already-prefixed" in rule_ids @@ -87,7 +87,7 @@ def _fake_run(cmd, **kw): def test_runner_handles_missing_binary(tmp_path): - with patch("secscan.runners.codex.shutil.which", return_value=None): + with patch("security_scan.runners.codex.shutil.which", return_value=None): result = codex_runner.run(tmp_path) assert result.completed is False assert "binary not found" in result.error @@ -95,8 +95,8 @@ def test_runner_handles_missing_binary(tmp_path): def test_runner_detects_auth_failure(tmp_path): """When codex isn't logged in it exits non-zero with an auth message — surface clearly.""" - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", return_value=_fake_completed(1, "", "Error: not logged in. Run `codex login`.")): result = codex_runner.run(tmp_path) assert result.completed is False @@ -105,8 +105,8 @@ def test_runner_detects_auth_failure(tmp_path): def test_runner_returns_failure_on_non_zero_exit(tmp_path): - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", return_value=_fake_completed(2, "", "internal model error")): result = codex_runner.run(tmp_path) assert result.completed is False @@ -115,8 +115,8 @@ def test_runner_returns_failure_on_non_zero_exit(tmp_path): def test_runner_failure_when_no_output_file_written(tmp_path): """codex exited cleanly but produced no output — likely refused the task.""" - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", return_value=_fake_completed(0)): + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", return_value=_fake_completed(0)): result = codex_runner.run(tmp_path) assert result.completed is False assert "no output" in result.error.lower() @@ -128,8 +128,8 @@ def _fake_run(cmd, **kw): Path(cmd[idx + 1]).write_text("this is not json {{{ <-- broken") return _fake_completed(0) - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", side_effect=_fake_run): + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", side_effect=_fake_run): result = codex_runner.run(tmp_path) assert result.completed is False assert "parse" in result.error.lower() @@ -137,8 +137,8 @@ def _fake_run(cmd, **kw): def test_runner_timeout(tmp_path): import subprocess - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", side_effect=subprocess.TimeoutExpired(cmd="codex", timeout=10)): result = codex_runner.run(tmp_path, timeout=10) assert result.completed is False @@ -155,8 +155,8 @@ def _fake_run(cmd, **kw): ])) return _fake_completed(0) - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", side_effect=_fake_run): + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", side_effect=_fake_run): result = codex_runner.run(tmp_path) paths = [r["locations"][0]["physicalLocation"]["artifactLocation"]["uri"] for r in result.sarif["runs"][0]["results"]] @@ -172,8 +172,8 @@ def _fake_run(cmd, **kw): ])) return _fake_completed(0) - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", side_effect=_fake_run): + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", side_effect=_fake_run): result = codex_runner.run(tmp_path) r = result.sarif["runs"][0]["results"][0] assert r["properties"]["security-severity"] == "5.5" # medium diff --git a/tests/test_config.py b/tests/test_config.py index d7cefcb..1018646 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,7 +1,7 @@ import pytest -from secscan.config import ConfigError, load_config +from security_scan.config import ConfigError, load_config def write(tmp_path, name, body): diff --git a/tests/test_cross_validate.py b/tests/test_cross_validate.py index 71f52a8..d081f63 100644 --- a/tests/test_cross_validate.py +++ b/tests/test_cross_validate.py @@ -11,8 +11,8 @@ from pathlib import Path from unittest.mock import MagicMock, patch -from secscan.cross_validate import cross_validate -from secscan.models import Finding +from security_scan.cross_validate import cross_validate +from security_scan.models import Finding def _f(scanner, rule_id, severity="high"): @@ -66,9 +66,9 @@ def test_disabled_when_only_one_scanner_enabled(tmp_path): def test_gemma_marks_codex_finding_real_keeps_severity(tmp_path): f = _f("codex", "auth.foo", severity="high") - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()), \ - patch("secscan.cross_validate.requests.post", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()), \ + patch("security_scan.cross_validate.requests.post", return_value=_ollama_ok({"verdict": "real", "reason": "definitely real"})): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) cv = f.extra["cross_validation"] @@ -81,9 +81,9 @@ def test_gemma_marks_codex_finding_real_keeps_severity(tmp_path): def test_gemma_marks_codex_finding_false_positive_downgrades(tmp_path): f = _f("codex", "auth.foo", severity="high") - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()), \ - patch("secscan.cross_validate.requests.post", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()), \ + patch("security_scan.cross_validate.requests.post", return_value=_ollama_ok({"verdict": "false_positive", "reason": "not exploitable"})): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) cv = f.extra["cross_validation"] @@ -96,9 +96,9 @@ def test_critical_never_auto_downgrades_on_fp(tmp_path): """Asymmetric guardrail: critical findings stay critical even if the validator disagrees. The cost of missing a real critical is too high.""" f = _f("codex", "rce.eval", severity="critical") - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()), \ - patch("secscan.cross_validate.requests.post", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()), \ + patch("security_scan.cross_validate.requests.post", return_value=_ollama_ok({"verdict": "false_positive", "reason": "looks fine"})): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) assert f.severity == "critical" # protected @@ -108,9 +108,9 @@ def test_critical_never_auto_downgrades_on_fp(tmp_path): def test_uncertain_does_not_downgrade(tmp_path): f = _f("codex", "auth.foo", severity="high") - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()), \ - patch("secscan.cross_validate.requests.post", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()), \ + patch("security_scan.cross_validate.requests.post", return_value=_ollama_ok({"verdict": "uncertain", "reason": "can't tell"})): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) assert f.severity == "high" @@ -119,9 +119,9 @@ def test_uncertain_does_not_downgrade(tmp_path): def test_unrecognized_verdict_treated_as_uncertain(tmp_path): f = _f("codex", "x", severity="medium") - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()), \ - patch("secscan.cross_validate.requests.post", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()), \ + patch("security_scan.cross_validate.requests.post", return_value=_ollama_ok({"verdict": "OBVIOUSLY_FAKE", "reason": "what"})): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) assert f.severity == "medium" @@ -130,9 +130,9 @@ def test_unrecognized_verdict_treated_as_uncertain(tmp_path): def test_codex_marks_gemma_finding_false_positive_downgrades(tmp_path): f = _f("gemma", "py.eval", severity="high") - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()), \ - patch("secscan.cross_validate.subprocess.run", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()), \ + patch("security_scan.cross_validate.subprocess.run", side_effect=_codex_completed(0, {"verdict": "false_positive", "reason": "test code, not prod"})): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) @@ -146,8 +146,8 @@ def test_ollama_unreachable_skips_gemma_review(tmp_path): """If Ollama can't be reached, codex findings simply get no review — not failure.""" import requests f = _f("codex", "x", severity="high") - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", side_effect=requests.ConnectionError("down")): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) assert "cross_validation" not in (f.extra or {}) @@ -156,8 +156,8 @@ def test_ollama_unreachable_skips_gemma_review(tmp_path): def test_codex_missing_skips_codex_review_of_gemma_findings(tmp_path): f = _f("gemma", "x", severity="high") - with patch("secscan.cross_validate.shutil.which", return_value=None), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()): + with patch("security_scan.cross_validate.shutil.which", return_value=None), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) # Gemma finding not reviewed because codex CLI is missing. assert "cross_validation" not in (f.extra or {}) @@ -168,9 +168,9 @@ def test_validator_failure_yields_uncertain(tmp_path): verdict — never block the finding or crash the run.""" f = _f("codex", "x", severity="high") import requests - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()), \ - patch("secscan.cross_validate.requests.post", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()), \ + patch("security_scan.cross_validate.requests.post", side_effect=requests.ConnectionError("post failed")): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) cv = f.extra["cross_validation"] diff --git a/tests/test_detect.py b/tests/test_detect.py index 239e449..877ce2e 100644 --- a/tests/test_detect.py +++ b/tests/test_detect.py @@ -1,6 +1,6 @@ from pathlib import Path -from secscan.detect import ScannerTarget, detect_stack +from security_scan.detect import ScannerTarget, detect_stack ALL_ON = { "osv": True, "gitleaks": True, "semgrep": True, diff --git a/tests/test_e2e_dryrun.py b/tests/test_e2e_dryrun.py index b99aa19..08b9e6e 100644 --- a/tests/test_e2e_dryrun.py +++ b/tests/test_e2e_dryrun.py @@ -17,7 +17,7 @@ import pytest -from secscan.config import ( +from security_scan.config import ( Config, PathsConfig, ProjectConfig, @@ -25,10 +25,10 @@ SlackConfig, TriageConfig, ) -from secscan.fingerprint import parse_marker, resolve_fingerprint -from secscan.github import ProjectContext, ProjectField -from secscan.normalize import normalize_sarif -from secscan.runners import RunnerResult +from security_scan.fingerprint import parse_marker, resolve_fingerprint +from security_scan.github import ProjectContext, ProjectField +from security_scan.normalize import normalize_sarif +from security_scan.runners import RunnerResult def _synthetic_repo(root: Path) -> None: @@ -167,7 +167,7 @@ def _make_fake_gh(state="OPEN", existing_with_fp: list[str] | None = None) -> Ma "number": i + 1, "state": state, "title": "old", - "body": f"prose\n", + "body": f"prose\n", }) fake_gh = MagicMock() @@ -191,13 +191,13 @@ def create(title, body, labels=None): def test_full_dryrun_pipeline_files_three_findings(cfg, tmp_path): - from secscan.main import run + from security_scan.main import run fake_gh = _make_fake_gh() - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): rc = run(cfg, dry_run=True, work_dir=str(tmp_path), keep_work=True) assert rc == 0 @@ -211,7 +211,7 @@ def test_full_dryrun_pipeline_files_three_findings(cfg, tmp_path): def test_dryrun_does_not_post_to_real_github(cfg, tmp_path): """The actual GitHub class in dry_run mode must make zero HTTP requests across issue creation AND every Projects v2 mutation.""" - from secscan.github import GitHub + from security_scan.github import GitHub captured_requests = [] @@ -236,13 +236,13 @@ def fake_request(*a, **kw): def test_marker_roundtrip_on_dryrun_bodies(cfg, tmp_path): """Every body that the pipeline would have posted must contain a parseable marker.""" - from secscan.main import run + from security_scan.main import run fake_gh = _make_fake_gh() - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): run(cfg, dry_run=True, work_dir=str(tmp_path), keep_work=True) for issue in fake_gh.captured: @@ -253,17 +253,17 @@ def test_marker_roundtrip_on_dryrun_bodies(cfg, tmp_path): def test_closed_existing_fingerprint_suppresses_refile(cfg, tmp_path): """The spec invariant: a closed project item with our fingerprint never refiles.""" - from secscan.main import run + from security_scan.main import run findings = normalize_sarif(_semgrep_sarif(), "semgrep") semgrep_fp = resolve_fingerprint(findings[0]) fake_gh = _make_fake_gh(state="CLOSED", existing_with_fp=[semgrep_fp]) - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): run(cfg, dry_run=True, work_dir=str(tmp_path), keep_work=True) # 3 findings total; the semgrep one matches a closed-existing fp -> skip. @@ -286,14 +286,14 @@ def test_fingerprint_survives_line_shift_in_source(cfg): def test_raw_secret_never_in_issue_body(cfg, tmp_path): """End-to-end check that the raw AWS key never reaches a posted body.""" - from secscan.main import run + from security_scan.main import run raw_secret = "TEST_FAKE_SECRET_VALUE" fake_gh = _make_fake_gh() - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): run(cfg, dry_run=True, work_dir=str(tmp_path), keep_work=True) for issue in fake_gh.captured: diff --git a/tests/test_fingerprint.py b/tests/test_fingerprint.py index 31f097b..71101cd 100644 --- a/tests/test_fingerprint.py +++ b/tests/test_fingerprint.py @@ -1,10 +1,10 @@ -from secscan.fingerprint import ( +from security_scan.fingerprint import ( compute_fingerprint, inject_marker, parse_marker, resolve_fingerprint, ) -from secscan.models import Finding +from security_scan.models import Finding def _sast(file_path="src/a.js", line=10, snippet="exec(userInput)"): @@ -96,11 +96,11 @@ def test_marker_idempotent_inject(): body = inject_marker("prose", fp, f) body2 = inject_marker(body, fp, f) # Should not duplicate the marker - assert body2.count("") is None + assert parse_marker("") is None diff --git a/tests/test_gemma_runner.py b/tests/test_gemma_runner.py index a88a430..9b1e81b 100644 --- a/tests/test_gemma_runner.py +++ b/tests/test_gemma_runner.py @@ -6,7 +6,7 @@ from pathlib import Path from unittest.mock import MagicMock, patch -from secscan.runners import gemma as gemma_runner +from security_scan.runners import gemma as gemma_runner def _ollama_resp(payload: dict, status=200): @@ -44,7 +44,7 @@ def test_runner_happy_path(tmp_path): "message": "Concatenating user input into SQL.", "snippet": "db.query('... ' + n)"}, ] } - with patch("secscan.runners.gemma.requests.post", return_value=_ollama_resp(payload)) as p: + with patch("security_scan.runners.gemma.requests.post", return_value=_ollama_resp(payload)) as p: result = gemma_runner.run(tmp_path) assert result.completed is True results = result.sarif["runs"][0]["results"] @@ -60,7 +60,7 @@ def test_runner_happy_path(tmp_path): def test_runner_unreachable_ollama(tmp_path): _drop_source(tmp_path) import requests - with patch("secscan.runners.gemma.requests.post", + with patch("security_scan.runners.gemma.requests.post", side_effect=requests.ConnectionError("ollama down")): result = gemma_runner.run(tmp_path) assert result.completed is False @@ -72,7 +72,7 @@ def test_runner_http_error(tmp_path): r = MagicMock() r.status_code = 500 r.text = "server error" - with patch("secscan.runners.gemma.requests.post", return_value=r): + with patch("security_scan.runners.gemma.requests.post", return_value=r): result = gemma_runner.run(tmp_path) assert result.completed is False assert "500" in result.error @@ -83,7 +83,7 @@ def test_runner_parse_error_on_malformed_content(tmp_path): r = MagicMock() r.status_code = 200 r.json.return_value = {"message": {"content": "not json at all"}} - with patch("secscan.runners.gemma.requests.post", return_value=r): + with patch("security_scan.runners.gemma.requests.post", return_value=r): result = gemma_runner.run(tmp_path) assert result.completed is False assert "parse" in result.error.lower() @@ -96,7 +96,7 @@ def test_runner_namespaces_rule_id(tmp_path): "title": "t", "message": "m"}, {"file": "y.py", "rule_id": "raw-rule", "severity": "low", "title": "t", "message": "m"}, ]} - with patch("secscan.runners.gemma.requests.post", return_value=_ollama_resp(payload)): + with patch("security_scan.runners.gemma.requests.post", return_value=_ollama_resp(payload)): result = gemma_runner.run(tmp_path) rule_ids = {r["ruleId"] for r in result.sarif["runs"][0]["results"]} assert "gemma.raw-rule" in rule_ids @@ -117,7 +117,7 @@ def _capture(*args, **kwargs): captured["body"] = kwargs["json"] return _ollama_resp({"findings": []}) - with patch("secscan.runners.gemma.requests.post", side_effect=_capture): + with patch("security_scan.runners.gemma.requests.post", side_effect=_capture): gemma_runner.run(tmp_path, max_files=3, max_file_bytes=1000, max_total_bytes=5000) user_msg = next(m["content"] for m in captured["body"]["messages"] if m["role"] == "user") @@ -142,7 +142,7 @@ def _capture(*args, **kwargs): captured["body"] = kwargs["json"] return _ollama_resp({"findings": []}) - with patch("secscan.runners.gemma.requests.post", side_effect=_capture): + with patch("security_scan.runners.gemma.requests.post", side_effect=_capture): gemma_runner.run(tmp_path) user_msg = next(m["content"] for m in captured["body"]["messages"] if m["role"] == "user") @@ -159,7 +159,7 @@ def test_runner_drops_findings_without_file(tmp_path): {"file": "", "rule_id": "no-path", "severity": "low", "title": "t", "message": "m"}, {"file": "src/auth.py", "rule_id": "ok", "severity": "low", "title": "t", "message": "m"}, ]} - with patch("secscan.runners.gemma.requests.post", return_value=_ollama_resp(payload)): + with patch("security_scan.runners.gemma.requests.post", return_value=_ollama_resp(payload)): result = gemma_runner.run(tmp_path) paths = [r["locations"][0]["physicalLocation"]["artifactLocation"]["uri"] for r in result.sarif["runs"][0]["results"]] @@ -171,7 +171,7 @@ def test_runner_findings_not_a_list_is_failure(tmp_path): r = MagicMock() r.status_code = 200 r.json.return_value = {"message": {"content": json.dumps({"findings": "not a list"})}} - with patch("secscan.runners.gemma.requests.post", return_value=r): + with patch("security_scan.runners.gemma.requests.post", return_value=r): result = gemma_runner.run(tmp_path) assert result.completed is False assert "schema" in result.error.lower() diff --git a/tests/test_github.py b/tests/test_github.py index 4b2dfd6..81e09b9 100644 --- a/tests/test_github.py +++ b/tests/test_github.py @@ -7,7 +7,7 @@ import pytest import requests -from secscan.github import GitHub, GitHubError, ProjectField +from security_scan.github import GitHub, GitHubError, ProjectField TOKEN = "ghp_supersecrettoken_abcdef123456" @@ -31,7 +31,7 @@ def _gh(dry_run=False): def test_clone_shallow_uses_depth_1(tmp_path): gh = _gh() completed = MagicMock(returncode=0, stdout="", stderr="") - with patch("secscan.github.subprocess.run", return_value=completed) as m: + with patch("security_scan.github.subprocess.run", return_value=completed) as m: gh.clone("dev", tmp_path / "repo", shallow=True) args = m.call_args.args[0] assert args[0] == "git" @@ -45,7 +45,7 @@ def test_clone_shallow_uses_depth_1(tmp_path): def test_clone_full_omits_depth(tmp_path): gh = _gh() completed = MagicMock(returncode=0, stdout="", stderr="") - with patch("secscan.github.subprocess.run", return_value=completed) as m: + with patch("security_scan.github.subprocess.run", return_value=completed) as m: gh.clone("dev", tmp_path / "repo", shallow=False) args = m.call_args.args[0] assert "--depth=1" not in args @@ -55,7 +55,7 @@ def test_clone_url_has_no_credentials(tmp_path): """The clone URL must not embed the token — git would persist it into .git/config.""" gh = _gh() completed = MagicMock(returncode=0, stdout="", stderr="") - with patch("secscan.github.subprocess.run", return_value=completed) as m: + with patch("security_scan.github.subprocess.run", return_value=completed) as m: gh.clone("dev", tmp_path / "repo") args = m.call_args.args[0] url = next(a for a in args if a.startswith("https://")) @@ -69,7 +69,7 @@ def test_clone_passes_token_via_one_shot_config(tmp_path): gh = _gh() completed = MagicMock(returncode=0, stdout="", stderr="") - with patch("secscan.github.subprocess.run", return_value=completed) as m: + with patch("security_scan.github.subprocess.run", return_value=completed) as m: gh.clone("dev", tmp_path / "repo") args = m.call_args.args[0] assert "-c" in args @@ -88,7 +88,7 @@ def test_clone_scrubs_token_from_error(tmp_path): gh = _gh() leaky = f"fatal: could not read from https://x-access-token:{TOKEN}@github.com/leverj/ezel.git" completed = MagicMock(returncode=128, stdout="", stderr=leaky) - with patch("secscan.github.subprocess.run", return_value=completed): + with patch("security_scan.github.subprocess.run", return_value=completed): with pytest.raises(GitHubError) as ei: gh.clone("dev", tmp_path / "repo") assert TOKEN not in str(ei.value) @@ -102,14 +102,14 @@ def test_create_issue_posts_correct_payload(): created = {"id": 9001, "node_id": "I_xxx", "number": 42, "title": "t", "body": "b", "html_url": "u", "state": "open"} resp = _resp(201, json_body=created, headers={}) with patch.object(requests.Session, "request", return_value=resp) as m: - out = gh.create_issue("t", "b", labels=["security", "secscan"]) + out = gh.create_issue("t", "b", labels=["security", "security_scan"]) assert out == created call = m.call_args method = call.args[0] if call.args else call.kwargs["method"] url = call.args[1] if len(call.args) > 1 else call.kwargs["url"] assert method == "POST" assert url == "https://api.github.com/repos/leverj/ezel/issues" - assert call.kwargs["json"] == {"title": "t", "body": "b", "labels": ["security", "secscan"]} + assert call.kwargs["json"] == {"title": "t", "body": "b", "labels": ["security", "security_scan"]} assert gh.session.headers["Authorization"] == f"Bearer {TOKEN}" assert gh.session.headers["Accept"] == "application/vnd.github+json" assert gh.session.headers["X-GitHub-Api-Version"] == "2022-11-28" @@ -318,7 +318,7 @@ def test_set_project_field_calls_update_mutation(): def test_set_project_field_unknown_option_is_noop(): - """If the user renamed an option, secscan must not crash — silently skip.""" + """If the user renamed an option, security_scan must not crash — silently skip.""" gh = _gh() field = ProjectField(id="FID", options={"critical": "o-crit"}) with patch.object(requests.Session, "request") as m: @@ -354,7 +354,7 @@ def test_retry_on_500(): gh = _gh() bad = _resp(500, json_body={"message": "boom"}) good = _resp(201, json_body={"id": 1, "node_id": "I_x", "number": 1, "title": "t", "body": "b", "html_url": "u", "state": "open"}) - with patch("secscan.github.time.sleep") as sl, \ + with patch("security_scan.github.time.sleep") as sl, \ patch.object(requests.Session, "request", side_effect=[bad, good]) as m: gh.create_issue("t", "b") assert m.call_count == 2 @@ -371,7 +371,7 @@ def test_rate_limit_waits_and_retries(): headers={"X-RateLimit-Remaining": "0", "X-RateLimit-Reset": str(reset_at)}, ) good = _resp(201, json_body={"id": 1, "node_id": "I_x", "number": 1, "title": "t", "body": "b", "html_url": "u", "state": "open"}) - with patch("secscan.github.time.sleep") as sl, \ + with patch("security_scan.github.time.sleep") as sl, \ patch.object(requests.Session, "request", side_effect=[limited, good]) as m: gh.create_issue("t", "b") assert m.call_count == 2 diff --git a/tests/test_main.py b/tests/test_main.py index 1b1648b..1656b88 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -4,7 +4,7 @@ from pathlib import Path from unittest.mock import MagicMock, patch -from secscan.config import ( +from security_scan.config import ( Config, PathsConfig, ProjectConfig, @@ -12,8 +12,8 @@ SlackConfig, TriageConfig, ) -from secscan.github import ProjectContext, ProjectField -from secscan.runners import RunnerResult +from security_scan.github import ProjectContext, ProjectField +from security_scan.runners import RunnerResult def _cfg(tmp_path, **kw): @@ -115,7 +115,7 @@ def create(title, body, labels=None): def test_e2e_dry_run_creates_no_issues(tmp_path): - from secscan.main import run + from security_scan.main import run repo_dir = tmp_path / "name" _populate_synthetic_repo(repo_dir) @@ -123,10 +123,10 @@ def test_e2e_dry_run_creates_no_issues(tmp_path): fake_gh = _fresh_gh(dry_run=True) results = _scanner_results() - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=results["osv"]) as o, \ - patch("secscan.runners.gitleaks.run", return_value=results["gitleaks"]) as gl, \ - patch("secscan.runners.semgrep.run", return_value=results["semgrep"]) as sg: + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=results["osv"]) as o, \ + patch("security_scan.runners.gitleaks.run", return_value=results["gitleaks"]) as gl, \ + patch("security_scan.runners.semgrep.run", return_value=results["semgrep"]) as sg: fake_gh.clone.side_effect = _clone_populates(None) rc = run(cfg, dry_run=True, work_dir=str(tmp_path), keep_work=True) @@ -139,7 +139,7 @@ def test_e2e_dry_run_creates_no_issues(tmp_path): def test_e2e_creates_issues_when_not_dry_run(tmp_path): - from secscan.main import run + from security_scan.main import run repo_dir = tmp_path / "name" _populate_synthetic_repo(repo_dir) @@ -147,10 +147,10 @@ def test_e2e_creates_issues_when_not_dry_run(tmp_path): fake_gh = _fresh_gh(dry_run=False) results = _scanner_results() - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=results["osv"]), \ - patch("secscan.runners.gitleaks.run", return_value=results["gitleaks"]), \ - patch("secscan.runners.semgrep.run", return_value=results["semgrep"]): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=results["osv"]), \ + patch("security_scan.runners.gitleaks.run", return_value=results["gitleaks"]), \ + patch("security_scan.runners.semgrep.run", return_value=results["semgrep"]): fake_gh.clone.side_effect = _clone_populates(None) rc = run(cfg, dry_run=False, work_dir=str(tmp_path), keep_work=True) @@ -163,17 +163,17 @@ def test_e2e_creates_issues_when_not_dry_run(tmp_path): def test_failed_scanner_does_not_block_others(tmp_path): - from secscan.main import run + from security_scan.main import run repo_dir = tmp_path / "name" _populate_synthetic_repo(repo_dir) cfg = _cfg(tmp_path) fake_gh = _fresh_gh(dry_run=False) - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", None, False, "binary not found")), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", None, False, "binary not found")), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): fake_gh.clone.side_effect = _clone_populates(None) rc = run(cfg, dry_run=False, work_dir=str(tmp_path), keep_work=True) @@ -182,17 +182,17 @@ def test_failed_scanner_does_not_block_others(tmp_path): def test_all_scanners_fail_returns_error(tmp_path): - from secscan.main import run + from security_scan.main import run repo_dir = tmp_path / "name" _populate_synthetic_repo(repo_dir) cfg = _cfg(tmp_path) fake_gh = _fresh_gh(dry_run=False) - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", None, False, "x")), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", None, False, "x")), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", None, False, "x")): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", None, False, "x")), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", None, False, "x")), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", None, False, "x")): fake_gh.clone.side_effect = _clone_populates(None) rc = run(cfg, dry_run=False, work_dir=str(tmp_path), keep_work=True) @@ -202,15 +202,15 @@ def test_all_scanners_fail_returns_error(tmp_path): def test_repo_dir_is_wiped_even_when_work_dir_provided(tmp_path): """Security: the clone must be removed even when the caller supplied --work-dir.""" - from secscan.main import run + from security_scan.main import run cfg = _cfg(tmp_path) fake_gh = _fresh_gh(dry_run=False) - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): fake_gh.clone.side_effect = _clone_populates(None) rc = run(cfg, dry_run=False, work_dir=str(tmp_path), keep_work=False) @@ -220,15 +220,15 @@ def test_repo_dir_is_wiped_even_when_work_dir_provided(tmp_path): def test_keep_work_preserves_clone(tmp_path): - from secscan.main import run + from security_scan.main import run cfg = _cfg(tmp_path) fake_gh = _fresh_gh(dry_run=True) - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): fake_gh.clone.side_effect = _clone_populates(None) run(cfg, dry_run=True, work_dir=str(tmp_path), keep_work=True) @@ -236,17 +236,17 @@ def test_keep_work_preserves_clone(tmp_path): def test_severity_floor_skips_low_findings(tmp_path): - from secscan.main import run + from security_scan.main import run repo_dir = tmp_path / "name" _populate_synthetic_repo(repo_dir) cfg = _cfg(tmp_path, severity_floor="critical") # only critical fake_gh = _fresh_gh(dry_run=False) - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): fake_gh.clone.side_effect = _clone_populates(None) rc = run(cfg, dry_run=False, work_dir=str(tmp_path), keep_work=True) diff --git a/tests/test_models.py b/tests/test_models.py index 5a8abf0..5fca28a 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,4 +1,4 @@ -from secscan.models import Finding, normalize_severity +from security_scan.models import Finding, normalize_severity def test_normalize_severity_from_security_severity_score(): diff --git a/tests/test_new_scanners.py b/tests/test_new_scanners.py index edfb309..3a4e2b3 100644 --- a/tests/test_new_scanners.py +++ b/tests/test_new_scanners.py @@ -4,10 +4,10 @@ from pathlib import Path from unittest.mock import patch -from secscan.normalize import normalize_sarif -from secscan.runners import syft as syft_runner -from secscan.runners import trivy as trivy_runner -from secscan.runners import trufflehog as trufflehog_runner +from security_scan.normalize import normalize_sarif +from security_scan.runners import syft as syft_runner +from security_scan.runners import trivy as trivy_runner +from security_scan.runners import trufflehog as trufflehog_runner FIXTURES = Path(__file__).parent / "fixtures" @@ -26,14 +26,14 @@ def _completed(rc=0, stdout="", stderr=""): def test_trivy_runner_happy_path(tmp_path): sarif = (FIXTURES / "sarif" / "trivy.json").read_text() - with patch("secscan.runners.subprocess.run", return_value=_completed(0, sarif, "")): + with patch("security_scan.runners.subprocess.run", return_value=_completed(0, sarif, "")): result = trivy_runner.run(tmp_path) assert result.completed and result.sarif is not None assert result.scanner == "trivy" def test_trivy_cmd_includes_all_scanners(tmp_path): - with patch("secscan.runners.subprocess.run", return_value=_completed(0, "{}", "")) as m: + with patch("security_scan.runners.subprocess.run", return_value=_completed(0, "{}", "")) as m: trivy_runner.run(tmp_path, exclude=["vendor/"]) cmd = m.call_args.args[0] # Joined --scanners value @@ -73,7 +73,7 @@ def test_trivy_exclude_filter(tmp_path): def test_trivy_binary_not_found(tmp_path): - with patch("secscan.runners.subprocess.run", side_effect=FileNotFoundError("trivy")): + with patch("security_scan.runners.subprocess.run", side_effect=FileNotFoundError("trivy")): result = trivy_runner.run(tmp_path) assert not result.completed assert "binary not found" in (result.error or "") @@ -84,7 +84,7 @@ def test_trivy_binary_not_found(tmp_path): def test_trufflehog_runner_wraps_jsonl(tmp_path): jsonl = (FIXTURES / "trufflehog.jsonl").read_text() - with patch("secscan.runners.subprocess.run", return_value=_completed(0, jsonl, "")): + with patch("security_scan.runners.subprocess.run", return_value=_completed(0, jsonl, "")): result = trufflehog_runner.run(tmp_path) assert result.completed assert isinstance(result.sarif, dict) @@ -134,7 +134,7 @@ def test_trufflehog_skips_unparseable_lines(tmp_path, capsys): def test_trufflehog_exit_code_nonzero_is_failure(tmp_path): - with patch("secscan.runners.subprocess.run", return_value=_completed(2, "", "config error")): + with patch("security_scan.runners.subprocess.run", return_value=_completed(2, "", "config error")): result = trufflehog_runner.run(tmp_path) assert not result.completed assert "exit 2" in (result.error or "") @@ -157,7 +157,7 @@ def fake_run(cmd, **kw): p.stderr = "" return p - with patch("secscan.runners.subprocess.run", side_effect=fake_run): + with patch("security_scan.runners.subprocess.run", side_effect=fake_run): result = syft_runner.run(tmp_path, output_path=sbom_path) assert result.completed meta = result.sarif["_syft_sbom"] @@ -168,13 +168,13 @@ def fake_run(cmd, **kw): def test_syft_runner_failure_missing_output(tmp_path): sbom_path = tmp_path / "should-not-exist.json" - with patch("secscan.runners.subprocess.run", return_value=_completed(0, "", "")): + with patch("security_scan.runners.subprocess.run", return_value=_completed(0, "", "")): result = syft_runner.run(tmp_path, output_path=sbom_path) assert not result.completed def test_syft_binary_not_found(tmp_path): - with patch("secscan.runners.subprocess.run", side_effect=FileNotFoundError("syft")): + with patch("security_scan.runners.subprocess.run", side_effect=FileNotFoundError("syft")): result = syft_runner.run(tmp_path, output_path=tmp_path / "x.json") assert not result.completed assert "binary not found" in (result.error or "") diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 1619f18..e791fd0 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -3,8 +3,8 @@ import pytest -from secscan.fingerprint import resolve_fingerprint -from secscan.normalize import normalize_sarif +from security_scan.fingerprint import resolve_fingerprint +from security_scan.normalize import normalize_sarif FIXTURES = Path(__file__).parent / "fixtures" / "sarif" diff --git a/tests/test_notify.py b/tests/test_notify.py index c45378e..6d73466 100644 --- a/tests/test_notify.py +++ b/tests/test_notify.py @@ -1,9 +1,9 @@ from unittest.mock import MagicMock, patch -from secscan.config import SlackConfig -from secscan.models import Finding -from secscan.notify import _default_digest, post_digest -from secscan.sync import SyncResult +from security_scan.config import SlackConfig +from security_scan.models import Finding +from security_scan.notify import _default_digest, post_digest +from security_scan.sync import SyncResult def _f(sev): @@ -12,7 +12,7 @@ def _f(sev): def test_disabled_slack_is_noop(monkeypatch): slack = SlackConfig(enabled=False) - monkeypatch.setattr("secscan.notify.requests.post", lambda *a, **kw: (_ for _ in ()).throw(AssertionError("called"))) + monkeypatch.setattr("security_scan.notify.requests.post", lambda *a, **kw: (_ for _ in ()).throw(AssertionError("called"))) assert post_digest(slack, [], SyncResult(), "o/n", "main", "owner", 1) is False @@ -20,12 +20,12 @@ def test_webhook_called_with_text(monkeypatch): monkeypatch.setenv("SLACK_WEBHOOK_URL", "https://hooks.slack.test/x") slack = SlackConfig(enabled=True, webhook_url_env="SLACK_WEBHOOK_URL") resp = MagicMock(status_code=200) - with patch("secscan.notify.requests.post", return_value=resp) as mp: + with patch("security_scan.notify.requests.post", return_value=resp) as mp: ok = post_digest(slack, [_f("high")], SyncResult(created=[{"number": 1}]), "o/n", "main", "owner", 42) assert ok is True args, kwargs = mp.call_args assert args[0] == "https://hooks.slack.test/x" - assert "secscan" in kwargs["json"]["text"] + assert "security_scan" in kwargs["json"]["text"] def test_webhook_missing_env_returns_false(monkeypatch, capsys): @@ -40,7 +40,7 @@ def test_chat_postmessage_used_when_channel_set(monkeypatch): slack = SlackConfig(enabled=True, channel_id_env="SLACK_CHANNEL_ID", bot_token_env="SLACK_BOT_TOKEN") resp = MagicMock(status_code=200) resp.json.return_value = {"ok": True} - with patch("secscan.notify.requests.post", return_value=resp) as mp: + with patch("security_scan.notify.requests.post", return_value=resp) as mp: ok = post_digest(slack, [], SyncResult(), "o/n", "main", "owner", 1) assert ok is True assert mp.call_args.args[0] == "https://slack.com/api/chat.postMessage" @@ -97,7 +97,7 @@ def test_default_digest_below_floor_only_says_so(): def test_default_digest_groups_by_category(): - from secscan.models import Finding + from security_scan.models import Finding findings = [ Finding("trivy", "dependency", "CVE-2024-1", "critical", "package-lock.json", 1, "t", "m", extra={"package": "left-pad", "installed_version": "1.0.0", @@ -123,7 +123,7 @@ def test_default_digest_groups_by_category(): def test_one_liner_does_not_repeat_rule_id_when_package_extra_missing(): """OSV often leaves extras empty; the message text has the package name. Don't render '`CVE-X` · `CVE-X` · no fix' — that's noise.""" - from secscan.models import Finding + from security_scan.models import Finding f = Finding( "osv", "dependency", "CVE-2026-33169", "medium", "Gemfile.lock", 1, "title", @@ -142,7 +142,7 @@ def test_one_liner_does_not_repeat_rule_id_when_package_extra_missing(): def test_default_digest_caps_per_section(): - from secscan.models import Finding + from security_scan.models import Finding findings = [ Finding("semgrep", "sast", f"rule-{i}", "medium", "f.js", i, f"t{i}", "m") for i in range(10) @@ -162,7 +162,7 @@ def test_intro_is_prepended_to_structured_digest(monkeypatch): slack = SlackConfig(enabled=True, webhook_url_env="SLACK_WEBHOOK_URL") resp = MagicMock(status_code=200) actionable = [_f("high"), _f("medium")] - with patch("secscan.notify.requests.post", return_value=resp) as mp: + with patch("security_scan.notify.requests.post", return_value=resp) as mp: post_digest( slack, actionable, SyncResult(created=[{"n": 1}, {"n": 2}], created_findings=actionable), @@ -180,7 +180,7 @@ def test_digest_text_legacy_param_still_overrides(monkeypatch): monkeypatch.setenv("SLACK_WEBHOOK_URL", "https://hooks.slack.test/x") slack = SlackConfig(enabled=True, webhook_url_env="SLACK_WEBHOOK_URL") resp = MagicMock(status_code=200) - with patch("secscan.notify.requests.post", return_value=resp) as mp: + with patch("security_scan.notify.requests.post", return_value=resp) as mp: post_digest(slack, [_f("high")], SyncResult(), "o/n", "main", "owner", 9, digest_text="exact replacement") assert mp.call_args.kwargs["json"]["text"] == "exact replacement" @@ -190,6 +190,6 @@ def test_failure_is_non_blocking(monkeypatch): import requests monkeypatch.setenv("SLACK_WEBHOOK_URL", "https://hooks.slack.test/x") slack = SlackConfig(enabled=True, webhook_url_env="SLACK_WEBHOOK_URL") - with patch("secscan.notify.requests.post", side_effect=requests.ConnectionError("down")): + with patch("security_scan.notify.requests.post", side_effect=requests.ConnectionError("down")): ok = post_digest(slack, [], SyncResult(), "o/n", "main", "owner", 1) assert ok is False # didn't raise diff --git a/tests/test_resolve_rules.py b/tests/test_resolve_rules.py index 9d2ca3d..d954250 100644 --- a/tests/test_resolve_rules.py +++ b/tests/test_resolve_rules.py @@ -8,7 +8,7 @@ from pathlib import Path from unittest.mock import patch -from secscan.config import ( +from security_scan.config import ( Config, PathsConfig, ProjectConfig, @@ -16,7 +16,7 @@ SlackConfig, TriageConfig, ) -from secscan.main import _has_rule_files, _resolve_semgrep_rules +from security_scan.main import _has_rule_files, _resolve_semgrep_rules def _cfg(rules=None): @@ -68,7 +68,7 @@ def test_resolver_skips_empty_rules_mount_falls_through_to_bundled(tmp_path: Pat bundled.mkdir() (bundled / "r.yaml").write_text("rules: []") - with patch("secscan.main.Path") as P: + with patch("security_scan.main.Path") as P: # `Path("/rules")` -> empty mount; bundled discovered via __file__ parent / "rules" def fake_path(arg): if arg == "/rules": @@ -76,7 +76,7 @@ def fake_path(arg): return Path(arg) P.side_effect = fake_path # Make `Path(__file__).parent / "rules"` resolve to our bundled stub. - monkeypatch.setattr("secscan.main.__file__", str(bundled / "main.py")) + monkeypatch.setattr("security_scan.main.__file__", str(bundled / "main.py")) # Re-patching Path through to real Path for the parent / "rules" computation # is fiddly; instead, call the resolver but verify behavior through _has_rule_files. @@ -90,7 +90,7 @@ def test_resolver_returns_auto_when_nothing_has_rules(tmp_path: Path, monkeypatc the resolver falls back to 'auto'.""" no_rules_pkg = tmp_path / "pkg" no_rules_pkg.mkdir() - monkeypatch.setattr("secscan.main.__file__", str(no_rules_pkg / "main.py")) + monkeypatch.setattr("security_scan.main.__file__", str(no_rules_pkg / "main.py")) # Force the /rules check to fail (typical host system has no /rules) # by relying on the real filesystem; if /rules exists on the test host that's still # fine because it would have to contain *.yaml/yml/json to count. diff --git a/tests/test_runners.py b/tests/test_runners.py index 271e3c8..536a10c 100644 --- a/tests/test_runners.py +++ b/tests/test_runners.py @@ -9,10 +9,10 @@ import pytest -from secscan.runners import RunnerResult, _run -from secscan.runners import gitleaks as gitleaks_runner -from secscan.runners import osv as osv_runner -from secscan.runners import semgrep as semgrep_runner +from security_scan.runners import RunnerResult, _run +from security_scan.runners import gitleaks as gitleaks_runner +from security_scan.runners import osv as osv_runner +from security_scan.runners import semgrep as semgrep_runner TINY_SARIF = { "version": "2.1.0", @@ -56,7 +56,7 @@ def _assert_no_execute_verbs(cmd: list[str]) -> None: # --- _run -------------------------------------------------------------------- def test_run_invokes_subprocess_with_cwd(tmp_path: Path): - with patch("secscan.runners.subprocess.run") as m: + with patch("security_scan.runners.subprocess.run") as m: m.return_value = _fake_completed(0, "hello", "") rc, out, err = _run(["echo", "hi"], cwd=tmp_path) assert (rc, out, err) == (0, "hello", "") @@ -78,7 +78,7 @@ def test_run_invokes_subprocess_with_cwd(tmp_path: Path): ], ) def test_runner_exit_zero_returns_parsed_sarif(module, kwargs, scanner, tmp_path: Path): - with patch("secscan.runners.subprocess.run", side_effect=_fake_side_effect(0, TINY_SARIF_JSON, "")) as m: + with patch("security_scan.runners.subprocess.run", side_effect=_fake_side_effect(0, TINY_SARIF_JSON, "")) as m: result: RunnerResult = module.run(tmp_path, **kwargs) assert result.completed is True assert result.scanner == scanner @@ -107,7 +107,7 @@ def test_runner_exit_zero_returns_parsed_sarif(module, kwargs, scanner, tmp_path def test_runner_vulns_found_exit_code_is_success( module, kwargs, scanner, vuln_rc, tmp_path: Path ): - with patch("secscan.runners.subprocess.run", side_effect=_fake_side_effect(vuln_rc, TINY_SARIF_JSON, "")) as m: + with patch("security_scan.runners.subprocess.run", side_effect=_fake_side_effect(vuln_rc, TINY_SARIF_JSON, "")) as m: result = module.run(tmp_path, **kwargs) assert m.called assert result.completed is True @@ -127,7 +127,7 @@ def test_runner_vulns_found_exit_code_is_success( ], ) def test_runner_binary_not_found(module, kwargs, binary_name, tmp_path: Path): - with patch("secscan.runners.subprocess.run", side_effect=FileNotFoundError(binary_name)): + with patch("security_scan.runners.subprocess.run", side_effect=FileNotFoundError(binary_name)): result = module.run(tmp_path, **kwargs) assert result.completed is False assert result.sarif is None @@ -146,7 +146,7 @@ def test_runner_binary_not_found(module, kwargs, binary_name, tmp_path: Path): ], ) def test_runner_unexpected_exit_code_is_failure(module, kwargs, tmp_path: Path): - with patch("secscan.runners.subprocess.run") as m: + with patch("security_scan.runners.subprocess.run") as m: m.return_value = _fake_completed(99, "", "boom") result = module.run(tmp_path, **kwargs) assert result.completed is False @@ -165,7 +165,7 @@ def test_runner_unexpected_exit_code_is_failure(module, kwargs, tmp_path: Path): ], ) def test_runner_unparseable_json_is_failure(module, kwargs, tmp_path: Path): - with patch("secscan.runners.subprocess.run", side_effect=_fake_side_effect(0, "not json at all <<<", "")): + with patch("security_scan.runners.subprocess.run", side_effect=_fake_side_effect(0, "not json at all <<<", "")): result = module.run(tmp_path, **kwargs) assert result.completed is False assert result.sarif is None @@ -184,7 +184,7 @@ def test_runner_unparseable_json_is_failure(module, kwargs, tmp_path: Path): ], ) def test_runner_cmd_has_no_execute_verbs(module, kwargs, tmp_path: Path): - with patch("secscan.runners.subprocess.run") as m: + with patch("security_scan.runners.subprocess.run") as m: m.return_value = _fake_completed(0, TINY_SARIF_JSON, "") module.run(tmp_path, **kwargs) cmd = m.call_args.args[0] @@ -202,7 +202,7 @@ def test_runner_cmd_has_no_execute_verbs(module, kwargs, tmp_path: Path): ], ) def test_runner_subprocess_cwd_is_set(module, kwargs, tmp_path: Path): - with patch("secscan.runners.subprocess.run") as m: + with patch("security_scan.runners.subprocess.run") as m: m.return_value = _fake_completed(0, TINY_SARIF_JSON, "") module.run(tmp_path, **kwargs) cwd = m.call_args.kwargs.get("cwd") @@ -216,7 +216,7 @@ def test_osv_does_not_pass_paths_to_ignore(tmp_path: Path): """osv-scanner's exclude flag name varies by version (and is unsupported on 1.9.2). We rely on post-hoc filtering in normalize.py instead — assert the flag is never passed even when excludes are configured.""" - with patch("secscan.runners.subprocess.run") as m: + with patch("security_scan.runners.subprocess.run") as m: m.return_value = _fake_completed(0, TINY_SARIF_JSON, "") osv_runner.run(tmp_path, exclude=["vendor/", "archive/"]) cmd = m.call_args.args[0] @@ -228,7 +228,7 @@ def test_osv_does_not_pass_paths_to_ignore(tmp_path: Path): # --- semgrep-specific: excludes + config wired in --------------------------- def test_semgrep_passes_config_and_excludes(tmp_path: Path): - with patch("secscan.runners.subprocess.run") as m: + with patch("security_scan.runners.subprocess.run") as m: m.return_value = _fake_completed(0, TINY_SARIF_JSON, "") semgrep_runner.run(tmp_path, rules_dir="/rules", exclude=["archive/", "vendor/"]) cmd = m.call_args.args[0] @@ -244,7 +244,7 @@ def test_semgrep_passes_config_and_excludes(tmp_path: Path): def test_gitleaks_writes_report_to_tempfile_in_root(tmp_path: Path): """v8 ignores `--report-path -` (silently writes 0 bytes to stdout). We must pass a real file path inside the scan root.""" - with patch("secscan.runners.subprocess.run", side_effect=_fake_side_effect(0, TINY_SARIF_JSON, "")) as m: + with patch("security_scan.runners.subprocess.run", side_effect=_fake_side_effect(0, TINY_SARIF_JSON, "")) as m: gitleaks_runner.run(tmp_path) cmd = m.call_args.args[0] assert "--report-format" in cmd @@ -267,7 +267,7 @@ def _capture(cmd, **kw): Path(cmd[idx + 1]).write_text(TINY_SARIF_JSON) return _fake_completed(0, "", "") - with patch("secscan.runners.subprocess.run", side_effect=_capture): + with patch("security_scan.runners.subprocess.run", side_effect=_capture): gitleaks_runner.run(tmp_path) assert not Path(captured_path["p"]).exists() @@ -276,7 +276,7 @@ def _capture(cmd, **kw): def test_gitleaks_accepts_any_exit_code_when_report_parses(rc, tmp_path: Path): """v7 used rc=77 for "leaks found"; v8 uses rc=1. We trust the SARIF parse, not the exit code: if the report file is valid SARIF the run was successful.""" - with patch("secscan.runners.subprocess.run", side_effect=_fake_side_effect(rc, TINY_SARIF_JSON, "")): + with patch("security_scan.runners.subprocess.run", side_effect=_fake_side_effect(rc, TINY_SARIF_JSON, "")): result = gitleaks_runner.run(tmp_path) assert result.completed is True assert result.sarif == TINY_SARIF @@ -284,7 +284,7 @@ def test_gitleaks_accepts_any_exit_code_when_report_parses(rc, tmp_path: Path): def test_gitleaks_no_report_file_written_is_failure(tmp_path: Path): """Genuine failure: scanner didn't write the report. Empty/missing file -> error.""" - with patch("secscan.runners.subprocess.run", return_value=_fake_completed(1, "", "config error")): + with patch("security_scan.runners.subprocess.run", return_value=_fake_completed(1, "", "config error")): result = gitleaks_runner.run(tmp_path) assert result.completed is False assert "no SARIF report written" in (result.error or "") or "exit 1" in (result.error or "") diff --git a/tests/test_sync.py b/tests/test_sync.py index 90358a6..ff1d638 100644 --- a/tests/test_sync.py +++ b/tests/test_sync.py @@ -1,9 +1,9 @@ from unittest.mock import MagicMock -from secscan.fingerprint import inject_marker, resolve_fingerprint -from secscan.github import ProjectContext, ProjectField -from secscan.models import Finding -from secscan.sync import default_issue, sync +from security_scan.fingerprint import inject_marker, resolve_fingerprint +from security_scan.github import ProjectContext, ProjectField +from security_scan.models import Finding +from security_scan.sync import default_issue, sync def _project(): @@ -112,7 +112,7 @@ def test_marker_is_always_injected_on_created_body(): gh = _gh(existing=[]) sync([f], gh, _project()) body = gh.create_issue.call_args.args[1] if gh.create_issue.call_args.args else gh.create_issue.call_args.kwargs["body"] - assert " ``` -`github.py` lists **all** sub‑issues of the parent (state=all), parses these markers, and builds the set of already‑filed fingerprints. +The parser also accepts the legacy `` marker (issues filed by the +pre‑rename code) so dedup against pre‑existing items still works without backfill. + +`github.py` lists **all** items on the Projects v2 board (any state), parses these +markers from each item's issue body, and builds the set of already‑filed fingerprints. --- ## 6. Create‑decision logic (`sync.py`) -``` -existing_fps = { marker.fp for issue in github.list_subissues(parent, state="all") - if marker := parse_marker(issue.body) } +```python +existing_items = github.list_project_items(project.id) # paginated GraphQL +existing_fps = {marker.fp for it in existing_items if (marker := parse_marker(it.body))} for f in findings: - fp = f.sarif_fingerprint or compute_fingerprint(f) + if not f.meets_floor(severity_floor): + result.skipped_floor += 1 + continue + fp = f.sarif_fingerprint or compute_fingerprint(f) if fp in existing_fps: - continue # already filed (open OR closed) -> never re-file + result.skipped_dup += 1 + continue - # OPTIONAL fuzzy tie-break (only if Gemma available): catch renamed/moved code that - # changed file_path (and thus fp). Ask Gemma: does this finding match any existing - # issue's (rule + snippet) at a different path with high confidence? - if triage.enabled and triage.is_duplicate_of_existing(f, existing_issues): + # OPTIONAL fuzzy tie-break (only if Gemma triage available): catch renamed/moved + # code that changed file_path (and thus fp). + if triage.enabled and triage.is_duplicate_of_existing(f, existing_items): + result.skipped_fuzzy_dup += 1 continue title, body = triage.write_issue(f) if triage.enabled else default_issue(f) - body = inject_marker(body, fp, f) # always inject the deterministic marker - issue = github.create_issue(title, body) # create-only - github.link_subissue(parent, issue) - existing_fps.add(fp) # avoid intra-run dupes + body = inject_marker(body, fp, f) # marker always injected by code + issue = github.create_issue(title, body, labels=_labels_for(f)) + item_id = github.add_to_project(project.id, issue["node_id"]) + github.set_project_field(project.id, item_id, project.severity, f.severity) + github.set_project_field(project.id, item_id, project.category, f.category) + existing_fps.add(fp) ``` **Invariants (enforced in `github.py`, not trusted to the model):** -- Create and link only. **No** edit/close/reopen/delete of issues. +- Create + add‑to‑project + set‑field only. **No** edit/close/reopen/delete of issues. - The deterministic marker is always injected by code, even if Gemma wrote the prose. - Never write a raw secret into a body — only `masked_preview`. -- A scanner that did **not** run/complete contributes **no** findings (so a crashed scanner can't look like "all clear"). +- A scanner that did **not** run/complete contributes **no** findings (so a crashed + scanner can't look like "all clear"). --- -## 7. Config schema (`config.yaml`, mounted read‑only) +## 7. Config schema (`config/config.yaml`, the config dir is bind‑mounted read‑only) ```yaml -repo: "leverj/ezel" # owner/name -ref: "dev" # branch -parent_issue: 451 # user creates this; tool files sub-issues under it -github_token_env: "GITHUB_TOKEN" # name of env var holding the PAT (value never in config) +repo: "leverj/ezel" +ref: "dev" + +project: # the GitHub Projects v2 board findings file into + owner: "leverj" # org or user + number: 5 # project number from the URL: /projects/ + +github_token_env: "GITHUB_TOKEN" # env var holding the PAT (value NEVER in config.yaml) -scanners: # which to run; auto-skipped if stack not present +scanners: osv: true gitleaks: true semgrep: true + trivy: true + trufflehog: true + syft: true # SBOM artifact (no project items filed) + codex: false # OPTIONAL — OpenAI Codex via subscription + gemma: false # OPTIONAL — local Gemma via Ollama + +codex: # tunables for the codex runner + binary: "codex" + model: null # null => use codex CLI's configured default + timeout: 1200 + +gemma: # tunables for the gemma scanner (falls back to triage:* when null) + base_url: null + model: null + keep_alive: null + timeout: 1800 + max_files: 60 # cap to keep prompt size bounded + max_file_bytes: 12000 + max_total_bytes: 200000 + +cross_validate: # only active when both scanners.codex AND scanners.gemma are true + enabled: true + codex_timeout: 300 + gemma_timeout: 180 paths: - exclude: ["archive/", "vendor/", ".github/scripts/"] # globs skipped everywhere + exclude: ["archive/", "vendor/", ".github/scripts/"] -severity_floor: "low" # don't file below this (info-only by default) +severity_floor: "low" # info | low | medium | high | critical -triage: # all optional +triage: # optional Gemma triage (issue prose / fuzzy dedup / Slack intro) enabled: false provider: "ollama" model: "gemma4:26b" base_url: "http://host.docker.internal:11434" keep_alive: "5m" + prewarm: true + intro_timeout: 120 + intro_enabled: true + prose_enabled: false + fuzzy_dup_enabled: false slack: enabled: false - channel_id_env: "SLACK_CHANNEL_ID" # or a webhook URL via env + webhook_url_env: "SLACK_WEBHOOK_URL" # OR channel_id_env + bot_token_env ``` -Token and any Slack secret arrive via **env** (the container reads `os.environ[...]`), never written into `config.yaml`. 1Password / Docker secrets can populate those env vars on the host. +Token and any Slack secret arrive via **env** (the container reads `os.environ[...]`), +never written into `config.yaml`. 1Password / Docker secrets can populate those env +vars on the host. + +The whole **`config/` directory** is the bind‑mount unit. A `secrets.source: 1password` +setup keeps the `.env.1password.tpl` file inside the same directory so it rides along. --- ## 8. Stack detection (`detect.py`) -1. **Manifest walk (primary, zero‑API, reliable):** walk the cloned tree (honoring `paths.exclude`) for manifests/lockfiles and map to scanners + ecosystems: +1. **Manifest walk (primary, zero‑API, reliable):** walk the cloned tree (honoring + `paths.exclude`) for manifests/lockfiles and map to scanners + ecosystems: - `package.json` + `package-lock.json` | `yarn.lock` | `pnpm-lock.yaml` → npm/yarn/pnpm (OSV) - - `Gemfile.lock` → RubyGems (OSV); `Package.resolved` → SwiftPM (OSV) + - `Gemfile.lock` → RubyGems (OSV); `Package.resolved` → SwiftPM (OSV) - `requirements.txt` | `poetry.lock` | `Pipfile.lock` → pip (OSV) - - `go.mod`/`go.sum` → Go (OSV); `Cargo.lock` → Rust (OSV) + - `go.mod`/`go.sum` → Go (OSV); `Cargo.lock` → Rust (OSV) - any source files → Semgrep (its own language autodetect); whole tree → Gitleaks -2. **GitHub Linguist cross‑check (optional hint):** `GET /repos/{o}/{r}/languages` as a sanity check that the walk didn't miss a language. Do **not** rely on it as the only source (it misses ecosystems/lockfiles and odd monorepo layouts). -3. Stacks with no available scanner → printed as "detected, no scanner" and skipped (don't fail the run). +2. **Whole‑tree scanners** (Trivy, Trufflehog, Syft) run once on the repo root, no manifest gating. +3. **Framework detection** — currently surfaces `supabase` when `supabase/config.toml` + exists or `@supabase/supabase-js` is in any `package.json`. Used to enable the + Supabase Semgrep rule pack. +4. **LLM scanners** (codex, gemma) run only when there's at least one recognized source file. +5. Stacks with no available scanner → printed as "detected, no scanner" and skipped (don't fail the run). + +Handles monorepos: there can be many manifests in many dirs. + +--- -Handles monorepos: there can be many manifests in many dirs (e.g. `ezel` had npm in 5 locations + Swift + RubyGems). +## 9. Scanners (`runners/`) — all emit SARIF (or are normalized to it); none execute repo code + +- **OSV‑Scanner** — `osv-scanner --format sarif --recursive ` (parses lockfiles; + no install). Covers npm/yarn/pnpm, RubyGems, SwiftPM, pip, Go, Cargo from one tool. +- **Gitleaks** — `gitleaks detect --report-format sarif --source ` (git‑history + aware; emits a per‑secret fingerprint). +- **Semgrep** — `semgrep scan --config --sarif --metrics=off …` (static; + bundled rule packs include `javascript`, `python`, `secrets`, `xss`, `sqli`, `supabase`). +- **Trivy** — `trivy ` against the cloned tree; SARIF output; + multi‑category normalization in `normalize.py`. +- **Trufflehog** — JSONL output (not SARIF), normalized in `normalize.py`. `--only-verified` + surfaces secrets the scanner validated live against the vendor (CWE‑798 critical). +- **Syft** — produces a CycloneDX SBOM JSON written to `/work/`. No project items filed; + the runner's "SARIF" is a tiny metadata wrapper so the orchestrator can log + reference it. +- **Codex** (optional) — `codex exec -s read-only --output-schema schema.json -o out.json …` + with a strict JSON output contract. Subscription auth (`codex login`); no API key. + `extra["scanner"] = "codex"`; rule_ids namespaced `codex.`. +- **Gemma** (optional) — Ollama `/api/chat` with `format=json`, batched source files + (capped by file count + per‑file bytes + total bytes). Same JSON contract as codex. + `extra["scanner"] = "gemma"`; rule_ids namespaced `gemma.`. + +Pin scanner versions (in the Dockerfile) so "new vs resolved" diffing isn't polluted by +the scanners themselves changing. Each runner returns SARIF JSON (or `None` + a "did +not complete" flag — which must keep that category out of any future close logic the +external system builds). --- -## 9. Scanners (`runners/`) — all emit SARIF, never execute repo code +## 10. Gemma 4 triage (`triage.py`) — optional, guard‑railed + +Distinct from the **gemma scanner** (which produces findings). Triage is post‑processing: -- **OSV‑Scanner** — `osv-scanner --format sarif --recursive ` (parses lockfiles; no install). Covers npm/yarn/pnpm, RubyGems, SwiftPM, pip, Go, Cargo from one tool. -- **Gitleaks** — `gitleaks detect --report-format sarif --source ` (git‑history aware; emits a per‑secret fingerprint). -- **Semgrep** — `semgrep scan --config --sarif --metrics=off --exclude archive` (static; bundle the JS/TS/React + Swift/iOS + Android rules from `ezel_scan.py` so no network rule fetch). +1. **Fuzzy dedup tie‑break** — for findings whose deterministic fp is new, decide if it's + actually a renamed/moved version of an existing item. (Off by default; + `triage.fuzzy_dup_enabled`.) +2. **Prose** — draft issue title/body. (Off by default; `triage.prose_enabled`.) +3. **Slack intro** — one short framing sentence prepended to the deterministic per‑category + Slack digest. (On by default; `triage.intro_enabled`.) -Pin scanner versions (in the Dockerfile) so "new vs resolved" diffing isn't polluted by the scanners themselves changing. Each runner returns SARIF JSON (or `None` + a "did not complete" flag — which must keep that category out of any future close logic the external system builds). +Guardrails (in code, not the prompt): validate every JSON response against its schema and +fall back to deterministic output on malformed responses; feed only the scanner's factual +fields (never invent fix versions); the deterministic marker + masked previews are injected +by code regardless of what the model returns. If Ollama is unreachable, the run still +completes — every Gemma path has a deterministic fallback. --- -## 10. Gemma 4 triage (`triage.py`) — optional, guard‑railed +## 11. Cross‑validation (`cross_validate.py`) — optional, off unless both LLM scanners enabled -Talks to Ollama (`/api/chat` with `tools` for native function calling; `keep_alive` so the ~16 GB model loads only during the run and frees ~5 min after). Three jobs, all additive: +When `scanners.codex` AND `scanners.gemma` are both true: -1. **Fuzzy dedup tie‑break** — for findings whose deterministic fp is new, decide if it's actually a renamed/moved version of an existing issue (returns an existing issue number or "new"). Must cite the finding it's judging. -2. **Prioritization / context** — order findings, add a one‑line "why this matters" using only the scanner's factual fields. Must **not** lower severity below `severity_floor` without an explicit flagged reason. -3. **Prose** — draft issue title/body and the Slack digest text. +1. For every Codex finding → ask Gemma (via Ollama): "real / false_positive / uncertain + + brief reason". +2. For every Gemma finding → ask Codex (via subprocess): same prompt. +3. Annotate `finding.extra["cross_validation"]` with the verdict + reason. +4. If verdict is `false_positive`: downgrade severity one notch (`high → medium`, + `medium → low`, `low → info`). **`critical` is asymmetric — it NEVER auto‑downgrades.** + The cost of missing a real critical is higher than the cost of one noisy critical + in the board. +5. **Findings are NEVER suppressed.** Disagreement is surfaced via the annotation; + humans triage on the project board. -Guardrails (in code, not the prompt): validate every tool call against its JSON schema and reject/retry malformed ones; feed only the scanner's factual fields (never invent fix versions); the deterministic marker + masked previews are injected by code regardless of what the model returns. If Ollama is unreachable or `triage.enabled=false`, fall back to deterministic `default_issue()` templating — the run still completes. +If either validator is unreachable, the verdict for that direction is `uncertain` and +severity stays unchanged — never block the run on a validator failure. --- -## 11. Docker & secrets +## 12. Docker & secrets ``` -Dockerfile: python:3.x-slim + pinned osv-scanner, gitleaks, semgrep, git -Volumes: - /config (ro) -> config.yaml - /rules (ro) -> bundled semgrep rules (or baked into the image) - /work (rw) -> ephemeral per-run clone + scratch (can be tmpfs) +Dockerfile: python:3.11-slim + pinned osv-scanner, gitleaks, semgrep, trivy, trufflehog, syft, git + +Volumes (bind-mounted at runtime — no VOLUME directive, so anonymous volumes never +accumulate when --rm is used): + /config (ro) -> the user's whole config directory (config.yaml + .env.1password.tpl + …) + /rules (ro) -> optional override of the image-baked semgrep rules + /work (rw) -> ephemeral per-run clone + SBOM output (wiped each run) + Secrets: GITHUB_TOKEN, SLACK_* via env (docker run --env-file, Docker secret, or 1Password injection) -Entrypoint: python -m security-scan --config /config/config.yaml + +Entrypoint: + python -m security_scan --config /config/config.yaml --work-dir /work ``` -Stateless: the container holds no state between runs; everything durable is in GitHub Issues. The clone lives in `/work` and is wiped each run. Token file (if used instead of env) must be `600` and is never logged (mask in all output). + +Stateless: the container holds no state between runs; everything durable is in GitHub +Issues + the Projects v2 board. The clone lives in `/work` and is wiped each run. + +**Image manifest** (`/app/SECURITY-SCAN-MANIFEST.yaml`) — see §15. --- -## 12. Execution flow (`main.py`) +## 13. Execution flow (`main.py`) ``` -1. load config + token (fail fast if token missing / parent_issue unset) -2. shallow|full clone repo@ref into /work (full clone only if a history-secret scan is wanted) -3. detect stack -> list of (scanner, targets) -4. run each enabled+relevant scanner -> SARIF (record which completed) -5. normalize SARIF -> Findings ; drop paths in exclude ; drop < severity_floor -6. fingerprint each Finding -7. list parent's sub-issues (open+closed) -> existing fingerprints -8. for each new fingerprint: (optional Gemma fuzzy-dup check) -> create + link sub-issue -9. (optional) Gemma-written Slack digest -> post once -10. print a deterministic summary (created N, skipped M dup, scanners run/failed) +1. load config + token (fail fast if token missing / project unresolved) +2. shallow|full clone repo@ref into /work +3. resolve Projects v2 board (GraphQL); idempotently ensure Severity + Category single-select fields +4. detect stack -> list of (scanner, targets) +5. run each enabled+relevant scanner -> SARIF/JSON (record which completed) +6. normalize results -> Findings ; drop paths in exclude ; drop < severity_floor +7. if both codex AND gemma ran -> cross_validate.cross_validate(findings, …) +8. fingerprint each Finding (or use SARIF-supplied fingerprint) +9. list existing project items -> existing fingerprints +10. for each new fingerprint: + (optional Gemma fuzzy-dup check) -> create_issue + add_to_project + set Severity/Category +11. (optional) Slack digest (Gemma-written intro + deterministic per-category sections) +12. print a deterministic summary (created N, skipped M dup, scanners run/failed) ``` ---- +A scanner that did NOT complete contributes ZERO findings — so a crashed scanner never +reads as "all clear" to downstream tooling. -## 13. Test plan +--- -- **Unit:** fingerprint stability (same finding across line shifts → same fp; rename → different fp, caught by fuzzy pass); marker round‑trip (inject → parse); SARIF→Finding for one fixture per scanner; severity normalization; exclude‑path filtering; masked‑preview never contains the raw value. -- **Dedup logic:** given a fixture set of existing sub‑issues (open + closed) and a finding set, assert create‑only + never‑re‑file (closed fp ⇒ skipped). -- **Scanner integration:** run each scanner against a tiny synthetic repo with one planted issue each; assert SARIF parses and the finding surfaces. -- **Graceful degradation:** Ollama down → deterministic path still files issues; a scanner binary missing → that category skipped with a note, others unaffected. -- **End‑to‑end dry‑run:** `--dry-run` (no issue creation) prints what *would* be filed. Verify against a real repo before wiring the token. -- **Safety:** assert no `npm install`/`bundle install`/`pod install` is ever invoked; assert the token never appears in logs or issue bodies. +## 14. Test plan + +- **Unit:** fingerprint stability (same finding across line shifts → same fp; rename → + different fp, caught by fuzzy pass); marker round‑trip (inject → parse); legacy marker + compat; SARIF→Finding for one fixture per scanner; severity normalization; exclude‑path + filtering; masked‑preview never contains the raw value. +- **Dedup logic:** given a fixture set of existing project items (open + closed) and a + finding set, assert create‑only + never‑re‑file (closed fp ⇒ skipped). +- **Cross‑validation:** unit‑tested with mocked Ollama HTTP and mocked codex subprocess. + Verifies asymmetric downgrade (critical never), never‑suppress invariant, and graceful + degradation when a validator is unreachable. +- **GraphQL ops:** mocked `requests.Session` — resolve_project, list_project_items + (paginated), add_to_project, set_project_field; dry‑run path makes zero HTTP calls. +- **Scanner integration:** run each scanner against a tiny synthetic repo with one + planted issue each; assert SARIF parses and the finding surfaces. +- **Graceful degradation:** Ollama down → deterministic path still files issues; a + scanner binary missing → that category skipped with a note, others unaffected. +- **End‑to‑end dry‑run:** `--dry-run` (no issue creation) prints what *would* be filed. + Verifies the project resolution + listing path against a real board. +- **Safety:** assert no `npm install`/`bundle install`/`pod install` is ever invoked; + assert the token never appears in logs or issue bodies; codex sandbox is `read-only`; + raw secrets are never in issue bodies. --- -## 14. Build order for Claude Code (milestones) +## 15. Image manifest contract -1. `config.py` + `Finding` model + `fingerprint.py` (+ unit tests) — the deterministic core. -2. `github.py` (clone, list sub‑issues open+closed, create+link) with a `--dry-run`. -3. `runners/` + `normalize.py` for one scanner (Semgrep), end‑to‑end on a synthetic repo. -4. Add OSV‑Scanner + Gitleaks runners. -5. `detect.py` (manifest walk) + `sync.py` (create‑decision) → full deterministic pipeline. -6. Dockerfile + volumes + env secrets; dry‑run in container against a real repo. -7. `notify.py` (Slack) — optional. -8. `triage.py` (Gemma 4 via Ollama) — optional, last; everything must already work without it. +The image bakes `SECURITY-SCAN-MANIFEST.yaml` at `/app/SECURITY-SCAN-MANIFEST.yaml`. +Consumers read it without starting the scanner: + +```bash +docker run --rm --entrypoint cat \ + leverj/security-scan: /app/SECURITY-SCAN-MANIFEST.yaml +``` -Ship after step 6 as a working deterministic tool; 7–8 are additive. +Top‑level keys: + +| Key | Purpose | +|---|---| +| `version` | Image version (matches `pyproject.toml` and the git tag). | +| `config_schema_version` | Bumps only when the YAML schema changes in a breaking way. | +| `docker_image` | Full repo name (`leverj/security-scan`) for use by consumers. | +| `released` | Release date. | +| `changelog` | Short bullet list — surfaced verbatim to the user on the upgrade prompt. | +| `breaking_changes` | List of `{id, summary, user_action}` items requiring explicit user confirmation. | +| `config.new_fields` | Fields the consumer should ADD to a user's config.yaml when missing, with documented defaults. | +| `config.renamed_fields` | Fields the consumer should rename in place. May require user input where the rename isn't 1:1. | +| `config.removed_fields` | Fields the consumer should strip with confirmation. | +| `image_paths` | Documentation of where things live inside the image (mount targets, source). | + +The publish workflow (`.github/workflows/publish.yml`) refuses to push unless +`pyproject.toml`'s version and the manifest's version both match the git tag. This is +the contract that lets the consumer skill in `leverj/ai-skills` evolve in lockstep +with the image — schema migration is declared by the image, not coded into the skill. --- -## 15. Lineage & deferred roadmap +## 16. Build/release flow + +1. Develop on a feature branch; CI lints + tests + does a no‑push docker build on each PR. +2. Merge to `main`. +3. Tag a release: `git tag v0.X.Y && git push origin v0.X.Y`. +4. `publish.yml` builds multi‑arch (amd64 + arm64), tags `leverj/security-scan:vX.Y.Z` + + `:latest`, pushes to Docker Hub, and smoke‑tests the manifest is readable. +5. The companion skill in `leverj/ai-skills` (or any other consumer) sees the new tag, + fetches the candidate manifest, surfaces the changelog + migrations to the user, and + applies them on confirmation. + +Required repository secrets for the publish job: `DOCKERHUB_USERNAME`, `DOCKERHUB_TOKEN`. + +--- -- v1 generalizes the proven `ezel_scan.py` (stack detection, secret masking, conservative create‑only sub‑issue sync, bundled Semgrep rules) into a config‑driven, Dockerized, single‑repo tool. -- Deferred, in rough order: GitHub App auth → multi‑repo + parallelism (WAL or per‑repo state) → DAST/pen‑test lane (staging only, authorized targets) → DefectDojo/Dependency‑Track aggregation when correlating many tools/repos → CISO/GRC dashboard (CISO Assistant for compliance) as an always‑on backend the daily job feeds. +## 17. Lineage & deferred roadmap + +- v1 generalized `ezel_scan.py` (a hand‑rolled per‑repo scanner) into a generic, Dockerized + single‑repo tool using parent‑epic + sub‑issue storage. +- v2 (this spec) drops the sub‑issue tree in favor of Projects v2 (lifts the 100‑item cap, + adds custom fields, simpler triage UI), adds Codex + Gemma LLM SAST + cross‑validation, + and adds the image manifest contract for consumer skills. +- Deferred, in rough order: GitHub App auth → multi‑repo + parallelism (per‑project state) → + DAST/pen‑test lane (staging only, authorized targets) → Live Supabase Security Advisor + parity (DB‑connected lane, see [`leverj/security-scanner#4`](https://github.com/leverj/security-scanner/issues/4)) → + DefectDojo/Dependency‑Track aggregation when correlating many tools/repos → CISO/GRC + dashboard as an always‑on backend the daily job feeds. diff --git a/security-scan.sh b/security-scan.sh index 73a2252..c49fecd 100755 --- a/security-scan.sh +++ b/security-scan.sh @@ -26,9 +26,9 @@ # Default config directory: ./config/. Override with one of: # --config /path/to/cfg.yaml # explicit file path (its parent dir is mounted) # SECURITY_SCAN_CONFIG=... # same thing via env var -# SECSCAN_CONFIG_DIR=... # mount this dir instead; expects config.yaml inside +# SECURITY_SCAN_CONFIG_DIR=... # mount this dir instead; expects config.yaml inside # -# When the skill packages security-scan, point SECSCAN_CONFIG_DIR at the per-project +# When the skill packages security-scan, point SECURITY_SCAN_CONFIG_DIR at the per-project # config the agent maintains for the user. set -euo pipefail @@ -123,7 +123,7 @@ cmd_build() { } cmd_check() { - local config_dir="${SECSCAN_CONFIG_DIR:-$DEFAULT_CONFIG_DIR}" + local config_dir="${SECURITY_SCAN_CONFIG_DIR:-$DEFAULT_CONFIG_DIR}" local config="${SECURITY_SCAN_CONFIG:-$config_dir/config.yaml}" local ok=1 @@ -212,7 +212,7 @@ cmd_check() { cmd_run() { command -v docker >/dev/null || die "docker not on PATH" - local config_dir="${SECSCAN_CONFIG_DIR:-$DEFAULT_CONFIG_DIR}" + local config_dir="${SECURITY_SCAN_CONFIG_DIR:-$DEFAULT_CONFIG_DIR}" local config="${SECURITY_SCAN_CONFIG:-$config_dir/config.yaml}" local extra_args=() local have_dry_run=0 @@ -258,7 +258,7 @@ To set up: \$EDITOR config/config.yaml # set repo, ref, project, secrets.source See README.md ("Setup: secrets") for env-vs-1Password choice. -Or set SECSCAN_CONFIG_DIR=/path/to/your-config-dir to use a different directory. +Or set SECURITY_SCAN_CONFIG_DIR=/path/to/your-config-dir to use a different directory. EOF exit 1 fi @@ -392,7 +392,7 @@ usage: defaults: --dry-run is added unless you pass --no-dry-run - --config-dir defaults to ./config/ (override with SECSCAN_CONFIG_DIR env) + --config-dir defaults to ./config/ (override with SECURITY_SCAN_CONFIG_DIR env) --config defaults to /config.yaml (override with SECURITY_SCAN_CONFIG env) image tag defaults to "security-scan:latest" (override with SECURITY_SCAN_IMAGE env) diff --git a/security_scan/__init__.py b/security_scan/__init__.py index 33be793..d1913c1 100644 --- a/security_scan/__init__.py +++ b/security_scan/__init__.py @@ -1,3 +1,3 @@ -"""security_scan — stateless single-repo security scanner that files findings as GitHub sub-issues.""" +"""security_scan — stateless single-repo security scanner; files findings into a GitHub Projects v2 board.""" -__version__ = "0.1.0" +__version__ = "0.2.0" diff --git a/security_scan/notify.py b/security_scan/notify.py index a7d787e..c8e4055 100644 --- a/security_scan/notify.py +++ b/security_scan/notify.py @@ -112,9 +112,9 @@ def _default_digest( ) -> str: """Slack mrkdwn digest of ACTIONABLE findings (newly filed this run). - Per-category sections list only the findings that became open sub-issues - this run — items that were dup-skipped (already filed previously) or - below the severity floor aren't shown. The footer still reports the + Per-category sections list only the findings that became open project + items this run — items that were dup-skipped (already filed previously) + or below the severity floor aren't shown. The footer still reports the skip counts so you can see the gates were applied, but the sections themselves only contain new bugs to triage. diff --git a/security_scan/runners/syft.py b/security_scan/runners/syft.py index 397ba4b..ffdfb2d 100644 --- a/security_scan/runners/syft.py +++ b/security_scan/runners/syft.py @@ -1,10 +1,10 @@ """Syft runner — produces a CycloneDX SBOM artifact for the scanned tree. -Unlike the other scanners, Syft does not file sub-issues. It writes the SBOM -to disk so it can be archived/uploaded by the caller. RunnerResult.sarif -carries a small metadata dict (path + component count + format) so the -orchestrator can log a one-line summary and downstream Slack digests can -reference it. +Unlike the other scanners, Syft does not file findings (no project items). +It writes the SBOM to disk so it can be archived/uploaded by the caller. +RunnerResult.sarif carries a small metadata dict (path + component count + +format) so the orchestrator can log a one-line summary and downstream Slack +digests can reference it. """ from __future__ import annotations diff --git a/security_scan/sync.py b/security_scan/sync.py index 44cd77f..dd339ca 100644 --- a/security_scan/sync.py +++ b/security_scan/sync.py @@ -129,12 +129,13 @@ def sync( def _labels_for(f: Finding) -> list[str]: - """The label set applied to a sub-issue. + """The label set applied to each issue filed. - `security` is the existing umbrella label. `security_scan:` lets you - filter the parent's sub-issue list by category in the GitHub UI. - `security_scan:` lets you triage by severity. All labels are namespaced - under `security_scan:` so they're easy to clean up if you ever drop the tool. + `security` is the existing umbrella label. `security-scan:` lets you + filter project-board items by category alongside the Category single-select + field. `security-scan:` parallels Severity. All scanner-applied + labels are namespaced under `security-scan:` so they're easy to clean up if + you ever drop the tool. """ return [ "security", diff --git a/tools/backfill_markers.py b/tools/backfill_markers.py deleted file mode 100644 index 321f251..0000000 --- a/tools/backfill_markers.py +++ /dev/null @@ -1,194 +0,0 @@ -"""One-time backfill: add security_scan markers to sub-issues filed by an earlier tool. - -Reads marker-less sub-issues under a parent issue, parses the ezel_scan format -(or a compatible one) to recover (rule_id, file_path, category), computes -security_scan's fingerprint, and PATCHes the body to inject the marker. Future -`security_scan run` invocations then dedup correctly against these issues. - -Usage: - python tools/backfill_markers.py --owner leverj --repo ezel --parent 451 \\ - --work-prefix file:///work/ezel --dry-run - python tools/backfill_markers.py --owner leverj --repo ezel --parent 451 \\ - --work-prefix file:///work/ezel # writes for real - -Env: - GITHUB_TOKEN must be set (or use `op run --env-file=.env.1password.tpl -- ...`). -""" - -from __future__ import annotations - -import argparse -import os -import re -import sys -from dataclasses import dataclass - -import requests - -# Allow running this from the repo root with `-m` or as a script. -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from security_scan.fingerprint import compute_fingerprint, inject_marker, parse_marker -from security_scan.models import Finding - -_API = "https://api.github.com" - -# Ezel-scan body fields we know how to read. -_FIELD_RE = re.compile(r"^(?PType|Ecosystem|Package|Installed|Advisory):\s*(?P.+)$", re.M) - -# Map detected ecosystems (lowercased) to the canonical lockfile path in the repo. -# Tailor this to the repo you're backfilling against; for leverj/ezel it's: -_ECO_TO_LOCKFILE = { - "npm": "yarn.lock", - "yarn": "yarn.lock", - "pnpm": "yarn.lock", # if pnpm-lock.yaml exists, change this - "rubygems": "ios-native/Gemfile.lock", - "swiftpm": "ios-native/Ezel.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved", -} - - -@dataclass -class ParsedIssue: - number: int - state: str - title: str - body: str - category: str | None - ecosystem: str | None - package: str | None - rule_id: str | None # CVE-XXXX (preferred) or GHSA-XXXX - - -def parse_ezel_scan_body(title: str, body: str) -> ParsedIssue | None: - """Best-effort parse. Returns None when the issue isn't a dependency vuln we recognize.""" - fields = {m.group("key").lower(): m.group("val").strip() for m in _FIELD_RE.finditer(body or "")} - - # ezel_scan's "Type: dependency vulnerability ..." indicates a dep finding. - is_dep = "dependency" in fields.get("type", "").lower() - if not is_dep: - # Other ezel_scan types (secret, sast) — out of scope for this pass since the - # file_path is too unpredictable to safely backfill without scanning. Skip. - return None - - eco = fields.get("ecosystem", "").lower() or None - pkg = fields.get("package") or None - - # Advisory: "CVE-2026-XXXX GHSA-..." — prefer CVE (matches what osv-scanner emits as ruleId). - adv = fields.get("advisory", "") - cve_match = re.search(r"\bCVE-\d{4}-\d+\b", adv) or re.search(r"\bCVE-\d{4}-\d+\b", title) - ghsa_match = re.search(r"\bGHSA-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}\b", adv) - rule_id = (cve_match.group(0) if cve_match else (ghsa_match.group(0) if ghsa_match else None)) - - return ParsedIssue( - number=0, state="", title=title, body=body, - category="dependency", ecosystem=eco, package=pkg, rule_id=rule_id, - ) - - -def list_subissues(session: requests.Session, owner: str, repo: str, parent: int) -> list[dict]: - issues: list[dict] = [] - url = f"{_API}/repos/{owner}/{repo}/issues/{parent}/sub_issues" - params: dict | None = {"per_page": 100, "state": "all"} - while url: - r = session.get(url, params=params, timeout=30) - r.raise_for_status() - issues.extend(r.json() or []) - link = r.headers.get("Link") or "" - url = None - for part in link.split(","): - seg = part.strip() - if 'rel="next"' in seg: - lt, gt = seg.find("<"), seg.find(">") - if lt != -1 and gt != -1: - url = seg[lt + 1:gt] - params = None # next-link encodes them - break - return issues - - -def patch_body(session: requests.Session, owner: str, repo: str, number: int, body: str) -> None: - r = session.patch( - f"{_API}/repos/{owner}/{repo}/issues/{number}", - json={"body": body}, - timeout=30, - ) - r.raise_for_status() - - -def main() -> int: - ap = argparse.ArgumentParser() - ap.add_argument("--owner", required=True) - ap.add_argument("--repo", required=True) - ap.add_argument("--parent", type=int, required=True) - ap.add_argument("--work-prefix", default="file:///work/ezel", - help="Path prefix that matches what security_scan/osv-scanner emit " - "(e.g. file:///work/).") - ap.add_argument("--dry-run", action="store_true") - args = ap.parse_args() - - token = os.environ.get("GITHUB_TOKEN") - if not token: - print("error: GITHUB_TOKEN unset", file=sys.stderr) - return 2 - - session = requests.Session() - session.headers.update({ - "Authorization": f"Bearer {token}", - "Accept": "application/vnd.github+json", - "X-GitHub-Api-Version": "2022-11-28", - "User-Agent": "security_scan-backfill/0.1", - }) - - print(f"listing sub-issues of {args.owner}/{args.repo}#{args.parent} ...", file=sys.stderr) - issues = list_subissues(session, args.owner, args.repo, args.parent) - print(f" total: {len(issues)}", file=sys.stderr) - - candidates = [i for i in issues if not parse_marker(i.get("body") or "") and i.get("state") == "open"] - print(f" marker-less open: {len(candidates)}", file=sys.stderr) - print(file=sys.stderr) - - patched = 0 - skipped_no_parse = 0 - skipped_no_eco_map = 0 - for issue in candidates: - parsed = parse_ezel_scan_body(issue.get("title", ""), issue.get("body", "") or "") - if not parsed or not parsed.rule_id or not parsed.ecosystem: - print(f" - #{issue['number']:>4d} SKIP (can't parse): {issue['title'][:80]}") - skipped_no_parse += 1 - continue - lockfile = _ECO_TO_LOCKFILE.get(parsed.ecosystem) - if not lockfile: - print(f" - #{issue['number']:>4d} SKIP (no lockfile map for ecosystem={parsed.ecosystem!r}): {issue['title'][:80]}") - skipped_no_eco_map += 1 - continue - file_path = f"{args.work_prefix.rstrip('/')}/{lockfile}" - - finding = Finding( - scanner="osv", - category="dependency", - rule_id=parsed.rule_id, - severity="medium", # not used by the fingerprint - file_path=file_path, - line=None, - title="", - message="", - ) - fp = compute_fingerprint(finding) - new_body = inject_marker(issue.get("body") or "", fp, finding) - - action = "WOULD PATCH" if args.dry_run else "PATCH" - print(f" ✓ #{issue['number']:>4d} {action} fp={fp} rule={parsed.rule_id} -> {lockfile}") - if not args.dry_run: - patch_body(session, args.owner, args.repo, issue["number"], new_body) - patched += 1 - - print(file=sys.stderr) - print( - f"summary: {'would patch' if args.dry_run else 'patched'} {patched} · " - f"skipped {skipped_no_parse} (unparseable) + {skipped_no_eco_map} (no eco map)", - file=sys.stderr, - ) - return 0 - - -if __name__ == "__main__": - sys.exit(main())