diff --git a/.claude/scheduled_tasks.lock b/.claude/scheduled_tasks.lock deleted file mode 100644 index a55d131..0000000 --- a/.claude/scheduled_tasks.lock +++ /dev/null @@ -1 +0,0 @@ -{"sessionId":"c4bc600b-023e-4b9d-af63-b33c36b96d25","pid":8636,"procStart":"Wed May 27 04:44:14 2026","acquiredAt":1779892167665} \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2cb9aaa..f41e3bf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,7 @@ jobs: pip install -e ".[dev]" - name: Lint (ruff) - run: ruff check secscan/ tests/ + run: ruff check security_scan/ tests/ - name: Tests (pytest) run: pytest -q @@ -58,6 +58,6 @@ jobs: with: context: . push: false - tags: secscan:ci + tags: security-scan:ci # Disable provenance/sbom for faster CI; can re-enable when we cut a release. provenance: false diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..a6e243e --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,98 @@ +name: Publish image + +# Builds and publishes the security-scan image to Docker Hub on every tag named v*. +# The tag must match the [project.version] in pyproject.toml and the +# `version:` in SECURITY-SCAN-MANIFEST.yaml (a guard step verifies this). +# +# Required repository secrets: +# DOCKERHUB_USERNAME the Docker Hub user/org that owns leverj/security-scan +# DOCKERHUB_TOKEN Docker Hub access token with read+write on the repo +# +# Cut a release: +# git tag v0.2.0 && git push origin v0.2.0 +# +# The workflow tags the image with: +# leverj/security-scan:v0.2.0 (immutable per release) +# leverj/security-scan:latest (always the most recent tag) + +on: + push: + tags: ["v*"] + workflow_dispatch: + inputs: + tag: + description: "Tag to build (e.g., v0.2.0). Must match pyproject.toml + manifest version." 
+ required: true + +permissions: + contents: read + +env: + IMAGE: leverj/security-scan + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" + +jobs: + publish: + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.tag || github.ref }} + + - name: Resolve tag + id: tag + run: | + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + tag="${{ github.event.inputs.tag }}" + else + tag="${GITHUB_REF#refs/tags/}" + fi + echo "tag=$tag" >> "$GITHUB_OUTPUT" + # Strip the leading 'v' for comparison against pyproject / manifest. + echo "version=${tag#v}" >> "$GITHUB_OUTPUT" + + - name: Verify version alignment + run: | + py_version=$(grep -E '^version\s*=' pyproject.toml | head -1 | sed -E 's/.*"([^"]+)".*/\1/') + mf_version=$(grep -E '^version:' SECURITY-SCAN-MANIFEST.yaml | head -1 | sed -E 's/.*"([^"]+)".*/\1/') + want='${{ steps.tag.outputs.version }}' + + echo "tag=$want pyproject=$py_version manifest=$mf_version" + + if [[ "$py_version" != "$want" ]]; then + echo "::error::pyproject.toml version ($py_version) != tag ($want). Bump pyproject.toml or fix the tag." >&2 + exit 1 + fi + if [[ "$mf_version" != "$want" ]]; then + echo "::error::SECURITY-SCAN-MANIFEST.yaml version ($mf_version) != tag ($want). Bump the manifest or fix the tag." >&2 + exit 1 + fi + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . + push: true + # Multi-arch — runners build amd64/arm64 in parallel. 
+ platforms: linux/amd64,linux/arm64 + tags: | + ${{ env.IMAGE }}:${{ steps.tag.outputs.tag }} + ${{ env.IMAGE }}:latest + provenance: false + + - name: Smoke-test the published image (manifest readable) + run: | + docker run --rm --entrypoint cat \ + "${{ env.IMAGE }}:${{ steps.tag.outputs.tag }}" \ + /app/SECURITY-SCAN-MANIFEST.yaml | head -5 diff --git a/.gitignore b/.gitignore index a9cbaed..a7b94c7 100644 --- a/.gitignore +++ b/.gitignore @@ -11,15 +11,18 @@ htmlcov/ work/ /tmp_* -# Per-deployment config (use config.example.yaml as the template; keep secrets out of git) -config.yaml +# Per-deployment config (use config/config.example.yaml as the template; keep secrets out of git) +config/config.yaml # Personal 1Password reference template (paths to your vault items) -.env.1password.tpl +config/.env.1password.tpl # IDE .idea/ .vscode/ +# Claude Code session state (per-checkout; not part of the repo) +.claude/ + # OS .DS_Store diff --git a/Dockerfile b/Dockerfile index a97be28..e9a6a41 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# secscan — single-repo security scanner. Stateless. State lives in GitHub Issues. +# security-scan — single-repo security scanner. Stateless. State lives in GitHub Issues. # # Mount points (bind-mount at runtime — no VOLUME directive, so anonymous volumes # never accumulate when --rm is used): @@ -109,11 +109,15 @@ RUN set -eux; \ chmod +x /usr/local/bin/syft; \ syft --version -# --- secscan itself ------------------------------------------------------- +# --- security-scan itself ------------------------------------------------------- WORKDIR /app COPY pyproject.toml /app/pyproject.toml -COPY secscan /app/secscan +COPY security_scan /app/security_scan COPY README.md /app/README.md +# Manifest the consuming skill reads to see version + needed config migrations. 
+# Pull it out without starting the scanner: +# docker run --rm --entrypoint cat leverj/security-scan: /app/SECURITY-SCAN-MANIFEST.yaml +COPY SECURITY-SCAN-MANIFEST.yaml /app/SECURITY-SCAN-MANIFEST.yaml RUN pip install --no-cache-dir /app # Make sure the mount points exist (no VOLUME directive — keeps `--rm` from @@ -121,5 +125,5 @@ RUN pip install --no-cache-dir /app RUN mkdir -p /config /rules /work # Default entrypoint runs the scanner against /config/config.yaml. -ENTRYPOINT ["python", "-m", "secscan", "--config", "/config/config.yaml", "--work-dir", "/work"] +ENTRYPOINT ["python", "-m", "security_scan", "--config", "/config/config.yaml", "--work-dir", "/work"] CMD [] diff --git a/README.md b/README.md index 22943cd..a543824 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# secscan +# security-scan [![CI](https://github.com/leverj/security-scanner/actions/workflows/ci.yml/badge.svg)](https://github.com/leverj/security-scanner/actions/workflows/ci.yml) [![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](LICENSE) @@ -17,27 +17,27 @@ Closing/fixing findings is out of scope — another system owns that. ```bash # 1. Create (or pick) a GitHub Projects v2 board for security findings. # Note its number (visible in the URL: /projects/). -# On first run secscan provisions two single-select fields on the board: +# On first run security-scan provisions two single-select fields on the board: # - Severity (critical, high, medium, low, info) # - Category (dependency, secret, sast, iac, license) # 2. Copy the example config -cp config.example.yaml config.yaml -$EDITOR config.yaml # set repo, ref, project.owner, project.number +cp config/config.example.yaml config/config.yaml +$EDITOR config/config.yaml # set repo, ref, project.owner, project.number # 3. Set up secrets — pick ONE of the two paths in the next section # 4. Verify your setup, then run -./secscan.sh check # green checks across the board? 
-./secscan.sh build -./secscan.sh run # defaults to --dry-run; add --no-dry-run to actually file issues +./security-scan.sh check # green checks across the board? +./security-scan.sh build +./security-scan.sh run # defaults to --dry-run; add --no-dry-run to actually file issues ``` --- ## Setup: secrets -secscan needs a GitHub Personal Access Token, and optionally a Slack webhook URL. +security-scan needs a GitHub Personal Access Token, and optionally a Slack webhook URL. **Secrets never go into `config.yaml`** — they come in via env vars at runtime. `config.yaml` declares which path you're using: @@ -68,7 +68,7 @@ export GITHUB_TOKEN=github_pat_... # Optional Slack — get a webhook from https://api.slack.com/apps export SLACK_WEBHOOK_URL=https://hooks.slack.com/services/... -./secscan.sh run +./security-scan.sh run ``` To persist, put the `export` lines in `~/.zshrc` or `~/.bashrc`. The script verifies @@ -86,8 +86,8 @@ brew install 1password-cli op signin # Copy the template and edit the vault/item paths to point at your own entries -cp .env.1password.tpl.example .env.1password.tpl -$EDITOR .env.1password.tpl +cp config/.env.1password.tpl.example config/.env.1password.tpl +$EDITOR config/.env.1password.tpl ``` `.env.1password.tpl` then looks like: @@ -105,7 +105,7 @@ secrets: ``` ```bash -./secscan.sh run # auto-wraps with: op run --env-file=.env.1password.tpl -- docker run ... +./security-scan.sh run # auto-wraps with: op run --env-file=.env.1password.tpl -- docker run ... ``` The file `.env.1password.tpl` is `.gitignore`d. The committed @@ -116,7 +116,7 @@ and never commit your filled-in copy. For container orchestrators (Docker Swarm, K8s, GitHub Actions, etc.), populate `GITHUB_TOKEN` (and friends) via your platform's secret mechanism so it appears -in the container's environment. With `secrets.source: env`, `secscan.sh` (or a +in the container's environment. With `secrets.source: env`, `security-scan.sh` (or a direct `docker run`) will pick it up. 
--- @@ -136,7 +136,7 @@ need re-surfacing of regressions, that's the external fixing system's concern. ## Troubleshooting -`./secscan.sh check` reports the status of every prerequisite: +`./security-scan.sh check` reports the status of every prerequisite: ``` == config == @@ -144,7 +144,7 @@ need re-surfacing of regressions, that's the external fixing system's concern. == docker == ✓ docker is running == image == - ✓ secscan:latest present # ⚠ "not built yet" if you skipped `build` + ✓ security-scan:latest present # ⚠ "not built yet" if you skipped `build` == secrets (1password) == ✓ op (1Password CLI) installed ✓ op signed in @@ -157,12 +157,12 @@ Common failure modes and what `check` says: | Symptom | Fix | |---|---| -| `config not found` | `cp config.example.yaml config.yaml` | +| `config not found` | `cp config/config.example.yaml config/config.yaml` | | `GITHUB_TOKEN unset` (env source) | `export GITHUB_TOKEN=…` or switch to `secrets.source: "1password"` | | `op not installed` (1Password source) | `brew install 1password-cli && op signin` | -| `.env.1password.tpl missing` | `cp .env.1password.tpl.example .env.1password.tpl && $EDITOR …` | +| `.env.1password.tpl missing` | `cp config/.env.1password.tpl.example config/.env.1password.tpl && $EDITOR …` | | `SLACK_… unset` (slack.enabled=true) | Either export the var, add it to the 1Password env file, or set `slack.enabled: false` | -| `image not built yet` | `./secscan.sh build` | +| `image not built yet` | `./security-scan.sh build` | | `docker daemon not reachable` | Start Docker Desktop | --- @@ -176,10 +176,28 @@ python3 -m venv .venv && .venv/bin/pip install -e ".[dev]" The scanner binaries (osv-scanner, gitleaks, semgrep) live only inside the Docker image — local tests use SARIF fixtures and mocked subprocesses. To exercise the -real binaries, run via `./secscan.sh run`. +real binaries, run via `./security-scan.sh run`. 
--- +## Use as a Claude Code skill + +The companion bundle at [`leverj/ai-skills`](https://github.com/leverj/ai-skills) +ships a `security-scan` skill that drives this image directly: + +``` +/plugin marketplace add leverj/ai-skills +/plugin install leverj@leverj-ai-skills +# then: /leverj:security-scan run +``` + +The skill pulls and runs the published Docker image +`leverj/security-scan:`, bind-mounts your `config/` directory at +`/config:ro`, and offers a user-confirmed upgrade flow when a newer image +version is available (the image ships a `SECURITY-SCAN-MANIFEST.yaml` describing +its version + any config fields the skill should add to your local +`config.yaml`). + ## Spec -See [secscan-spec.md](secscan-spec.md) for the full design. +See [security-scan-spec.md](security-scan-spec.md) for the full design. diff --git a/SECURITY-SCAN-MANIFEST.yaml b/SECURITY-SCAN-MANIFEST.yaml new file mode 100644 index 0000000..6b80cf4 --- /dev/null +++ b/SECURITY-SCAN-MANIFEST.yaml @@ -0,0 +1,116 @@ +# SECURITY-SCAN-MANIFEST.yaml +# +# Declarative contract between this image and any tool (skill, CI job, etc.) +# that drives it. Baked into the image at /app/SECURITY-SCAN-MANIFEST.yaml. +# +# Read it from outside the running container with: +# docker run --rm --entrypoint cat leverj/security-scan: /app/SECURITY-SCAN-MANIFEST.yaml +# +# Contract: +# - `version` matches pyproject.toml's [project.version] and the docker tag. +# - `config_schema_version` bumps only when the YAML schema changes in a way +# that needs migration (adding optional fields with defaults does NOT +# require a bump; renames and removals do). +# - `config.new_fields` lets a smart skill add missing fields to a user's +# config.yaml on upgrade WITHOUT clobbering values they've already set. +# - `config.renamed_fields` and `config.removed_fields` capture +# breaking changes for the skill to surface to the user. 
+# +# Skills should compare a user's local "pinned tag" against the latest +# manifest, and on upgrade: +# 1. Show `changelog` to the user. +# 2. If `breaking_changes` is non-empty, require confirmation per item. +# 3. Apply `config.renamed_fields` (rename in-place; show diff). +# 4. Add any `config.new_fields` not already present (with documented defaults). +# 5. Strip any `config.removed_fields` (show diff, confirm). +# +# Adding to this file in a new release is non-breaking. Removing fields is +# breaking — skills must tolerate older manifests missing keys. + +version: "0.2.0" +config_schema_version: 2 +docker_image: "leverj/security-scan" +released: "2026-06-02" + +# One-liners for the upgrade prompt the skill shows users. +changelog: + - "BREAKING: config moved from a single file to a directory (config/config.yaml). Bind-mount config/ at /config:ro." + - "BREAKING: replaced parent_issue (int) with project.{owner,number} — findings file into a GitHub Projects v2 board, not as sub-issues." + - "PAT now needs `project` scope in addition to `repo`." + - "Added Codex + Gemma LLM SAST scanners (off by default) with bidirectional cross-validation." + - "Added bundled Semgrep rules: XSS, SQLi, Supabase migration patterns." + +breaking_changes: + - id: "config-as-directory" + summary: "The host-side mount target is now the config DIRECTORY, not a single config.yaml file." + user_action: "Move config.yaml + .env.1password.tpl into a config/ directory and bind-mount that directory at /config:ro." + - id: "projects-v2" + summary: "parent_issue (int) is removed. Findings now file into a GitHub Projects v2 board." + user_action: "Add `project: {owner, number}` block to config.yaml; add `project` scope to your PAT; optionally bulk-add existing sub-issues to the new board for clean dedup on the first run." + +config: + # Optional fields the skill should ADD to a user's config.yaml when missing. + # `default` is the value to insert. 
`since` is the schema version that + # introduced the field — skills can use it to decide whether to apply. + new_fields: + - path: "project.owner" + since: 2 + required: true + default: null + note: "Org or user that owns the target Projects v2 board. Required." + - path: "project.number" + since: 2 + required: true + default: null + note: "Project number (the integer in the URL: /projects/). Required." + - path: "scanners.codex" + since: 2 + default: false + note: "Enable OpenAI Codex LLM SAST (uses your local `codex` CLI subscription; no API key)." + - path: "scanners.gemma" + since: 2 + default: false + note: "Enable local Gemma LLM SAST via Ollama." + - path: "codex" + since: 2 + default: + binary: "codex" + model: null + timeout: 1200 + note: "Codex CLI tunables. Only used when scanners.codex is true." + - path: "gemma" + since: 2 + default: + base_url: null + model: null + keep_alive: null + timeout: 1800 + max_files: 60 + max_file_bytes: 12000 + max_total_bytes: 200000 + note: "Gemma SAST tunables. base_url/model/keep_alive fall back to triage:* when null." + - path: "cross_validate" + since: 2 + default: + enabled: true + codex_timeout: 300 + gemma_timeout: 180 + note: "When both scanners.codex AND scanners.gemma are true, each reviews the other's findings. False positives downgrade severity one notch; critical never auto-downgrades; findings are never suppressed." + + # Fields that were renamed. Skill should apply the rename in the user's config. + renamed_fields: + - from: "parent_issue" + to: "project" + since: 2 + note: "parent_issue (int) -> project (mapping). security-scan no longer files as sub-issues under a parent epic; findings are flat items in a Projects v2 board. The skill should drop parent_issue from the user's config and prompt for project.{owner,number}." + + removed_fields: [] + +# Files inside the image worth knowing about (for documentation purposes — +# skills don't typically need to reach in past the entrypoint). 
+image_paths: + manifest: "/app/SECURITY-SCAN-MANIFEST.yaml" + source: "/app/security_scan/" + rules: "/app/security_scan/rules/" + config_mount: "/config" + work_mount: "/work" diff --git a/.env.1password.tpl.example b/config/.env.1password.tpl.example similarity index 100% rename from .env.1password.tpl.example rename to config/.env.1password.tpl.example diff --git a/config.example.yaml b/config/config.example.yaml similarity index 91% rename from config.example.yaml rename to config/config.example.yaml index ae0e798..471201f 100644 --- a/config.example.yaml +++ b/config/config.example.yaml @@ -1,8 +1,8 @@ -# secscan config — mount read-only at /config/config.yaml inside the container. +# security-scan config — mount read-only at /config/config.yaml inside the container. # Secrets (token, Slack creds) come from env vars referenced by *_env keys below, # never from this file. # -# The `secrets:` block below tells ./secscan.sh how to populate those env vars +# The `secrets:` block below tells ./security-scan.sh how to populate those env vars # before invoking the container. Python code reads only os.environ, regardless. repo: "leverj/ezel" @@ -11,7 +11,7 @@ ref: "dev" # Target a GitHub Projects v2 board. Findings file as flat items here — no # parent epic / sub-issue relationship. The board's URL is # https://github.com/orgs//projects/ (or /users//...). -# secscan idempotently provisions two single-select fields on first run: +# security-scan idempotently provisions two single-select fields on first run: # - Severity (critical, high, medium, low, info) # - Category (dependency, secret, sast, iac, license) # PAT must have BOTH `repo` and `project` scopes. @@ -20,7 +20,7 @@ project: number: 5 github_token_env: "GITHUB_TOKEN" -# How ./secscan.sh sources the env vars named in *_env keys above. +# How ./security-scan.sh sources the env vars named in *_env keys above. # source: env -> assume GITHUB_TOKEN (etc.) 
are already exported in your shell # source: 1password -> auto-prefix with `op run --env-file=` secrets: @@ -47,7 +47,7 @@ scanners: codex: false # OpenAI Codex via local `codex` CLI (uses your subscription) gemma: false # Local Gemma 4 via Ollama -# Codex CLI tunables. Auth is via `codex login` outside this tool; secscan +# Codex CLI tunables. Auth is via `codex login` outside this tool; security-scan # never sees an API key. The CLI must be on PATH. codex: binary: "codex" @@ -91,7 +91,7 @@ triage: # Timeout for the heavy paths (fuzzy-dedup, prose). A ~17 GB model's first # call of the day can take minutes; subsequent calls are fast (keep_alive). timeout: 600 - # Start the model loading in a BACKGROUND thread when secscan starts so it + # Start the model loading in a BACKGROUND thread when security-scan starts so it # warms in parallel with the scanners. Strongly recommended for big models. prewarm: true # Slack intro is a single one-liner at the end; cap it tightly so a slow diff --git a/pyproject.toml b/pyproject.toml index f6e65bc..2de1af0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,9 +3,9 @@ requires = ["setuptools>=68"] build-backend = "setuptools.build_meta" [project] -name = "secscan" -version = "0.1.0" -description = "Stateless single-repo security scanner that files findings as GitHub sub-issues" +name = "security-scan" +version = "0.2.0" +description = "Stateless single-repo security scanner; files findings into a GitHub Projects v2 board" requires-python = ">=3.11" dependencies = [ "PyYAML>=6.0", @@ -20,14 +20,14 @@ dev = [ ] [project.scripts] -secscan = "secscan.main:cli" +security-scan = "security_scan.main:cli" [tool.setuptools.packages.find] -include = ["secscan*"] +include = ["security_scan*"] exclude = ["tests*"] [tool.setuptools.package-data] -secscan = ["rules/**/*.yaml", "rules/**/*.yml"] +security_scan = ["rules/**/*.yaml", "rules/**/*.yml"] [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/secscan-spec.md 
b/secscan-spec.md deleted file mode 100644 index a52be8c..0000000 --- a/secscan-spec.md +++ /dev/null @@ -1,278 +0,0 @@ -# secscan — Architecture & Build Spec (v1) - -A single‑repo, stateless, self‑hosted security scanner that detects a repo's tech stack, -runs the right scanners, and files each finding as a deduplicated GitHub sub‑issue under a -user‑provided parent issue. Optional local‑LLM (Gemma 4) triage and Slack digest. Closing / -fixing findings is **out of scope** — another system owns that. - -This document is written to be handed to Claude Code and built module by module. - ---- - -## 1. Goals & non‑goals - -**Goals (v1)** -- Generic: nothing org‑specific. A user supplies a repo, a branch, a parent issue number, and a token; it works for anyone. -- Stateless container: no internal database. All persistent state lives in **GitHub Issues**. Config + secrets come from mapped volumes / env. -- Auto‑detect the stack and run only the relevant scanners. -- Deterministic, auditable dedup. The LLM never owns correctness‑critical decisions. -- "Model proposes, code disposes" — irreversible actions (create issue, post Slack) are deterministic Python; the model only enriches. - -**Non‑goals (explicitly deferred)** -- Closing / reopening / fixing issues (external system). -- A local file DB (GitHub Issues is the state). -- Multi‑repo orchestration, parallel scanning, GitHub App auth. -- DAST / pen‑test lane, DefectDojo, CISO/compliance dashboard, CISO Assistant. -- Hostile‑repo sandboxing (v1 assumes you scan **your own** repo — trusted code). - ---- - -## 2. Locked design decisions - -| # | Decision | Rationale | -|---|----------|-----------| -| Dedup | **Deterministic fingerprint is the source of truth; Gemma is a fuzzy tie‑breaker + prose writer only.** | Reproducible, auditable. The LLM can't dup‑spam or silently drop a finding. 
| -| Create rule | **Dedup against OPEN *and* CLOSED sub‑issues; never re‑file a fingerprint that already exists in any state.** | Simplest, quietest. Any closed issue (fixed or won't‑fix) permanently suppresses re‑filing. Accepted blind spot: a fixed‑then‑regressed finding is not re‑surfaced (that's the external fixing system's concern). | -| State | **GitHub Issues only.** No file DB in v1. | Dedup needs only the set of existing sub‑issues (open+closed) + their embedded fingerprints. | -| LLM | **Optional.** Core path is fully deterministic and runs with no GPU/model. Gemma adds triage/prose/fuzzy‑match when available. | "Generic, anyone can run it" must not require a GPU. | -| Auth | **PAT via env** (1Password/Docker‑secret injection optional). | Single repo, single owner. (GitHub App deferred to the multi‑tenant version.) | -| Concurrency | **Sequential.** | One repo, daily cadence. | -| Repo execution | **Never execute repo code.** Lockfile parsing + static analysis only. | Safety; matches the proven `ezel_scan.py` discipline. | - ---- - -## 3. Module breakdown - -``` -secscan/ - config.py # load + validate config (YAML) and env (token) - detect.py # stack detection (manifest walk + optional Linguist cross-check) - runners/ # one module per scanner, each returns SARIF (or is normalized to it) - osv.py # OSV-Scanner (SCA: npm/yarn/pnpm, RubyGems, SwiftPM, pip, go, cargo, ...) 
- gitleaks.py # Gitleaks (secrets, git-history aware) - semgrep.py # Semgrep (SAST, bundled offline ruleset) - normalize.py # SARIF -> internal Finding model (one shape for all scanners) - fingerprint.py # deterministic, line-number-free fingerprint + marker (de)serialize - github.py # clone, list sub-issues (open+closed), create issue, link sub-issue - triage.py # OPTIONAL Gemma 4 (Ollama): fuzzy-dedup tie-break + issue/Slack prose - notify.py # OPTIONAL Slack digest - sync.py # the create-decision logic (dedup -> create-only) - main.py # orchestrator: config -> clone -> detect -> run -> normalize -> - # fingerprint -> sync -> notify -``` - -**Hard dependency boundary:** `detect/runners/normalize/fingerprint/github/sync` are deterministic and must work with `triage.py` and `notify.py` absent or failing. `triage` and `notify` are strictly additive. - ---- - -## 4. Internal Finding model - -Everything normalizes to this one shape (from SARIF). Keep it small and scanner‑agnostic. - -```python -@dataclass -class Finding: - scanner: str # "osv" | "gitleaks" | "semgrep" - category: str # "dependency" | "secret" | "sast" - rule_id: str # e.g. "GHSA-xxxx", "generic-api-key", "ezel-command-injection" - severity: str # normalized: critical|high|medium|low|info - file_path: str # repo-relative, forward slashes - line: int | None # for display only — NEVER part of the fingerprint - title: str # short, human title (deterministic default; Gemma may rewrite) - message: str # scanner message / advisory summary - masked_preview: str # for secrets: masked value only — NEVER the raw secret - sarif_fingerprint: str | None # SARIF partialFingerprints/fingerprints if present - extra: dict # ecosystem, installed/fixed version, CVE/GHSA, range, etc. -``` - -**Severity normalization:** map each tool's scale to `critical/high/medium/low/info`. SARIF `level` (error/warning/note) + `security-severity` property → normalized severity. - ---- - -## 5. 
Fingerprint & marker - -**Primary identity (deterministic, line‑number‑free):** -``` -key_basis = rule_id + "\0" + file_path + "\0" + snippet_or_secretfp -fingerprint = "fp_" + sha256(key_basis).hexdigest()[:16] -``` -- Prefer the SARIF‑provided `fingerprints` / `partialFingerprints` when the tool emits them (most do) — they're designed for exactly this and survive line drift. -- `snippet_or_secretfp`: for SAST, a whitespace‑normalized snippet of the matched region (or the enclosing symbol name); for secrets, the scanner's hash of the value (Gitleaks emits one) — **never the raw secret**; for deps, empty (rule_id already = GHSA/CVE which is unique per package‑advisory). Result is stable across line moves. -- **Line numbers are excluded** so reformatting/refactoring doesn't spawn duplicates. - -**Marker** embedded in every issue body (hidden HTML comment), so a future run can read it back: -``` - -``` - -`github.py` lists **all** sub‑issues of the parent (state=all), parses these markers, and builds the set of already‑filed fingerprints. - ---- - -## 6. Create‑decision logic (`sync.py`) - -``` -existing_fps = { marker.fp for issue in github.list_subissues(parent, state="all") - if marker := parse_marker(issue.body) } - -for f in findings: - fp = f.sarif_fingerprint or compute_fingerprint(f) - - if fp in existing_fps: - continue # already filed (open OR closed) -> never re-file - - # OPTIONAL fuzzy tie-break (only if Gemma available): catch renamed/moved code that - # changed file_path (and thus fp). Ask Gemma: does this finding match any existing - # issue's (rule + snippet) at a different path with high confidence? 
- if triage.enabled and triage.is_duplicate_of_existing(f, existing_issues): - continue - - title, body = triage.write_issue(f) if triage.enabled else default_issue(f) - body = inject_marker(body, fp, f) # always inject the deterministic marker - issue = github.create_issue(title, body) # create-only - github.link_subissue(parent, issue) - existing_fps.add(fp) # avoid intra-run dupes -``` - -**Invariants (enforced in `github.py`, not trusted to the model):** -- Create and link only. **No** edit/close/reopen/delete of issues. -- The deterministic marker is always injected by code, even if Gemma wrote the prose. -- Never write a raw secret into a body — only `masked_preview`. -- A scanner that did **not** run/complete contributes **no** findings (so a crashed scanner can't look like "all clear"). - ---- - -## 7. Config schema (`config.yaml`, mounted read‑only) - -```yaml -repo: "leverj/ezel" # owner/name -ref: "dev" # branch -parent_issue: 451 # user creates this; tool files sub-issues under it -github_token_env: "GITHUB_TOKEN" # name of env var holding the PAT (value never in config) - -scanners: # which to run; auto-skipped if stack not present - osv: true - gitleaks: true - semgrep: true - -paths: - exclude: ["archive/", "vendor/", ".github/scripts/"] # globs skipped everywhere - -severity_floor: "low" # don't file below this (info-only by default) - -triage: # all optional - enabled: false - provider: "ollama" - model: "gemma4:26b" - base_url: "http://host.docker.internal:11434" - keep_alive: "5m" - -slack: - enabled: false - channel_id_env: "SLACK_CHANNEL_ID" # or a webhook URL via env -``` - -Token and any Slack secret arrive via **env** (the container reads `os.environ[...]`), never written into `config.yaml`. 1Password / Docker secrets can populate those env vars on the host. - ---- - -## 8. Stack detection (`detect.py`) - -1. 
**Manifest walk (primary, zero‑API, reliable):** walk the cloned tree (honoring `paths.exclude`) for manifests/lockfiles and map to scanners + ecosystems: - - `package.json` + `package-lock.json` | `yarn.lock` | `pnpm-lock.yaml` → npm/yarn/pnpm (OSV) - - `Gemfile.lock` → RubyGems (OSV); `Package.resolved` → SwiftPM (OSV) - - `requirements.txt` | `poetry.lock` | `Pipfile.lock` → pip (OSV) - - `go.mod`/`go.sum` → Go (OSV); `Cargo.lock` → Rust (OSV) - - any source files → Semgrep (its own language autodetect); whole tree → Gitleaks -2. **GitHub Linguist cross‑check (optional hint):** `GET /repos/{o}/{r}/languages` as a sanity check that the walk didn't miss a language. Do **not** rely on it as the only source (it misses ecosystems/lockfiles and odd monorepo layouts). -3. Stacks with no available scanner → printed as "detected, no scanner" and skipped (don't fail the run). - -Handles monorepos: there can be many manifests in many dirs (e.g. `ezel` had npm in 5 locations + Swift + RubyGems). - ---- - -## 9. Scanners (`runners/`) — all emit SARIF, never execute repo code - -- **OSV‑Scanner** — `osv-scanner --format sarif --recursive ` (parses lockfiles; no install). Covers npm/yarn/pnpm, RubyGems, SwiftPM, pip, Go, Cargo from one tool. -- **Gitleaks** — `gitleaks detect --report-format sarif --source ` (git‑history aware; emits a per‑secret fingerprint). -- **Semgrep** — `semgrep scan --config --sarif --metrics=off --exclude archive` (static; bundle the JS/TS/React + Swift/iOS + Android rules from `ezel_scan.py` so no network rule fetch). - -Pin scanner versions (in the Dockerfile) so "new vs resolved" diffing isn't polluted by the scanners themselves changing. Each runner returns SARIF JSON (or `None` + a "did not complete" flag — which must keep that category out of any future close logic the external system builds). - ---- - -## 10. 
Gemma 4 triage (`triage.py`) — optional, guard‑railed - -Talks to Ollama (`/api/chat` with `tools` for native function calling; `keep_alive` so the ~16 GB model loads only during the run and frees ~5 min after). Three jobs, all additive: - -1. **Fuzzy dedup tie‑break** — for findings whose deterministic fp is new, decide if it's actually a renamed/moved version of an existing issue (returns an existing issue number or "new"). Must cite the finding it's judging. -2. **Prioritization / context** — order findings, add a one‑line "why this matters" using only the scanner's factual fields. Must **not** lower severity below `severity_floor` without an explicit flagged reason. -3. **Prose** — draft issue title/body and the Slack digest text. - -Guardrails (in code, not the prompt): validate every tool call against its JSON schema and reject/retry malformed ones; feed only the scanner's factual fields (never invent fix versions); the deterministic marker + masked previews are injected by code regardless of what the model returns. If Ollama is unreachable or `triage.enabled=false`, fall back to deterministic `default_issue()` templating — the run still completes. - ---- - -## 11. Docker & secrets - -``` -Dockerfile: python:3.x-slim + pinned osv-scanner, gitleaks, semgrep, git -Volumes: - /config (ro) -> config.yaml - /rules (ro) -> bundled semgrep rules (or baked into the image) - /work (rw) -> ephemeral per-run clone + scratch (can be tmpfs) -Secrets: - GITHUB_TOKEN, SLACK_* via env (docker run --env-file, Docker secret, or 1Password injection) -Entrypoint: python -m secscan --config /config/config.yaml -``` -Stateless: the container holds no state between runs; everything durable is in GitHub Issues. The clone lives in `/work` and is wiped each run. Token file (if used instead of env) must be `600` and is never logged (mask in all output). - ---- - -## 12. Execution flow (`main.py`) - -``` -1. load config + token (fail fast if token missing / parent_issue unset) -2. 
shallow|full clone repo@ref into /work (full clone only if a history-secret scan is wanted) -3. detect stack -> list of (scanner, targets) -4. run each enabled+relevant scanner -> SARIF (record which completed) -5. normalize SARIF -> Findings ; drop paths in exclude ; drop < severity_floor -6. fingerprint each Finding -7. list parent's sub-issues (open+closed) -> existing fingerprints -8. for each new fingerprint: (optional Gemma fuzzy-dup check) -> create + link sub-issue -9. (optional) Gemma-written Slack digest -> post once -10. print a deterministic summary (created N, skipped M dup, scanners run/failed) -``` - ---- - -## 13. Test plan - -- **Unit:** fingerprint stability (same finding across line shifts → same fp; rename → different fp, caught by fuzzy pass); marker round‑trip (inject → parse); SARIF→Finding for one fixture per scanner; severity normalization; exclude‑path filtering; masked‑preview never contains the raw value. -- **Dedup logic:** given a fixture set of existing sub‑issues (open + closed) and a finding set, assert create‑only + never‑re‑file (closed fp ⇒ skipped). -- **Scanner integration:** run each scanner against a tiny synthetic repo with one planted issue each; assert SARIF parses and the finding surfaces. -- **Graceful degradation:** Ollama down → deterministic path still files issues; a scanner binary missing → that category skipped with a note, others unaffected. -- **End‑to‑end dry‑run:** `--dry-run` (no issue creation) prints what *would* be filed. Verify against a real repo before wiring the token. -- **Safety:** assert no `npm install`/`bundle install`/`pod install` is ever invoked; assert the token never appears in logs or issue bodies. - ---- - -## 14. Build order for Claude Code (milestones) - -1. `config.py` + `Finding` model + `fingerprint.py` (+ unit tests) — the deterministic core. -2. `github.py` (clone, list sub‑issues open+closed, create+link) with a `--dry-run`. -3. 
`runners/` + `normalize.py` for one scanner (Semgrep), end‑to‑end on a synthetic repo. -4. Add OSV‑Scanner + Gitleaks runners. -5. `detect.py` (manifest walk) + `sync.py` (create‑decision) → full deterministic pipeline. -6. Dockerfile + volumes + env secrets; dry‑run in container against a real repo. -7. `notify.py` (Slack) — optional. -8. `triage.py` (Gemma 4 via Ollama) — optional, last; everything must already work without it. - -Ship after step 6 as a working deterministic tool; 7–8 are additive. - ---- - -## 15. Lineage & deferred roadmap - -- v1 generalizes the proven `ezel_scan.py` (stack detection, secret masking, conservative create‑only sub‑issue sync, bundled Semgrep rules) into a config‑driven, Dockerized, single‑repo tool. -- Deferred, in rough order: GitHub App auth → multi‑repo + parallelism (WAL or per‑repo state) → DAST/pen‑test lane (staging only, authorized targets) → DefectDojo/Dependency‑Track aggregation when correlating many tools/repos → CISO/GRC dashboard (CISO Assistant for compliance) as an always‑on backend the daily job feeds. diff --git a/secscan/__init__.py b/secscan/__init__.py deleted file mode 100644 index fefcead..0000000 --- a/secscan/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -"""secscan — stateless single-repo security scanner that files findings as GitHub sub-issues.""" - -__version__ = "0.1.0" diff --git a/security-scan-spec.md b/security-scan-spec.md new file mode 100644 index 0000000..ec1209a --- /dev/null +++ b/security-scan-spec.md @@ -0,0 +1,488 @@ +# security-scan — Architecture & Build Spec (v2) + +A single‑repo, stateless, self‑hosted security scanner that detects a repo's tech stack, +runs the right scanners, and files each finding as a deduplicated issue into a user‑provided +**GitHub Projects v2 board**. Optional local‑LLM (Gemma 4) and cloud‑LLM (Codex) SAST lanes +with bidirectional cross‑validation. Distributed as a Docker image (`leverj/security-scan`) +with a baked‑in manifest that lets consumers (e.g. 
the `leverj/ai-skills` skill) do +user‑confirmed version upgrades with automated config migration. Closing / fixing findings +is **out of scope** — another system owns that. + +v1 of this spec described a parent‑epic + sub‑issue model. v2 (this document) replaces that +with a flat Projects v2 board (no sub‑issue tree), adds Codex + Gemma LLM SAST + cross‑validation, +and adds the image manifest contract. + +--- + +## 1. Goals & non‑goals + +**Goals** +- Generic: nothing org‑specific. A user supplies a repo, a branch, a Projects v2 board + (owner + number), and a PAT with `repo` + `project` scopes; it works for anyone. +- Stateless container: no internal database. All persistent state lives in **GitHub + Issues + their Projects v2 board membership**. Config + secrets come from a mapped + config directory / env. +- Auto‑detect the stack and run only the relevant scanners. +- Deterministic, auditable dedup. LLMs never own correctness‑critical decisions. +- "Model proposes, code disposes" — irreversible actions (create issue, add project item, + set custom field, post Slack) are deterministic Python; the model only enriches + (prose, triage, cross‑validation verdicts). +- Distributable: the image is the contract. Consumers (skills, CI jobs) drive the image + and pull a manifest out of it to know what version's inside and what config the new + version expects. + +**Non‑goals (explicitly deferred)** +- Closing / reopening / fixing issues (external system). +- A local file DB (project items + their bodies are the state). +- Multi‑repo orchestration, parallel scanning, GitHub App auth. +- DAST / pen‑test lane, DefectDojo, CISO/compliance dashboard. +- Hostile‑repo sandboxing (v1+v2 assume you scan **your own** repo — trusted code). +- Live config‑schema enforcement at the boundary; we rely on the consumer skill to do + the manifest‑driven migration step before invoking the image. + +--- + +## 2. 
Locked design decisions + +| # | Decision | Rationale | +|---|----------|-----------| +| Storage | **Findings are filed as issues in the target repo and as items on a GitHub Projects v2 board** owned by the user. Each item carries `Severity` + `Category` single‑select fields. | Removes the 100‑sub‑issue cap that blocked the original sub‑issue model. Flat board + custom fields give a better triage UI. | +| Dedup | **Deterministic fingerprint is the source of truth; LLMs are fuzzy tie‑breakers + prose writers only.** | Reproducible, auditable. No LLM can dup‑spam or silently drop a finding. | +| Create rule | **Dedup against OPEN *and* CLOSED project items; never re‑file a fingerprint that already exists in any state.** | Simplest, quietest. Any closed issue (fixed or won't‑fix) permanently suppresses re‑filing. Accepted blind spot: a fixed‑then‑regressed finding is not re‑surfaced (that's the external fixing system's concern). | +| State | **GitHub Issues + Projects v2 only.** No file DB. | Dedup needs the set of existing project items (open+closed) + their embedded fingerprints. | +| LLM | **Optional.** Core path is fully deterministic and runs with no GPU/cloud‑subscription. Gemma (Ollama) + Codex (subscription CLI) are opt‑in feature flags. | "Generic, anyone can run it" must not require a GPU or a paid subscription. | +| Cross‑validation | **When both Codex and Gemma scanners are on, each reviews the other's findings.** False‑positive verdicts downgrade severity one notch; critical never auto‑downgrades. Findings are never suppressed. | LLM blind spots are asymmetric. Surfacing disagreement to humans is better than silently dropping findings. | +| Auth | **PAT via env** (1Password / Docker‑secret injection optional). PAT requires `repo` + `project` scopes. | Single repo, single owner. (GitHub App deferred to the multi‑tenant version.) | +| Distribution | **Published Docker image `leverj/security-scan:<tag>`** on Docker Hub. Multi‑arch (amd64+arm64). 
Image carries a baked‑in `/app/SECURITY-SCAN-MANIFEST.yaml`. | Consumers pin a tag, query Docker Hub for upgrades, read the manifest from the candidate image to learn what changed before pulling for real. | +| Concurrency | **Sequential.** | One repo, daily cadence. | +| Repo execution | **Never execute repo code.** Lockfile parsing + static analysis + LLM reading only. | Safety. The cloned tree is scanned in `read-only` mode by the LLM lanes too. | + +--- + +## 3. Module breakdown + +``` +security_scan/ + config.py # load + validate config (YAML) and env (token) + detect.py # stack detection (manifest walk; emits per-scanner targets) + runners/ # one module per scanner; each returns SARIF (or is normalized to it) + osv.py # OSV-Scanner (SCA: npm/yarn/pnpm, RubyGems, SwiftPM, pip, go, cargo, …) + gitleaks.py # Gitleaks (secrets, git-history aware) + semgrep.py # Semgrep (SAST, bundled offline rule packs) + trivy.py # Trivy (vuln + secret + IaC + license, all in one) + trufflehog.py # Trufflehog (verified-live secrets — validates against vendor) + syft.py # Syft (SBOM — CycloneDX artifact only, no findings) + codex.py # OPTIONAL OpenAI Codex via local `codex` CLI (subscription) + gemma.py # OPTIONAL Local Gemma via Ollama + normalize.py # SARIF -> internal Finding model (one shape for all scanners) + fingerprint.py # deterministic, line-number-free fingerprint + marker (de)serialize + github.py # clone + GraphQL ProjectsV2 ops (resolve_project, list_project_items, + # add_to_project, set_project_field) + REST create_issue + triage.py # OPTIONAL Gemma 4 (Ollama): fuzzy-dedup tie-break + issue/Slack prose + cross_validate.py # OPTIONAL bidirectional review when codex AND gemma scanners are both on + notify.py # OPTIONAL Slack digest + sync.py # the create-decision logic (dedup -> create-only -> set fields) + main.py # orchestrator: config -> clone -> detect -> scan -> normalize -> + # cross-validate -> sync -> notify + rules/ # bundled Semgrep rules: javascript, 
python, secrets, xss, sqli, supabase +``` + +**Hard dependency boundary:** `detect/runners/normalize/fingerprint/github/sync` are +deterministic and must work with `triage.py`, `notify.py`, `cross_validate.py`, and the +LLM runners (`codex.py`, `gemma.py`) absent or failing. The LLM lanes and Slack are +strictly additive. + +--- + +## 4. Internal Finding model + +Everything normalizes to this one shape (from SARIF). Keep it small and scanner‑agnostic. + +```python +@dataclass +class Finding: + scanner: str # "osv" | "gitleaks" | "semgrep" | "trivy" | "trufflehog" | "codex" | "gemma" + category: str # "dependency" | "secret" | "secret-verified" | "sast" | "iac" | "license" + rule_id: str # e.g. "GHSA-xxxx", "generic-api-key", "codex.auth-bypass" + severity: str # normalized: critical|high|medium|low|info + file_path: str # repo-relative, forward slashes + line: int | None # for display only — NEVER part of the fingerprint + title: str # short, human title (deterministic default; Gemma may rewrite) + message: str # scanner message / advisory summary + masked_preview: str # for secrets: masked value only — NEVER the raw secret + sarif_fingerprint: str | None # SARIF partialFingerprints/fingerprints if present + extra: dict # ecosystem, installed/fixed version, CVE/GHSA, cross_validation, … +``` + +**Severity normalization:** map each tool's scale to `critical/high/medium/low/info`. +SARIF `level` (error/warning/note) + `security-severity` property → normalized severity. + +**Cross‑validation annotation** lives in `extra["cross_validation"]` when applicable: +```python +{"validator": "gemma" | "codex", + "verdict": "real" | "false_positive" | "uncertain", + "reason": "<= 300 chars", + "original_severity": ""} +``` + +--- + +## 5. 
Fingerprint & marker + +**Primary identity (deterministic, line‑number‑free):** +``` +key_basis = rule_id + "\0" + file_path + "\0" + snippet_or_secretfp +fingerprint = "fp_" + sha256(key_basis).hexdigest()[:16] +``` +- Prefer the SARIF‑provided `fingerprints` / `partialFingerprints` when the tool emits + them (most do) — they're designed for exactly this and survive line drift. +- `snippet_or_secretfp`: + - SAST: whitespace‑normalized snippet of the matched region (or enclosing symbol name); + - secrets: the scanner's hash of the value (Gitleaks emits one) — **never the raw secret**; + - deps: empty (`rule_id` already = GHSA/CVE, unique per package‑advisory). +- **Line numbers are excluded** so reformatting/refactoring doesn't spawn duplicates. + +**Marker** embedded in every issue body (hidden HTML comment): +``` + +``` + +The parser also accepts the legacy `` marker (issues filed by the +pre‑rename code) so dedup against pre‑existing items still works without backfill. + +`github.py` lists **all** items on the Projects v2 board (any state), parses these +markers from each item's issue body, and builds the set of already‑filed fingerprints. + +--- + +## 6. Create‑decision logic (`sync.py`) + +```python +existing_items = github.list_project_items(project.id) # paginated GraphQL +existing_fps = {marker.fp for it in existing_items if (marker := parse_marker(it.body))} + +for f in findings: + if not f.meets_floor(severity_floor): + result.skipped_floor += 1 + continue + + fp = f.sarif_fingerprint or compute_fingerprint(f) + if fp in existing_fps: + result.skipped_dup += 1 + continue + + # OPTIONAL fuzzy tie-break (only if Gemma triage available): catch renamed/moved + # code that changed file_path (and thus fp). 
+ if triage.enabled and triage.is_duplicate_of_existing(f, existing_items): + result.skipped_fuzzy_dup += 1 + continue + + title, body = triage.write_issue(f) if triage.enabled else default_issue(f) + body = inject_marker(body, fp, f) # marker always injected by code + issue = github.create_issue(title, body, labels=_labels_for(f)) + item_id = github.add_to_project(project.id, issue["node_id"]) + github.set_project_field(project.id, item_id, project.severity, f.severity) + github.set_project_field(project.id, item_id, project.category, f.category) + existing_fps.add(fp) +``` + +**Invariants (enforced in `github.py`, not trusted to the model):** +- Create + add‑to‑project + set‑field only. **No** edit/close/reopen/delete of issues. +- The deterministic marker is always injected by code, even if Gemma wrote the prose. +- Never write a raw secret into a body — only `masked_preview`. +- A scanner that did **not** run/complete contributes **no** findings (so a crashed + scanner can't look like "all clear"). + +--- + +## 7. 
Config schema (`config/config.yaml`, the config dir is bind‑mounted read‑only) + +```yaml +repo: "leverj/ezel" +ref: "dev" + +project: # the GitHub Projects v2 board findings file into + owner: "leverj" # org or user + number: 5 # project number from the URL: /projects/ + +github_token_env: "GITHUB_TOKEN" # env var holding the PAT (value NEVER in config.yaml) + +scanners: + osv: true + gitleaks: true + semgrep: true + trivy: true + trufflehog: true + syft: true # SBOM artifact (no project items filed) + codex: false # OPTIONAL — OpenAI Codex via subscription + gemma: false # OPTIONAL — local Gemma via Ollama + +codex: # tunables for the codex runner + binary: "codex" + model: null # null => use codex CLI's configured default + timeout: 1200 + +gemma: # tunables for the gemma scanner (falls back to triage:* when null) + base_url: null + model: null + keep_alive: null + timeout: 1800 + max_files: 60 # cap to keep prompt size bounded + max_file_bytes: 12000 + max_total_bytes: 200000 + +cross_validate: # only active when both scanners.codex AND scanners.gemma are true + enabled: true + codex_timeout: 300 + gemma_timeout: 180 + +paths: + exclude: ["archive/", "vendor/", ".github/scripts/"] + +severity_floor: "low" # info | low | medium | high | critical + +triage: # optional Gemma triage (issue prose / fuzzy dedup / Slack intro) + enabled: false + provider: "ollama" + model: "gemma4:26b" + base_url: "http://host.docker.internal:11434" + keep_alive: "5m" + prewarm: true + intro_timeout: 120 + intro_enabled: true + prose_enabled: false + fuzzy_dup_enabled: false + +slack: + enabled: false + webhook_url_env: "SLACK_WEBHOOK_URL" # OR channel_id_env + bot_token_env +``` + +Token and any Slack secret arrive via **env** (the container reads `os.environ[...]`), +never written into `config.yaml`. 1Password / Docker secrets can populate those env +vars on the host. + +The whole **`config/` directory** is the bind‑mount unit. 
A `secrets.source: 1password` +setup keeps the `.env.1password.tpl` file inside the same directory so it rides along. + +--- + +## 8. Stack detection (`detect.py`) + +1. **Manifest walk (primary, zero‑API, reliable):** walk the cloned tree (honoring + `paths.exclude`) for manifests/lockfiles and map to scanners + ecosystems: + - `package.json` + `package-lock.json` | `yarn.lock` | `pnpm-lock.yaml` → npm/yarn/pnpm (OSV) + - `Gemfile.lock` → RubyGems (OSV); `Package.resolved` → SwiftPM (OSV) + - `requirements.txt` | `poetry.lock` | `Pipfile.lock` → pip (OSV) + - `go.mod`/`go.sum` → Go (OSV); `Cargo.lock` → Rust (OSV) + - any source files → Semgrep (its own language autodetect); whole tree → Gitleaks +2. **Whole‑tree scanners** (Trivy, Trufflehog, Syft) run once on the repo root, no manifest gating. +3. **Framework detection** — currently surfaces `supabase` when `supabase/config.toml` + exists or `@supabase/supabase-js` is in any `package.json`. Used to enable the + Supabase Semgrep rule pack. +4. **LLM scanners** (codex, gemma) run only when there's at least one recognized source file. +5. Stacks with no available scanner → printed as "detected, no scanner" and skipped (don't fail the run). + +Handles monorepos: there can be many manifests in many dirs. + +--- + +## 9. Scanners (`runners/`) — all emit SARIF (or are normalized to it); none execute repo code + +- **OSV‑Scanner** — `osv-scanner --format sarif --recursive ` (parses lockfiles; + no install). Covers npm/yarn/pnpm, RubyGems, SwiftPM, pip, Go, Cargo from one tool. +- **Gitleaks** — `gitleaks detect --report-format sarif --source ` (git‑history + aware; emits a per‑secret fingerprint). +- **Semgrep** — `semgrep scan --config --sarif --metrics=off …` (static; + bundled rule packs include `javascript`, `python`, `secrets`, `xss`, `sqli`, `supabase`). +- **Trivy** — `trivy ` against the cloned tree; SARIF output; + multi‑category normalization in `normalize.py`. 
+- **Trufflehog** — JSONL output (not SARIF), normalized in `normalize.py`. `--only-verified` + surfaces secrets the scanner validated live against the vendor (CWE‑798 critical). +- **Syft** — produces a CycloneDX SBOM JSON written to `/work/`. No project items filed; + the runner's "SARIF" is a tiny metadata wrapper so the orchestrator can log + reference it. +- **Codex** (optional) — `codex exec -s read-only --output-schema schema.json -o out.json …` + with a strict JSON output contract. Subscription auth (`codex login`); no API key. + `extra["scanner"] = "codex"`; rule_ids namespaced `codex.<slug>`. +- **Gemma** (optional) — Ollama `/api/chat` with `format=json`, batched source files + (capped by file count + per‑file bytes + total bytes). Same JSON contract as codex. + `extra["scanner"] = "gemma"`; rule_ids namespaced `gemma.<slug>`. + +Pin scanner versions (in the Dockerfile) so "new vs resolved" diffing isn't polluted by +the scanners themselves changing. Each runner returns SARIF JSON (or `None` + a "did +not complete" flag — which must keep that category out of any future close logic the +external system builds). + +--- + +## 10. Gemma 4 triage (`triage.py`) — optional, guard‑railed + +Distinct from the **gemma scanner** (which produces findings). Triage is post‑processing: + +1. **Fuzzy dedup tie‑break** — for findings whose deterministic fp is new, decide if it's + actually a renamed/moved version of an existing item. (Off by default; + `triage.fuzzy_dup_enabled`.) +2. **Prose** — draft issue title/body. (Off by default; `triage.prose_enabled`.) +3. **Slack intro** — one short framing sentence prepended to the deterministic per‑category + Slack digest. (On by default; `triage.intro_enabled`.) 
+ +Guardrails (in code, not the prompt): validate every JSON response against its schema and +fall back to deterministic output on malformed responses; feed only the scanner's factual +fields (never invent fix versions); the deterministic marker + masked previews are injected +by code regardless of what the model returns. If Ollama is unreachable, the run still +completes — every Gemma path has a deterministic fallback. + +--- + +## 11. Cross‑validation (`cross_validate.py`) — optional, off unless both LLM scanners enabled + +When `scanners.codex` AND `scanners.gemma` are both true: + +1. For every Codex finding → ask Gemma (via Ollama): "real / false_positive / uncertain + + brief reason". +2. For every Gemma finding → ask Codex (via subprocess): same prompt. +3. Annotate `finding.extra["cross_validation"]` with the verdict + reason. +4. If verdict is `false_positive`: downgrade severity one notch (`high → medium`, + `medium → low`, `low → info`). **`critical` is asymmetric — it NEVER auto‑downgrades.** + The cost of missing a real critical is higher than the cost of one noisy critical + in the board. +5. **Findings are NEVER suppressed.** Disagreement is surfaced via the annotation; + humans triage on the project board. + +If either validator is unreachable, the verdict for that direction is `uncertain` and +severity stays unchanged — never block the run on a validator failure. + +--- + +## 12. 
Docker & secrets + +``` +Dockerfile: python:3.11-slim + pinned osv-scanner, gitleaks, semgrep, trivy, trufflehog, syft, git + +Volumes (bind-mounted at runtime — no VOLUME directive, so anonymous volumes never +accumulate when --rm is used): + /config (ro) -> the user's whole config directory (config.yaml + .env.1password.tpl + …) + /rules (ro) -> optional override of the image-baked semgrep rules + /work (rw) -> ephemeral per-run clone + SBOM output (wiped each run) + +Secrets: + GITHUB_TOKEN, SLACK_* via env (docker run --env-file, Docker secret, or 1Password injection) + +Entrypoint: + python -m security_scan --config /config/config.yaml --work-dir /work +``` + +Stateless: the container holds no state between runs; everything durable is in GitHub +Issues + the Projects v2 board. The clone lives in `/work` and is wiped each run. + +**Image manifest** (`/app/SECURITY-SCAN-MANIFEST.yaml`) — see §15. + +--- + +## 13. Execution flow (`main.py`) + +``` +1. load config + token (fail fast if token missing / project unresolved) +2. shallow|full clone repo@ref into /work +3. resolve Projects v2 board (GraphQL); idempotently ensure Severity + Category single-select fields +4. detect stack -> list of (scanner, targets) +5. run each enabled+relevant scanner -> SARIF/JSON (record which completed) +6. normalize results -> Findings ; drop paths in exclude ; drop < severity_floor +7. if both codex AND gemma ran -> cross_validate.cross_validate(findings, …) +8. fingerprint each Finding (or use SARIF-supplied fingerprint) +9. list existing project items -> existing fingerprints +10. for each new fingerprint: + (optional Gemma fuzzy-dup check) -> create_issue + add_to_project + set Severity/Category +11. (optional) Slack digest (Gemma-written intro + deterministic per-category sections) +12. 
print a deterministic summary (created N, skipped M dup, scanners run/failed) +``` + +A scanner that did NOT complete contributes ZERO findings — so a crashed scanner never +reads as "all clear" to downstream tooling. + +--- + +## 14. Test plan + +- **Unit:** fingerprint stability (same finding across line shifts → same fp; rename → + different fp, caught by fuzzy pass); marker round‑trip (inject → parse); legacy marker + compat; SARIF→Finding for one fixture per scanner; severity normalization; exclude‑path + filtering; masked‑preview never contains the raw value. +- **Dedup logic:** given a fixture set of existing project items (open + closed) and a + finding set, assert create‑only + never‑re‑file (closed fp ⇒ skipped). +- **Cross‑validation:** unit‑tested with mocked Ollama HTTP and mocked codex subprocess. + Verifies asymmetric downgrade (critical never), never‑suppress invariant, and graceful + degradation when a validator is unreachable. +- **GraphQL ops:** mocked `requests.Session` — resolve_project, list_project_items + (paginated), add_to_project, set_project_field; dry‑run path makes zero HTTP calls. +- **Scanner integration:** run each scanner against a tiny synthetic repo with one + planted issue each; assert SARIF parses and the finding surfaces. +- **Graceful degradation:** Ollama down → deterministic path still files issues; a + scanner binary missing → that category skipped with a note, others unaffected. +- **End‑to‑end dry‑run:** `--dry-run` (no issue creation) prints what *would* be filed. + Verifies the project resolution + listing path against a real board. +- **Safety:** assert no `npm install`/`bundle install`/`pod install` is ever invoked; + assert the token never appears in logs or issue bodies; codex sandbox is `read-only`; + raw secrets are never in issue bodies. + +--- + +## 15. Image manifest contract + +The image bakes `SECURITY-SCAN-MANIFEST.yaml` at `/app/SECURITY-SCAN-MANIFEST.yaml`. 
+Consumers read it without starting the scanner: + +```bash +docker run --rm --entrypoint cat \ + leverj/security-scan:<tag> /app/SECURITY-SCAN-MANIFEST.yaml +``` + +Top‑level keys: + +| Key | Purpose | +|---|---| +| `version` | Image version (matches `pyproject.toml` and the git tag). | +| `config_schema_version` | Bumps only when the YAML schema changes in a breaking way. | +| `docker_image` | Full repo name (`leverj/security-scan`) for use by consumers. | +| `released` | Release date. | +| `changelog` | Short bullet list — surfaced verbatim to the user on the upgrade prompt. | +| `breaking_changes` | List of `{id, summary, user_action}` items requiring explicit user confirmation. | +| `config.new_fields` | Fields the consumer should ADD to a user's config.yaml when missing, with documented defaults. | +| `config.renamed_fields` | Fields the consumer should rename in place. May require user input where the rename isn't 1:1. | +| `config.removed_fields` | Fields the consumer should strip with confirmation. | +| `image_paths` | Documentation of where things live inside the image (mount targets, source). | + +The publish workflow (`.github/workflows/publish.yml`) refuses to push unless +`pyproject.toml`'s version and the manifest's version both match the git tag. This is +the contract that lets the consumer skill in `leverj/ai-skills` evolve in lockstep +with the image — schema migration is declared by the image, not coded into the skill. + +--- + +## 16. Build/release flow + +1. Develop on a feature branch; CI lints + tests + does a no‑push docker build on each PR. +2. Merge to `main`. +3. Tag a release: `git tag vX.Y.Z && git push origin vX.Y.Z`. +4. `publish.yml` builds multi‑arch (amd64 + arm64), tags `leverj/security-scan:vX.Y.Z` + + `:latest`, pushes to Docker Hub, and smoke‑tests that the manifest is readable. +5. 
The companion skill in `leverj/ai-skills` (or any other consumer) sees the new tag, + fetches the candidate manifest, surfaces the changelog + migrations to the user, and + applies them on confirmation. + +Required repository secrets for the publish job: `DOCKERHUB_USERNAME`, `DOCKERHUB_TOKEN`. + +--- + +## 17. Lineage & deferred roadmap + +- v1 generalized `ezel_scan.py` (a hand‑rolled per‑repo scanner) into a generic, Dockerized + single‑repo tool using parent‑epic + sub‑issue storage. +- v2 (this spec) drops the sub‑issue tree in favor of Projects v2 (lifts the 100‑item cap, + adds custom fields, simpler triage UI), adds Codex + Gemma LLM SAST + cross‑validation, + and adds the image manifest contract for consumer skills. +- Deferred, in rough order: GitHub App auth → multi‑repo + parallelism (per‑project state) → + DAST/pen‑test lane (staging only, authorized targets) → Live Supabase Security Advisor + parity (DB‑connected lane, see [`leverj/security-scanner#4`](https://github.com/leverj/security-scanner/issues/4)) → + DefectDojo/Dependency‑Track aggregation when correlating many tools/repos → CISO/GRC + dashboard as an always‑on backend the daily job feeds. diff --git a/secscan.sh b/security-scan.sh similarity index 73% rename from secscan.sh rename to security-scan.sh index 42b88bd..c49fecd 100755 --- a/secscan.sh +++ b/security-scan.sh @@ -1,9 +1,9 @@ #!/usr/bin/env bash -# Convenience wrapper for building and running the secscan container. +# Convenience wrapper for building and running the security-scan container. # -# ./secscan.sh build -> docker build the image -# ./secscan.sh run [args...] -> docker run, default --dry-run, forwards extra args -# ./secscan.sh check -> validate setup (config, secrets, docker, image) +# ./security-scan.sh build -> docker build the image +# ./security-scan.sh run [args...] 
-> docker run, default --dry-run, forwards extra args +# ./security-scan.sh check -> validate setup (config, secrets, docker, image) # # Two things are config-driven and read from config.yaml at runtime: # @@ -18,13 +18,24 @@ # Required env (only when slack.enabled=true AND secrets.source=env): # the var named by slack.webhook_url_env (or BOTH channel_id_env and bot_token_env) # -# Config: defaults to ./config.yaml; override with `--config /path/to/cfg.yaml` before -# any other args, or set SECSCAN_CONFIG=... in env. +# Config layout (bind-mounted as a single directory into the container): +# +# config/config.yaml # required — main settings +# config/.env.1password.tpl # optional — only when secrets.source=1password +# +# Default config directory: ./config/. Override with one of: +# --config /path/to/cfg.yaml # explicit file path (its parent dir is mounted) +# SECURITY_SCAN_CONFIG=... # same thing via env var +# SECURITY_SCAN_CONFIG_DIR=... # mount this dir instead; expects config.yaml inside +# +# When the skill packages security-scan, point SECURITY_SCAN_CONFIG_DIR at the per-project +# config the agent maintains for the user. 
set -euo pipefail -IMAGE="${SECSCAN_IMAGE:-secscan:latest}" +IMAGE="${SECURITY_SCAN_IMAGE:-security-scan:latest}" HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +DEFAULT_CONFIG_DIR="$HERE/config" die() { echo "error: $*" >&2; exit 1; } warn() { echo "warning: $*" >&2; } @@ -112,14 +123,15 @@ cmd_build() { } cmd_check() { - local config="${SECSCAN_CONFIG:-$HERE/config.yaml}" + local config_dir="${SECURITY_SCAN_CONFIG_DIR:-$DEFAULT_CONFIG_DIR}" + local config="${SECURITY_SCAN_CONFIG:-$config_dir/config.yaml}" local ok=1 echo "== config ==" if [[ -f "$config" ]]; then echo " ✓ $config" else - echo " ✗ $config (cp config.example.yaml config.yaml)" + echo " ✗ $config (cp config/config.example.yaml config/config.yaml)" ok=0 fi @@ -140,7 +152,7 @@ cmd_check() { if docker image inspect "$IMAGE" >/dev/null 2>&1; then echo " ✓ $IMAGE present" else - echo " ⚠ $IMAGE not built yet — run: ./secscan.sh build" + echo " ⚠ $IMAGE not built yet — run: ./security-scan.sh build" fi if [[ -f "$config" ]]; then @@ -166,10 +178,12 @@ cmd_check() { ;; 1password|1Password|op) local ef; ef="$(read_config_field "$config" "secrets.env_file" ".env.1password.tpl")" - [[ "$ef" = /* ]] || ef="$HERE/$ef" + # Resolve env_file relative to the config directory (so the whole config/ + # dir is the unit of bind-mount). 
+ [[ "$ef" = /* ]] || ef="$config_dir/$ef" if command -v op >/dev/null; then echo " ✓ op (1Password CLI) installed"; else echo " ✗ op not installed (brew install 1password-cli)"; ok=0; fi if op account list >/dev/null 2>&1; then echo " ✓ op signed in"; else echo " ⚠ op not signed in (run: op signin)"; fi - if [[ -f "$ef" ]]; then echo " ✓ $ef present"; else echo " ✗ $ef missing (cp .env.1password.tpl.example .env.1password.tpl)"; ok=0; fi + if [[ -f "$ef" ]]; then echo " ✓ $ef present"; else echo " ✗ $ef missing (cp config/.env.1password.tpl.example config/.env.1password.tpl)"; ok=0; fi ;; *) echo " ✗ secrets.source must be 'env' or '1password', got: $secrets_source" @@ -187,10 +201,10 @@ cmd_check() { echo if [[ $ok -eq 1 ]]; then - echo "all good. try: ./secscan.sh run" + echo "all good. try: ./security-scan.sh run" return 0 else - echo "fix the ✗ items above, then re-run ./secscan.sh check" + echo "fix the ✗ items above, then re-run ./security-scan.sh check" return 1 fi } @@ -198,19 +212,24 @@ cmd_check() { cmd_run() { command -v docker >/dev/null || die "docker not on PATH" - local config="${SECSCAN_CONFIG:-$HERE/config.yaml}" + local config_dir="${SECURITY_SCAN_CONFIG_DIR:-$DEFAULT_CONFIG_DIR}" + local config="${SECURITY_SCAN_CONFIG:-$config_dir/config.yaml}" local extra_args=() local have_dry_run=0 while [[ $# -gt 0 ]]; do case "$1" in - --config) config="$2"; shift 2 ;; - --config=*) config="${1#--config=}"; shift ;; + --config) config="$2"; config_dir="$(dirname "$2")"; shift 2 ;; + --config=*) config="${1#--config=}"; config_dir="$(dirname "$config")"; shift ;; + --config-dir) config_dir="$2"; config="$2/config.yaml"; shift 2 ;; + --config-dir=*) config_dir="${1#--config-dir=}"; config="$config_dir/config.yaml"; shift ;; --dry-run) have_dry_run=1; extra_args+=("$1"); shift ;; --) shift; extra_args+=("$@"); break ;; *) extra_args+=("$1"); shift ;; esac done + # Canonicalize. 
+ config_dir="$(cd "$config_dir" 2>/dev/null && pwd || echo "$config_dir")" # Default to --dry-run unless the caller asked for the real path. Build a new # array so `--no-dry-run` is removed cleanly (rather than replaced with ""). @@ -235,10 +254,11 @@ cmd_run() { error: config not found at $config To set up: - cp config.example.yaml config.yaml - \$EDITOR config.yaml # set repo, ref, parent_issue, secrets.source + cp config/config.example.yaml config/config.yaml + \$EDITOR config/config.yaml # set repo, ref, project, secrets.source See README.md ("Setup: secrets") for env-vs-1Password choice. +Or set SECURITY_SCAN_CONFIG_DIR=/path/to/your-config-dir to use a different directory. EOF exit 1 fi @@ -270,7 +290,7 @@ Two ways to fix this: 1) Export it now: export GITHUB_TOKEN=github_pat_xxx # see README.md "Option A" - ./secscan.sh run + ./security-scan.sh run 2) Switch to 1Password (recommended for daily use): # in config.yaml @@ -280,9 +300,9 @@ Two ways to fix this: # then: cp .env.1password.tpl.example .env.1password.tpl \$EDITOR .env.1password.tpl # set op:// vault paths - ./secscan.sh run + ./security-scan.sh run -Run \`./secscan.sh check\` to see your full setup status. +Run \`./security-scan.sh check\` to see your full setup status. EOF exit 1 fi @@ -299,13 +319,13 @@ Either export them: …or set slack.enabled: false in $config to disable Slack for this run. -Run \`./secscan.sh check\` to see your full setup status. +Run \`./security-scan.sh check\` to see your full setup status. EOF exit 1 fi - echo "secrets: env (shell exports) slack: $SLACK_MODE" >&2 + echo "secrets: env (shell exports) slack: $SLACK_MODE config-dir: $config_dir" >&2 exec docker run --rm \ - -v "$config":/config/config.yaml:ro \ + -v "$config_dir":/config:ro \ "${env_args[@]}" \ "$IMAGE" "${extra_args[@]+"${extra_args[@]}"}" ;; @@ -323,16 +343,16 @@ EOF exit 1 fi local ef="$env_file" - [[ "$ef" = /* ]] || ef="$HERE/$ef" + [[ "$ef" = /* ]] || ef="$config_dir/$ef" if [[ ! 
-f "$ef" ]]; then cat >&2 <// paths + cp config/.env.1password.tpl.example config/.env.1password.tpl + \$EDITOR config/.env.1password.tpl # set op://// paths -The template lists every env var secscan understands. +The template lists every env var security-scan understands. EOF exit 1 fi @@ -342,9 +362,9 @@ EOF grep -qE "^\s*${v}\s*=" "$ef" || warn "$v not referenced in $ef but slack.enabled=true; add 'op://...' line or set slack.enabled: false" done fi - echo "secrets: 1password ($ef) slack: $SLACK_MODE" >&2 + echo "secrets: 1password ($ef) slack: $SLACK_MODE config-dir: $config_dir" >&2 exec op run --env-file="$ef" -- docker run --rm \ - -v "$config":/config/config.yaml:ro \ + -v "$config_dir":/config:ro \ "${env_args[@]}" \ "$IMAGE" "${extra_args[@]+"${extra_args[@]}"}" ;; @@ -360,17 +380,24 @@ case "${1:-}" in check) shift; cmd_check "$@" ;; ""|-h|--help) cat </config.yaml (override with SECURITY_SCAN_CONFIG env) + image tag defaults to "security-scan:latest" (override with SECURITY_SCAN_IMAGE env) + +The whole --config-dir is bind-mounted read-only at /config inside the container, +so any related files (the 1Password env template, etc.) ride along. secrets (driven by config.yaml): secrets.source: env -> use already-exported shell variables @@ -385,7 +412,7 @@ slack (driven by config.yaml): slack.enabled: true with slack.channel_id_env + slack.bot_token_env -> both named vars must be set (uses chat.postMessage) -Run \`./secscan.sh check\` for a full setup status. +Run \`./security-scan.sh check\` for a full setup status. 
EOF ;; *) die "unknown command: $1 (try 'build', 'run', or 'check')" ;; diff --git a/security_scan/__init__.py b/security_scan/__init__.py new file mode 100644 index 0000000..d1913c1 --- /dev/null +++ b/security_scan/__init__.py @@ -0,0 +1,3 @@ +"""security_scan — stateless single-repo security scanner; files findings into a GitHub Projects v2 board.""" + +__version__ = "0.2.0" diff --git a/secscan/__main__.py b/security_scan/__main__.py similarity index 52% rename from secscan/__main__.py rename to security_scan/__main__.py index 9e20d9f..2f7510e 100644 --- a/secscan/__main__.py +++ b/security_scan/__main__.py @@ -1,4 +1,4 @@ -from secscan.main import cli +from security_scan.main import cli if __name__ == "__main__": cli() diff --git a/secscan/config.py b/security_scan/config.py similarity index 98% rename from secscan/config.py rename to security_scan/config.py index 45142de..3a30b33 100644 --- a/secscan/config.py +++ b/security_scan/config.py @@ -12,7 +12,7 @@ import yaml -from secscan.models import SEVERITY_ORDER +from security_scan.models import SEVERITY_ORDER class ConfigError(ValueError): @@ -37,7 +37,7 @@ class ScannersConfig: @dataclass class CodexConfig: """Tunables for the local Codex CLI runner. Auth is via `codex login` - (ChatGPT subscription); secscan never sees an API key.""" + (ChatGPT subscription); security_scan never sees an API key.""" binary: str = "codex" model: str | None = None # None => use codex's configured default timeout: int = 1200 # seconds; LLM scans can run minutes diff --git a/secscan/cross_validate.py b/security_scan/cross_validate.py similarity index 99% rename from secscan/cross_validate.py rename to security_scan/cross_validate.py index 68aa6fd..d712567 100644 --- a/secscan/cross_validate.py +++ b/security_scan/cross_validate.py @@ -39,7 +39,7 @@ import requests -from secscan.models import SEVERITY_ORDER, Finding +from security_scan.models import SEVERITY_ORDER, Finding # Severity downgrade ladder. 
Critical is intentionally NOT downgraded — the # asymmetry is deliberate (worst case for FP-on-critical is one extra issue diff --git a/secscan/detect.py b/security_scan/detect.py similarity index 100% rename from secscan/detect.py rename to security_scan/detect.py diff --git a/secscan/fingerprint.py b/security_scan/fingerprint.py similarity index 87% rename from secscan/fingerprint.py rename to security_scan/fingerprint.py index afc41c6..acc40e4 100644 --- a/secscan/fingerprint.py +++ b/security_scan/fingerprint.py @@ -10,10 +10,13 @@ import hashlib import re -from secscan.models import Finding +from security_scan.models import Finding MARKER_RE = re.compile( - r"" + # Accept legacy `secscan:` marker too so issues filed by the pre-rename code + # still match for dedup. New markers are written as `security-scan:` (see + # inject_marker below). + r"" ) @@ -68,7 +71,7 @@ def resolve_fingerprint(f: Finding) -> str: def inject_marker(body: str, fp: str, f: Finding) -> str: """Append the hidden marker to an issue body. Code-owned, regardless of LLM prose.""" - marker = f"" + marker = f"" if MARKER_RE.search(body): return MARKER_RE.sub(marker, body) sep = "\n\n" if body and not body.endswith("\n") else "" diff --git a/secscan/github.py b/security_scan/github.py similarity index 94% rename from secscan/github.py rename to security_scan/github.py index dc0071a..acb6592 100644 --- a/secscan/github.py +++ b/security_scan/github.py @@ -19,11 +19,11 @@ _API = "https://api.github.com" _GRAPHQL = "https://api.github.com/graphql" -_UA = "secscan/0.1" +_UA = "security_scan/0.1" _ACCEPT = "application/vnd.github+json" _API_VERSION = "2022-11-28" -# Single-select options + colors that secscan creates on the target Project v2 if +# Single-select options + colors that security_scan creates on the target Project v2 if # the user hasn't created them already. GitHub's `ProjectV2SingleSelectFieldOptionColor` # enum accepts: GRAY, BLUE, GREEN, YELLOW, ORANGE, RED, PINK, PURPLE. 
_SEVERITY_OPTIONS: list[tuple[str, str]] = [ @@ -131,7 +131,7 @@ def _scrub(self, text: str) -> str: def resolve_project(self, owner: str, number: int) -> ProjectContext: """Find the Projects v2 board by (owner, number). Idempotently ensures - single-select `Severity` and `Category` fields exist with the secscan + single-select `Severity` and `Category` fields exist with the security_scan option set. Re-running is safe. """ if self.dry_run: @@ -257,18 +257,18 @@ def create_issue(self, title: str, body: str, labels: list[str] | None = None) - # Color palette per category/severity. Anything unmapped becomes mid-grey. _LABEL_COLOR = { # categories - "secscan:dependency": "5319e7", # purple — language/OS package CVEs - "secscan:secret": "d93f0b", # red — pattern-matched secret - "secscan:secret-verified": "b60205", # dark red — live/verified secret - "secscan:sast": "fbca04", # yellow — code patterns - "secscan:iac": "0e8a16", # green — IaC misconfig - "secscan:license": "1d76db", # blue — license issues + "security-scan:dependency": "5319e7", # purple — language/OS package CVEs + "security-scan:secret": "d93f0b", # red — pattern-matched secret + "security-scan:secret-verified": "b60205", # dark red — live/verified secret + "security-scan:sast": "fbca04", # yellow — code patterns + "security-scan:iac": "0e8a16", # green — IaC misconfig + "security-scan:license": "1d76db", # blue — license issues # severities - "secscan:critical": "b60205", - "secscan:high": "d93f0b", - "secscan:medium": "fbca04", - "secscan:low": "c5def5", - "secscan:info": "ededed", + "security-scan:critical": "b60205", + "security-scan:high": "d93f0b", + "security-scan:medium": "fbca04", + "security-scan:low": "c5def5", + "security-scan:info": "ededed", } _LABEL_CREATED: set[str] # populated in __init__ @@ -282,7 +282,7 @@ def _ensure_label(self, name: str) -> None: self._request( "POST", f"{_API}/repos/{self.owner}/{self.name}/labels", - json={"name": name, "color": color, "description": 
"secscan-managed label"}, + json={"name": name, "color": color, "description": "security_scan-managed label"}, ) except GitHubError as e: # 422 = label already exists with this name; anything else is a real problem. @@ -346,7 +346,7 @@ def _ensure_single_select_field( if missing: print( f"github: project field {name!r} is missing options {missing}; " - "secscan won't be able to set those values until you add them", + "security_scan won't be able to set those values until you add them", file=sys.stderr, ) return ProjectField(id=existing["id"], options=opts) diff --git a/secscan/main.py b/security_scan/main.py similarity index 95% rename from secscan/main.py rename to security_scan/main.py index b7655a3..dd1655c 100644 --- a/secscan/main.py +++ b/security_scan/main.py @@ -14,19 +14,19 @@ import tempfile from pathlib import Path -from secscan.config import Config, ConfigError, load_config -from secscan.detect import DetectionResult, ScannerTarget, detect_stack -from secscan.github import GitHub, GitHubError -from secscan.models import Finding -from secscan.normalize import normalize_sarif -from secscan.notify import post_digest -from secscan.runners import RunnerResult -from secscan.sync import SyncResult, sync +from security_scan.config import Config, ConfigError, load_config +from security_scan.detect import DetectionResult, ScannerTarget, detect_stack +from security_scan.github import GitHub, GitHubError +from security_scan.models import Finding +from security_scan.normalize import normalize_sarif +from security_scan.notify import post_digest +from security_scan.runners import RunnerResult +from security_scan.sync import SyncResult, sync def cli(argv: list[str] | None = None) -> int: parser = argparse.ArgumentParser( - prog="secscan", + prog="security_scan", description="Stateless single-repo security scanner; files findings into a GitHub Projects v2 board.", ) parser.add_argument("--config", required=True, help="Path to config.yaml") @@ -45,7 +45,7 @@ def 
cli(argv: list[str] | None = None) -> int: def run(cfg: Config, dry_run: bool = False, work_dir: str | None = None, keep_work: bool = False) -> int: - work_root = Path(work_dir) if work_dir else Path(tempfile.mkdtemp(prefix="secscan-")) + work_root = Path(work_dir) if work_dir else Path(tempfile.mkdtemp(prefix="security_scan-")) repo_dir = work_root / cfg.repo_name gh = GitHub(cfg.github_token, cfg.repo_owner, cfg.repo_name, dry_run=dry_run) @@ -83,7 +83,7 @@ def run(cfg: Config, dry_run: bool = False, work_dir: str | None = None, keep_wo if (cfg.cross_validate.enabled and "codex" in completed_scanners and "gemma" in completed_scanners): - from secscan.cross_validate import cross_validate + from security_scan.cross_validate import cross_validate before = sum(1 for f in findings if f.scanner in ("codex", "gemma")) print(f"cross-validate: reviewing {before} LLM finding(s) bidirectionally", file=sys.stderr) cross_validate( @@ -195,7 +195,7 @@ def _scan_and_normalize( def _invoke_runner(t: ScannerTarget, cfg: Config, repo_dir: Path, semgrep_rules: Path | str | None) -> RunnerResult: """Dynamically import the runner so missing optional bits never block import-time.""" - mod = importlib.import_module(f"secscan.runners.{t.scanner}") + mod = importlib.import_module(f"security_scan.runners.{t.scanner}") if t.scanner == "osv": return mod.run(t.targets[0], exclude=cfg.paths.exclude) if t.scanner == "gitleaks": @@ -300,7 +300,7 @@ def _maybe_triage(cfg: Config): return None try: # Lazy import to avoid touching `requests` when triage is off. - from secscan.triage import Triage + from security_scan.triage import Triage t = Triage(cfg.triage) # Kick off model warm-up in the background; scans run in parallel. 
t.start_warmup() diff --git a/secscan/models.py b/security_scan/models.py similarity index 100% rename from secscan/models.py rename to security_scan/models.py diff --git a/secscan/normalize.py b/security_scan/normalize.py similarity index 99% rename from secscan/normalize.py rename to security_scan/normalize.py index e8758ed..acc32d9 100644 --- a/secscan/normalize.py +++ b/security_scan/normalize.py @@ -11,7 +11,7 @@ import sys from fnmatch import fnmatchcase -from secscan.models import Finding, normalize_severity +from security_scan.models import Finding, normalize_severity _CATEGORY = { "osv": "dependency", diff --git a/secscan/notify.py b/security_scan/notify.py similarity index 95% rename from secscan/notify.py rename to security_scan/notify.py index c2f4176..c8e4055 100644 --- a/secscan/notify.py +++ b/security_scan/notify.py @@ -12,9 +12,9 @@ import requests -from secscan.config import SlackConfig -from secscan.models import Finding -from secscan.sync import SyncResult +from security_scan.config import SlackConfig +from security_scan.models import Finding +from security_scan.sync import SyncResult def post_digest( @@ -112,9 +112,9 @@ def _default_digest( ) -> str: """Slack mrkdwn digest of ACTIONABLE findings (newly filed this run). - Per-category sections list only the findings that became open sub-issues - this run — items that were dup-skipped (already filed previously) or - below the severity floor aren't shown. The footer still reports the + Per-category sections list only the findings that became open project + items this run — items that were dup-skipped (already filed previously) + or below the severity floor aren't shown. The footer still reports the skip counts so you can see the gates were applied, but the sections themselves only contain new bugs to triage. 
@@ -131,7 +131,7 @@ def _default_digest( by_sev[f.severity] = by_sev.get(f.severity, 0) + 1 lines: list[str] = [ - f":lock: *secscan* — `{repo}@{ref}` — " + f":lock: *security_scan* — `{repo}@{ref}` — " f"" ] diff --git a/secscan/rules/javascript.yaml b/security_scan/rules/javascript.yaml similarity index 100% rename from secscan/rules/javascript.yaml rename to security_scan/rules/javascript.yaml diff --git a/secscan/rules/python.yaml b/security_scan/rules/python.yaml similarity index 100% rename from secscan/rules/python.yaml rename to security_scan/rules/python.yaml diff --git a/secscan/rules/secrets.yaml b/security_scan/rules/secrets.yaml similarity index 100% rename from secscan/rules/secrets.yaml rename to security_scan/rules/secrets.yaml diff --git a/secscan/rules/sqli.yaml b/security_scan/rules/sqli.yaml similarity index 100% rename from secscan/rules/sqli.yaml rename to security_scan/rules/sqli.yaml diff --git a/secscan/rules/supabase.yaml b/security_scan/rules/supabase.yaml similarity index 100% rename from secscan/rules/supabase.yaml rename to security_scan/rules/supabase.yaml diff --git a/secscan/rules/xss.yaml b/security_scan/rules/xss.yaml similarity index 100% rename from secscan/rules/xss.yaml rename to security_scan/rules/xss.yaml diff --git a/secscan/runners/__init__.py b/security_scan/runners/__init__.py similarity index 100% rename from secscan/runners/__init__.py rename to security_scan/runners/__init__.py diff --git a/secscan/runners/codex.py b/security_scan/runners/codex.py similarity index 98% rename from secscan/runners/codex.py rename to security_scan/runners/codex.py index e4842c7..df4929b 100644 --- a/secscan/runners/codex.py +++ b/security_scan/runners/codex.py @@ -131,7 +131,7 @@ def run( if shutil.which(binary) is None: return RunnerResult("codex", None, False, f"binary not found: {binary}") - with tempfile.TemporaryDirectory(prefix="codex-secscan-") as td: + with tempfile.TemporaryDirectory(prefix="codex-security_scan-") as td: 
schema_path = Path(td) / "schema.json" output_path = Path(td) / "output.json" schema_path.write_text(json.dumps(_SCHEMA)) @@ -160,7 +160,7 @@ def run( text=True, timeout=timeout, check=False, - # Don't inherit secscan's env wholesale — keep CODEX_HOME etc., but + # Don't inherit security_scan's env wholesale — keep CODEX_HOME etc., but # strip anything that might confuse the agent. Codex reads its own # config from ~/.codex/. env={**os.environ}, diff --git a/secscan/runners/gemma.py b/security_scan/runners/gemma.py similarity index 99% rename from secscan/runners/gemma.py rename to security_scan/runners/gemma.py index e565b15..a486f90 100644 --- a/secscan/runners/gemma.py +++ b/security_scan/runners/gemma.py @@ -25,7 +25,7 @@ from . import RunnerResult -# Extensions worth feeding to the model. Mirrors secscan/detect._SEMGREP_EXTS with +# Extensions worth feeding to the model. Mirrors security_scan/detect._SEMGREP_EXTS with # a few SQL/HCL/TF additions since LLM reading isn't limited to semgrep's parsers. _SOURCE_EXTS = { ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs", diff --git a/secscan/runners/gitleaks.py b/security_scan/runners/gitleaks.py similarity index 100% rename from secscan/runners/gitleaks.py rename to security_scan/runners/gitleaks.py diff --git a/secscan/runners/osv.py b/security_scan/runners/osv.py similarity index 93% rename from secscan/runners/osv.py rename to security_scan/runners/osv.py index 9a9b735..de32be9 100644 --- a/secscan/runners/osv.py +++ b/security_scan/runners/osv.py @@ -11,7 +11,7 @@ def run(root: Path, exclude: list[str] | None = None, binary: str = "osv-scanner") -> RunnerResult: # NOTE: we intentionally do NOT pass --paths-to-ignore: the flag's name and # presence varies across osv-scanner versions (it's a hard error on 1.9.2). 
- # secscan.normalize.normalize_sarif() filters excluded paths post-hoc, so we + # security_scan.normalize.normalize_sarif() filters excluded paths post-hoc, so we # get the same effect with zero version coupling. _ = exclude # accepted for signature stability; intentionally unused here cmd = [binary, "--format", "sarif", "--skip-git", "--recursive"] diff --git a/secscan/runners/semgrep.py b/security_scan/runners/semgrep.py similarity index 100% rename from secscan/runners/semgrep.py rename to security_scan/runners/semgrep.py diff --git a/secscan/runners/syft.py b/security_scan/runners/syft.py similarity index 85% rename from secscan/runners/syft.py rename to security_scan/runners/syft.py index 397ba4b..ffdfb2d 100644 --- a/secscan/runners/syft.py +++ b/security_scan/runners/syft.py @@ -1,10 +1,10 @@ """Syft runner — produces a CycloneDX SBOM artifact for the scanned tree. -Unlike the other scanners, Syft does not file sub-issues. It writes the SBOM -to disk so it can be archived/uploaded by the caller. RunnerResult.sarif -carries a small metadata dict (path + component count + format) so the -orchestrator can log a one-line summary and downstream Slack digests can -reference it. +Unlike the other scanners, Syft does not file findings (no project items). +It writes the SBOM to disk so it can be archived/uploaded by the caller. +RunnerResult.sarif carries a small metadata dict (path + component count + +format) so the orchestrator can log a one-line summary and downstream Slack +digests can reference it. 
""" from __future__ import annotations diff --git a/secscan/runners/trivy.py b/security_scan/runners/trivy.py similarity index 100% rename from secscan/runners/trivy.py rename to security_scan/runners/trivy.py diff --git a/secscan/runners/trufflehog.py b/security_scan/runners/trufflehog.py similarity index 100% rename from secscan/runners/trufflehog.py rename to security_scan/runners/trufflehog.py diff --git a/secscan/sync.py b/security_scan/sync.py similarity index 87% rename from secscan/sync.py rename to security_scan/sync.py index cb98b77..dd339ca 100644 --- a/secscan/sync.py +++ b/security_scan/sync.py @@ -10,9 +10,9 @@ from dataclasses import dataclass, field from typing import Protocol -from secscan.fingerprint import inject_marker, parse_marker, resolve_fingerprint -from secscan.github import GitHub, ProjectContext -from secscan.models import Finding +from security_scan.fingerprint import inject_marker, parse_marker, resolve_fingerprint +from security_scan.github import GitHub, ProjectContext +from security_scan.models import Finding class Triage(Protocol): @@ -129,15 +129,16 @@ def sync( def _labels_for(f: Finding) -> list[str]: - """The label set applied to a sub-issue. + """The label set applied to each issue filed. - `security` is the existing umbrella label. `secscan:` lets you - filter the parent's sub-issue list by category in the GitHub UI. - `secscan:` lets you triage by severity. All labels are namespaced - under `secscan:` so they're easy to clean up if you ever drop the tool. + `security` is the existing umbrella label. `security-scan:` lets you + filter project-board items by category alongside the Category single-select + field. `security-scan:` parallels Severity. All scanner-applied + labels are namespaced under `security-scan:` so they're easy to clean up if + you ever drop the tool. 
""" return [ "security", - f"secscan:{f.category}", - f"secscan:{f.severity}", + f"security-scan:{f.category}", + f"security-scan:{f.severity}", ] diff --git a/secscan/triage.py b/security_scan/triage.py similarity index 98% rename from secscan/triage.py rename to security_scan/triage.py index 8a33234..cbb9605 100644 --- a/secscan/triage.py +++ b/security_scan/triage.py @@ -20,10 +20,10 @@ import requests -from secscan.config import TriageConfig -from secscan.fingerprint import parse_marker -from secscan.models import Finding -from secscan.sync import SyncResult, default_issue +from security_scan.config import TriageConfig +from security_scan.fingerprint import parse_marker +from security_scan.models import Finding +from security_scan.sync import SyncResult, default_issue class Triage: diff --git a/tests/test_codex_runner.py b/tests/test_codex_runner.py index 5f1d764..e33951f 100644 --- a/tests/test_codex_runner.py +++ b/tests/test_codex_runner.py @@ -6,7 +6,7 @@ from pathlib import Path from unittest.mock import patch -from secscan.runners import codex as codex_runner +from security_scan.runners import codex as codex_runner def _fake_completed(rc=0, stdout="", stderr=""): @@ -39,8 +39,8 @@ def _fake_run(cmd, **kw): ])) return _fake_completed(0) - with patch("secscan.runners.codex.shutil.which", return_value="/usr/bin/codex"), \ - patch("secscan.runners.codex.subprocess.run", side_effect=_fake_run): + with patch("security_scan.runners.codex.shutil.which", return_value="/usr/bin/codex"), \ + patch("security_scan.runners.codex.subprocess.run", side_effect=_fake_run): result = codex_runner.run(tmp_path) assert result.completed is True @@ -78,8 +78,8 @@ def _fake_run(cmd, **kw): ])) return _fake_completed(0) - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", side_effect=_fake_run): + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + 
patch("security_scan.runners.codex.subprocess.run", side_effect=_fake_run): result = codex_runner.run(tmp_path) rule_ids = [r["ruleId"] for r in result.sarif["runs"][0]["results"]] assert "codex.already-prefixed" in rule_ids @@ -87,7 +87,7 @@ def _fake_run(cmd, **kw): def test_runner_handles_missing_binary(tmp_path): - with patch("secscan.runners.codex.shutil.which", return_value=None): + with patch("security_scan.runners.codex.shutil.which", return_value=None): result = codex_runner.run(tmp_path) assert result.completed is False assert "binary not found" in result.error @@ -95,8 +95,8 @@ def test_runner_handles_missing_binary(tmp_path): def test_runner_detects_auth_failure(tmp_path): """When codex isn't logged in it exits non-zero with an auth message — surface clearly.""" - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", return_value=_fake_completed(1, "", "Error: not logged in. 
Run `codex login`.")): result = codex_runner.run(tmp_path) assert result.completed is False @@ -105,8 +105,8 @@ def test_runner_detects_auth_failure(tmp_path): def test_runner_returns_failure_on_non_zero_exit(tmp_path): - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", return_value=_fake_completed(2, "", "internal model error")): result = codex_runner.run(tmp_path) assert result.completed is False @@ -115,8 +115,8 @@ def test_runner_returns_failure_on_non_zero_exit(tmp_path): def test_runner_failure_when_no_output_file_written(tmp_path): """codex exited cleanly but produced no output — likely refused the task.""" - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", return_value=_fake_completed(0)): + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", return_value=_fake_completed(0)): result = codex_runner.run(tmp_path) assert result.completed is False assert "no output" in result.error.lower() @@ -128,8 +128,8 @@ def _fake_run(cmd, **kw): Path(cmd[idx + 1]).write_text("this is not json {{{ <-- broken") return _fake_completed(0) - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", side_effect=_fake_run): + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", side_effect=_fake_run): result = codex_runner.run(tmp_path) assert result.completed is False assert "parse" in result.error.lower() @@ -137,8 +137,8 @@ def _fake_run(cmd, **kw): def test_runner_timeout(tmp_path): import subprocess - with patch("secscan.runners.codex.shutil.which", 
return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", side_effect=subprocess.TimeoutExpired(cmd="codex", timeout=10)): result = codex_runner.run(tmp_path, timeout=10) assert result.completed is False @@ -155,8 +155,8 @@ def _fake_run(cmd, **kw): ])) return _fake_completed(0) - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", side_effect=_fake_run): + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", side_effect=_fake_run): result = codex_runner.run(tmp_path) paths = [r["locations"][0]["physicalLocation"]["artifactLocation"]["uri"] for r in result.sarif["runs"][0]["results"]] @@ -172,8 +172,8 @@ def _fake_run(cmd, **kw): ])) return _fake_completed(0) - with patch("secscan.runners.codex.shutil.which", return_value="/x/codex"), \ - patch("secscan.runners.codex.subprocess.run", side_effect=_fake_run): + with patch("security_scan.runners.codex.shutil.which", return_value="/x/codex"), \ + patch("security_scan.runners.codex.subprocess.run", side_effect=_fake_run): result = codex_runner.run(tmp_path) r = result.sarif["runs"][0]["results"][0] assert r["properties"]["security-severity"] == "5.5" # medium diff --git a/tests/test_config.py b/tests/test_config.py index d7cefcb..1018646 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,7 +1,7 @@ import pytest -from secscan.config import ConfigError, load_config +from security_scan.config import ConfigError, load_config def write(tmp_path, name, body): diff --git a/tests/test_cross_validate.py b/tests/test_cross_validate.py index 71f52a8..d081f63 100644 --- a/tests/test_cross_validate.py +++ b/tests/test_cross_validate.py @@ -11,8 +11,8 @@ from pathlib import Path from unittest.mock import 
MagicMock, patch -from secscan.cross_validate import cross_validate -from secscan.models import Finding +from security_scan.cross_validate import cross_validate +from security_scan.models import Finding def _f(scanner, rule_id, severity="high"): @@ -66,9 +66,9 @@ def test_disabled_when_only_one_scanner_enabled(tmp_path): def test_gemma_marks_codex_finding_real_keeps_severity(tmp_path): f = _f("codex", "auth.foo", severity="high") - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()), \ - patch("secscan.cross_validate.requests.post", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()), \ + patch("security_scan.cross_validate.requests.post", return_value=_ollama_ok({"verdict": "real", "reason": "definitely real"})): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) cv = f.extra["cross_validation"] @@ -81,9 +81,9 @@ def test_gemma_marks_codex_finding_real_keeps_severity(tmp_path): def test_gemma_marks_codex_finding_false_positive_downgrades(tmp_path): f = _f("codex", "auth.foo", severity="high") - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()), \ - patch("secscan.cross_validate.requests.post", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()), \ + patch("security_scan.cross_validate.requests.post", return_value=_ollama_ok({"verdict": "false_positive", "reason": "not exploitable"})): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) cv = f.extra["cross_validation"] @@ -96,9 +96,9 @@ def test_critical_never_auto_downgrades_on_fp(tmp_path): """Asymmetric guardrail: critical findings stay 
critical even if the validator disagrees. The cost of missing a real critical is too high.""" f = _f("codex", "rce.eval", severity="critical") - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()), \ - patch("secscan.cross_validate.requests.post", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()), \ + patch("security_scan.cross_validate.requests.post", return_value=_ollama_ok({"verdict": "false_positive", "reason": "looks fine"})): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) assert f.severity == "critical" # protected @@ -108,9 +108,9 @@ def test_critical_never_auto_downgrades_on_fp(tmp_path): def test_uncertain_does_not_downgrade(tmp_path): f = _f("codex", "auth.foo", severity="high") - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()), \ - patch("secscan.cross_validate.requests.post", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()), \ + patch("security_scan.cross_validate.requests.post", return_value=_ollama_ok({"verdict": "uncertain", "reason": "can't tell"})): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) assert f.severity == "high" @@ -119,9 +119,9 @@ def test_uncertain_does_not_downgrade(tmp_path): def test_unrecognized_verdict_treated_as_uncertain(tmp_path): f = _f("codex", "x", severity="medium") - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()), \ - patch("secscan.cross_validate.requests.post", + with patch("security_scan.cross_validate.shutil.which", 
return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()), \ + patch("security_scan.cross_validate.requests.post", return_value=_ollama_ok({"verdict": "OBVIOUSLY_FAKE", "reason": "what"})): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) assert f.severity == "medium" @@ -130,9 +130,9 @@ def test_unrecognized_verdict_treated_as_uncertain(tmp_path): def test_codex_marks_gemma_finding_false_positive_downgrades(tmp_path): f = _f("gemma", "py.eval", severity="high") - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()), \ - patch("secscan.cross_validate.subprocess.run", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()), \ + patch("security_scan.cross_validate.subprocess.run", side_effect=_codex_completed(0, {"verdict": "false_positive", "reason": "test code, not prod"})): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) @@ -146,8 +146,8 @@ def test_ollama_unreachable_skips_gemma_review(tmp_path): """If Ollama can't be reached, codex findings simply get no review — not failure.""" import requests f = _f("codex", "x", severity="high") - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", side_effect=requests.ConnectionError("down")): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) assert "cross_validation" not in (f.extra or {}) @@ -156,8 +156,8 @@ def test_ollama_unreachable_skips_gemma_review(tmp_path): def test_codex_missing_skips_codex_review_of_gemma_findings(tmp_path): f = _f("gemma", "x", severity="high") - with 
patch("secscan.cross_validate.shutil.which", return_value=None), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()): + with patch("security_scan.cross_validate.shutil.which", return_value=None), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) # Gemma finding not reviewed because codex CLI is missing. assert "cross_validation" not in (f.extra or {}) @@ -168,9 +168,9 @@ def test_validator_failure_yields_uncertain(tmp_path): verdict — never block the finding or crash the run.""" f = _f("codex", "x", severity="high") import requests - with patch("secscan.cross_validate.shutil.which", return_value="/x/codex"), \ - patch("secscan.cross_validate.requests.get", return_value=_ping_ok()), \ - patch("secscan.cross_validate.requests.post", + with patch("security_scan.cross_validate.shutil.which", return_value="/x/codex"), \ + patch("security_scan.cross_validate.requests.get", return_value=_ping_ok()), \ + patch("security_scan.cross_validate.requests.post", side_effect=requests.ConnectionError("post failed")): cross_validate([f], repo_dir=tmp_path, codex_enabled=True, gemma_enabled=True) cv = f.extra["cross_validation"] diff --git a/tests/test_detect.py b/tests/test_detect.py index 239e449..877ce2e 100644 --- a/tests/test_detect.py +++ b/tests/test_detect.py @@ -1,6 +1,6 @@ from pathlib import Path -from secscan.detect import ScannerTarget, detect_stack +from security_scan.detect import ScannerTarget, detect_stack ALL_ON = { "osv": True, "gitleaks": True, "semgrep": True, diff --git a/tests/test_e2e_dryrun.py b/tests/test_e2e_dryrun.py index b99aa19..08b9e6e 100644 --- a/tests/test_e2e_dryrun.py +++ b/tests/test_e2e_dryrun.py @@ -17,7 +17,7 @@ import pytest -from secscan.config import ( +from security_scan.config import ( Config, PathsConfig, ProjectConfig, @@ -25,10 +25,10 @@ SlackConfig, TriageConfig, ) -from secscan.fingerprint import 
parse_marker, resolve_fingerprint -from secscan.github import ProjectContext, ProjectField -from secscan.normalize import normalize_sarif -from secscan.runners import RunnerResult +from security_scan.fingerprint import parse_marker, resolve_fingerprint +from security_scan.github import ProjectContext, ProjectField +from security_scan.normalize import normalize_sarif +from security_scan.runners import RunnerResult def _synthetic_repo(root: Path) -> None: @@ -167,7 +167,7 @@ def _make_fake_gh(state="OPEN", existing_with_fp: list[str] | None = None) -> Ma "number": i + 1, "state": state, "title": "old", - "body": f"prose\n", + "body": f"prose\n", }) fake_gh = MagicMock() @@ -191,13 +191,13 @@ def create(title, body, labels=None): def test_full_dryrun_pipeline_files_three_findings(cfg, tmp_path): - from secscan.main import run + from security_scan.main import run fake_gh = _make_fake_gh() - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): rc = run(cfg, dry_run=True, work_dir=str(tmp_path), keep_work=True) assert rc == 0 @@ -211,7 +211,7 @@ def test_full_dryrun_pipeline_files_three_findings(cfg, tmp_path): def test_dryrun_does_not_post_to_real_github(cfg, tmp_path): """The actual GitHub class in dry_run mode must make zero HTTP requests across issue creation AND every Projects v2 
mutation.""" - from secscan.github import GitHub + from security_scan.github import GitHub captured_requests = [] @@ -236,13 +236,13 @@ def fake_request(*a, **kw): def test_marker_roundtrip_on_dryrun_bodies(cfg, tmp_path): """Every body that the pipeline would have posted must contain a parseable marker.""" - from secscan.main import run + from security_scan.main import run fake_gh = _make_fake_gh() - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): run(cfg, dry_run=True, work_dir=str(tmp_path), keep_work=True) for issue in fake_gh.captured: @@ -253,17 +253,17 @@ def test_marker_roundtrip_on_dryrun_bodies(cfg, tmp_path): def test_closed_existing_fingerprint_suppresses_refile(cfg, tmp_path): """The spec invariant: a closed project item with our fingerprint never refiles.""" - from secscan.main import run + from security_scan.main import run findings = normalize_sarif(_semgrep_sarif(), "semgrep") semgrep_fp = resolve_fingerprint(findings[0]) fake_gh = _make_fake_gh(state="CLOSED", existing_with_fp=[semgrep_fp]) - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - 
patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): run(cfg, dry_run=True, work_dir=str(tmp_path), keep_work=True) # 3 findings total; the semgrep one matches a closed-existing fp -> skip. @@ -286,14 +286,14 @@ def test_fingerprint_survives_line_shift_in_source(cfg): def test_raw_secret_never_in_issue_body(cfg, tmp_path): """End-to-end check that the raw AWS key never reaches a posted body.""" - from secscan.main import run + from security_scan.main import run raw_secret = "TEST_FAKE_SECRET_VALUE" fake_gh = _make_fake_gh() - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): run(cfg, dry_run=True, work_dir=str(tmp_path), keep_work=True) for issue in fake_gh.captured: diff --git a/tests/test_fingerprint.py b/tests/test_fingerprint.py index 31f097b..71101cd 100644 --- a/tests/test_fingerprint.py +++ b/tests/test_fingerprint.py @@ -1,10 +1,10 @@ -from 
secscan.fingerprint import ( +from security_scan.fingerprint import ( compute_fingerprint, inject_marker, parse_marker, resolve_fingerprint, ) -from secscan.models import Finding +from security_scan.models import Finding def _sast(file_path="src/a.js", line=10, snippet="exec(userInput)"): @@ -96,11 +96,11 @@ def test_marker_idempotent_inject(): body = inject_marker("prose", fp, f) body2 = inject_marker(body, fp, f) # Should not duplicate the marker - assert body2.count("") is None + assert parse_marker("") is None diff --git a/tests/test_gemma_runner.py b/tests/test_gemma_runner.py index a88a430..9b1e81b 100644 --- a/tests/test_gemma_runner.py +++ b/tests/test_gemma_runner.py @@ -6,7 +6,7 @@ from pathlib import Path from unittest.mock import MagicMock, patch -from secscan.runners import gemma as gemma_runner +from security_scan.runners import gemma as gemma_runner def _ollama_resp(payload: dict, status=200): @@ -44,7 +44,7 @@ def test_runner_happy_path(tmp_path): "message": "Concatenating user input into SQL.", "snippet": "db.query('... 
' + n)"}, ] } - with patch("secscan.runners.gemma.requests.post", return_value=_ollama_resp(payload)) as p: + with patch("security_scan.runners.gemma.requests.post", return_value=_ollama_resp(payload)) as p: result = gemma_runner.run(tmp_path) assert result.completed is True results = result.sarif["runs"][0]["results"] @@ -60,7 +60,7 @@ def test_runner_happy_path(tmp_path): def test_runner_unreachable_ollama(tmp_path): _drop_source(tmp_path) import requests - with patch("secscan.runners.gemma.requests.post", + with patch("security_scan.runners.gemma.requests.post", side_effect=requests.ConnectionError("ollama down")): result = gemma_runner.run(tmp_path) assert result.completed is False @@ -72,7 +72,7 @@ def test_runner_http_error(tmp_path): r = MagicMock() r.status_code = 500 r.text = "server error" - with patch("secscan.runners.gemma.requests.post", return_value=r): + with patch("security_scan.runners.gemma.requests.post", return_value=r): result = gemma_runner.run(tmp_path) assert result.completed is False assert "500" in result.error @@ -83,7 +83,7 @@ def test_runner_parse_error_on_malformed_content(tmp_path): r = MagicMock() r.status_code = 200 r.json.return_value = {"message": {"content": "not json at all"}} - with patch("secscan.runners.gemma.requests.post", return_value=r): + with patch("security_scan.runners.gemma.requests.post", return_value=r): result = gemma_runner.run(tmp_path) assert result.completed is False assert "parse" in result.error.lower() @@ -96,7 +96,7 @@ def test_runner_namespaces_rule_id(tmp_path): "title": "t", "message": "m"}, {"file": "y.py", "rule_id": "raw-rule", "severity": "low", "title": "t", "message": "m"}, ]} - with patch("secscan.runners.gemma.requests.post", return_value=_ollama_resp(payload)): + with patch("security_scan.runners.gemma.requests.post", return_value=_ollama_resp(payload)): result = gemma_runner.run(tmp_path) rule_ids = {r["ruleId"] for r in result.sarif["runs"][0]["results"]} assert "gemma.raw-rule" in rule_ids 
@@ -117,7 +117,7 @@ def _capture(*args, **kwargs): captured["body"] = kwargs["json"] return _ollama_resp({"findings": []}) - with patch("secscan.runners.gemma.requests.post", side_effect=_capture): + with patch("security_scan.runners.gemma.requests.post", side_effect=_capture): gemma_runner.run(tmp_path, max_files=3, max_file_bytes=1000, max_total_bytes=5000) user_msg = next(m["content"] for m in captured["body"]["messages"] if m["role"] == "user") @@ -142,7 +142,7 @@ def _capture(*args, **kwargs): captured["body"] = kwargs["json"] return _ollama_resp({"findings": []}) - with patch("secscan.runners.gemma.requests.post", side_effect=_capture): + with patch("security_scan.runners.gemma.requests.post", side_effect=_capture): gemma_runner.run(tmp_path) user_msg = next(m["content"] for m in captured["body"]["messages"] if m["role"] == "user") @@ -159,7 +159,7 @@ def test_runner_drops_findings_without_file(tmp_path): {"file": "", "rule_id": "no-path", "severity": "low", "title": "t", "message": "m"}, {"file": "src/auth.py", "rule_id": "ok", "severity": "low", "title": "t", "message": "m"}, ]} - with patch("secscan.runners.gemma.requests.post", return_value=_ollama_resp(payload)): + with patch("security_scan.runners.gemma.requests.post", return_value=_ollama_resp(payload)): result = gemma_runner.run(tmp_path) paths = [r["locations"][0]["physicalLocation"]["artifactLocation"]["uri"] for r in result.sarif["runs"][0]["results"]] @@ -171,7 +171,7 @@ def test_runner_findings_not_a_list_is_failure(tmp_path): r = MagicMock() r.status_code = 200 r.json.return_value = {"message": {"content": json.dumps({"findings": "not a list"})}} - with patch("secscan.runners.gemma.requests.post", return_value=r): + with patch("security_scan.runners.gemma.requests.post", return_value=r): result = gemma_runner.run(tmp_path) assert result.completed is False assert "schema" in result.error.lower() diff --git a/tests/test_github.py b/tests/test_github.py index 4b2dfd6..81e09b9 100644 --- 
a/tests/test_github.py +++ b/tests/test_github.py @@ -7,7 +7,7 @@ import pytest import requests -from secscan.github import GitHub, GitHubError, ProjectField +from security_scan.github import GitHub, GitHubError, ProjectField TOKEN = "ghp_supersecrettoken_abcdef123456" @@ -31,7 +31,7 @@ def _gh(dry_run=False): def test_clone_shallow_uses_depth_1(tmp_path): gh = _gh() completed = MagicMock(returncode=0, stdout="", stderr="") - with patch("secscan.github.subprocess.run", return_value=completed) as m: + with patch("security_scan.github.subprocess.run", return_value=completed) as m: gh.clone("dev", tmp_path / "repo", shallow=True) args = m.call_args.args[0] assert args[0] == "git" @@ -45,7 +45,7 @@ def test_clone_shallow_uses_depth_1(tmp_path): def test_clone_full_omits_depth(tmp_path): gh = _gh() completed = MagicMock(returncode=0, stdout="", stderr="") - with patch("secscan.github.subprocess.run", return_value=completed) as m: + with patch("security_scan.github.subprocess.run", return_value=completed) as m: gh.clone("dev", tmp_path / "repo", shallow=False) args = m.call_args.args[0] assert "--depth=1" not in args @@ -55,7 +55,7 @@ def test_clone_url_has_no_credentials(tmp_path): """The clone URL must not embed the token — git would persist it into .git/config.""" gh = _gh() completed = MagicMock(returncode=0, stdout="", stderr="") - with patch("secscan.github.subprocess.run", return_value=completed) as m: + with patch("security_scan.github.subprocess.run", return_value=completed) as m: gh.clone("dev", tmp_path / "repo") args = m.call_args.args[0] url = next(a for a in args if a.startswith("https://")) @@ -69,7 +69,7 @@ def test_clone_passes_token_via_one_shot_config(tmp_path): gh = _gh() completed = MagicMock(returncode=0, stdout="", stderr="") - with patch("secscan.github.subprocess.run", return_value=completed) as m: + with patch("security_scan.github.subprocess.run", return_value=completed) as m: gh.clone("dev", tmp_path / "repo") args = m.call_args.args[0] assert 
"-c" in args @@ -88,7 +88,7 @@ def test_clone_scrubs_token_from_error(tmp_path): gh = _gh() leaky = f"fatal: could not read from https://x-access-token:{TOKEN}@github.com/leverj/ezel.git" completed = MagicMock(returncode=128, stdout="", stderr=leaky) - with patch("secscan.github.subprocess.run", return_value=completed): + with patch("security_scan.github.subprocess.run", return_value=completed): with pytest.raises(GitHubError) as ei: gh.clone("dev", tmp_path / "repo") assert TOKEN not in str(ei.value) @@ -102,14 +102,14 @@ def test_create_issue_posts_correct_payload(): created = {"id": 9001, "node_id": "I_xxx", "number": 42, "title": "t", "body": "b", "html_url": "u", "state": "open"} resp = _resp(201, json_body=created, headers={}) with patch.object(requests.Session, "request", return_value=resp) as m: - out = gh.create_issue("t", "b", labels=["security", "secscan"]) + out = gh.create_issue("t", "b", labels=["security", "security_scan"]) assert out == created call = m.call_args method = call.args[0] if call.args else call.kwargs["method"] url = call.args[1] if len(call.args) > 1 else call.kwargs["url"] assert method == "POST" assert url == "https://api.github.com/repos/leverj/ezel/issues" - assert call.kwargs["json"] == {"title": "t", "body": "b", "labels": ["security", "secscan"]} + assert call.kwargs["json"] == {"title": "t", "body": "b", "labels": ["security", "security_scan"]} assert gh.session.headers["Authorization"] == f"Bearer {TOKEN}" assert gh.session.headers["Accept"] == "application/vnd.github+json" assert gh.session.headers["X-GitHub-Api-Version"] == "2022-11-28" @@ -318,7 +318,7 @@ def test_set_project_field_calls_update_mutation(): def test_set_project_field_unknown_option_is_noop(): - """If the user renamed an option, secscan must not crash — silently skip.""" + """If the user renamed an option, security_scan must not crash — silently skip.""" gh = _gh() field = ProjectField(id="FID", options={"critical": "o-crit"}) with 
patch.object(requests.Session, "request") as m: @@ -354,7 +354,7 @@ def test_retry_on_500(): gh = _gh() bad = _resp(500, json_body={"message": "boom"}) good = _resp(201, json_body={"id": 1, "node_id": "I_x", "number": 1, "title": "t", "body": "b", "html_url": "u", "state": "open"}) - with patch("secscan.github.time.sleep") as sl, \ + with patch("security_scan.github.time.sleep") as sl, \ patch.object(requests.Session, "request", side_effect=[bad, good]) as m: gh.create_issue("t", "b") assert m.call_count == 2 @@ -371,7 +371,7 @@ def test_rate_limit_waits_and_retries(): headers={"X-RateLimit-Remaining": "0", "X-RateLimit-Reset": str(reset_at)}, ) good = _resp(201, json_body={"id": 1, "node_id": "I_x", "number": 1, "title": "t", "body": "b", "html_url": "u", "state": "open"}) - with patch("secscan.github.time.sleep") as sl, \ + with patch("security_scan.github.time.sleep") as sl, \ patch.object(requests.Session, "request", side_effect=[limited, good]) as m: gh.create_issue("t", "b") assert m.call_count == 2 diff --git a/tests/test_main.py b/tests/test_main.py index 1b1648b..1656b88 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -4,7 +4,7 @@ from pathlib import Path from unittest.mock import MagicMock, patch -from secscan.config import ( +from security_scan.config import ( Config, PathsConfig, ProjectConfig, @@ -12,8 +12,8 @@ SlackConfig, TriageConfig, ) -from secscan.github import ProjectContext, ProjectField -from secscan.runners import RunnerResult +from security_scan.github import ProjectContext, ProjectField +from security_scan.runners import RunnerResult def _cfg(tmp_path, **kw): @@ -115,7 +115,7 @@ def create(title, body, labels=None): def test_e2e_dry_run_creates_no_issues(tmp_path): - from secscan.main import run + from security_scan.main import run repo_dir = tmp_path / "name" _populate_synthetic_repo(repo_dir) @@ -123,10 +123,10 @@ def test_e2e_dry_run_creates_no_issues(tmp_path): fake_gh = _fresh_gh(dry_run=True) results = _scanner_results() - 
with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=results["osv"]) as o, \ - patch("secscan.runners.gitleaks.run", return_value=results["gitleaks"]) as gl, \ - patch("secscan.runners.semgrep.run", return_value=results["semgrep"]) as sg: + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=results["osv"]) as o, \ + patch("security_scan.runners.gitleaks.run", return_value=results["gitleaks"]) as gl, \ + patch("security_scan.runners.semgrep.run", return_value=results["semgrep"]) as sg: fake_gh.clone.side_effect = _clone_populates(None) rc = run(cfg, dry_run=True, work_dir=str(tmp_path), keep_work=True) @@ -139,7 +139,7 @@ def test_e2e_dry_run_creates_no_issues(tmp_path): def test_e2e_creates_issues_when_not_dry_run(tmp_path): - from secscan.main import run + from security_scan.main import run repo_dir = tmp_path / "name" _populate_synthetic_repo(repo_dir) @@ -147,10 +147,10 @@ def test_e2e_creates_issues_when_not_dry_run(tmp_path): fake_gh = _fresh_gh(dry_run=False) results = _scanner_results() - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=results["osv"]), \ - patch("secscan.runners.gitleaks.run", return_value=results["gitleaks"]), \ - patch("secscan.runners.semgrep.run", return_value=results["semgrep"]): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=results["osv"]), \ + patch("security_scan.runners.gitleaks.run", return_value=results["gitleaks"]), \ + patch("security_scan.runners.semgrep.run", return_value=results["semgrep"]): fake_gh.clone.side_effect = _clone_populates(None) rc = run(cfg, dry_run=False, work_dir=str(tmp_path), keep_work=True) @@ -163,17 +163,17 @@ def test_e2e_creates_issues_when_not_dry_run(tmp_path): def test_failed_scanner_does_not_block_others(tmp_path): - from secscan.main import run + 
from security_scan.main import run repo_dir = tmp_path / "name" _populate_synthetic_repo(repo_dir) cfg = _cfg(tmp_path) fake_gh = _fresh_gh(dry_run=False) - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", None, False, "binary not found")), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", None, False, "binary not found")), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): fake_gh.clone.side_effect = _clone_populates(None) rc = run(cfg, dry_run=False, work_dir=str(tmp_path), keep_work=True) @@ -182,17 +182,17 @@ def test_failed_scanner_does_not_block_others(tmp_path): def test_all_scanners_fail_returns_error(tmp_path): - from secscan.main import run + from security_scan.main import run repo_dir = tmp_path / "name" _populate_synthetic_repo(repo_dir) cfg = _cfg(tmp_path) fake_gh = _fresh_gh(dry_run=False) - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", None, False, "x")), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", None, False, "x")), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", None, False, "x")): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", None, False, "x")), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", None, False, "x")), \ + 
patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", None, False, "x")): fake_gh.clone.side_effect = _clone_populates(None) rc = run(cfg, dry_run=False, work_dir=str(tmp_path), keep_work=True) @@ -202,15 +202,15 @@ def test_all_scanners_fail_returns_error(tmp_path): def test_repo_dir_is_wiped_even_when_work_dir_provided(tmp_path): """Security: the clone must be removed even when the caller supplied --work-dir.""" - from secscan.main import run + from security_scan.main import run cfg = _cfg(tmp_path) fake_gh = _fresh_gh(dry_run=False) - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): fake_gh.clone.side_effect = _clone_populates(None) rc = run(cfg, dry_run=False, work_dir=str(tmp_path), keep_work=False) @@ -220,15 +220,15 @@ def test_repo_dir_is_wiped_even_when_work_dir_provided(tmp_path): def test_keep_work_preserves_clone(tmp_path): - from secscan.main import run + from security_scan.main import run cfg = _cfg(tmp_path) fake_gh = _fresh_gh(dry_run=True) - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", 
return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): fake_gh.clone.side_effect = _clone_populates(None) run(cfg, dry_run=True, work_dir=str(tmp_path), keep_work=True) @@ -236,17 +236,17 @@ def test_keep_work_preserves_clone(tmp_path): def test_severity_floor_skips_low_findings(tmp_path): - from secscan.main import run + from security_scan.main import run repo_dir = tmp_path / "name" _populate_synthetic_repo(repo_dir) cfg = _cfg(tmp_path, severity_floor="critical") # only critical fake_gh = _fresh_gh(dry_run=False) - with patch("secscan.main.GitHub", return_value=fake_gh), \ - patch("secscan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ - patch("secscan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ - patch("secscan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): + with patch("security_scan.main.GitHub", return_value=fake_gh), \ + patch("security_scan.runners.osv.run", return_value=RunnerResult("osv", _osv_sarif(), True)), \ + patch("security_scan.runners.gitleaks.run", return_value=RunnerResult("gitleaks", _gitleaks_sarif(), True)), \ + patch("security_scan.runners.semgrep.run", return_value=RunnerResult("semgrep", _semgrep_sarif(), True)): fake_gh.clone.side_effect = _clone_populates(None) rc = run(cfg, dry_run=False, work_dir=str(tmp_path), keep_work=True) diff --git a/tests/test_models.py b/tests/test_models.py index 5a8abf0..5fca28a 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,4 +1,4 @@ -from secscan.models import Finding, normalize_severity 
+from security_scan.models import Finding, normalize_severity def test_normalize_severity_from_security_severity_score(): diff --git a/tests/test_new_scanners.py b/tests/test_new_scanners.py index edfb309..3a4e2b3 100644 --- a/tests/test_new_scanners.py +++ b/tests/test_new_scanners.py @@ -4,10 +4,10 @@ from pathlib import Path from unittest.mock import patch -from secscan.normalize import normalize_sarif -from secscan.runners import syft as syft_runner -from secscan.runners import trivy as trivy_runner -from secscan.runners import trufflehog as trufflehog_runner +from security_scan.normalize import normalize_sarif +from security_scan.runners import syft as syft_runner +from security_scan.runners import trivy as trivy_runner +from security_scan.runners import trufflehog as trufflehog_runner FIXTURES = Path(__file__).parent / "fixtures" @@ -26,14 +26,14 @@ def _completed(rc=0, stdout="", stderr=""): def test_trivy_runner_happy_path(tmp_path): sarif = (FIXTURES / "sarif" / "trivy.json").read_text() - with patch("secscan.runners.subprocess.run", return_value=_completed(0, sarif, "")): + with patch("security_scan.runners.subprocess.run", return_value=_completed(0, sarif, "")): result = trivy_runner.run(tmp_path) assert result.completed and result.sarif is not None assert result.scanner == "trivy" def test_trivy_cmd_includes_all_scanners(tmp_path): - with patch("secscan.runners.subprocess.run", return_value=_completed(0, "{}", "")) as m: + with patch("security_scan.runners.subprocess.run", return_value=_completed(0, "{}", "")) as m: trivy_runner.run(tmp_path, exclude=["vendor/"]) cmd = m.call_args.args[0] # Joined --scanners value @@ -73,7 +73,7 @@ def test_trivy_exclude_filter(tmp_path): def test_trivy_binary_not_found(tmp_path): - with patch("secscan.runners.subprocess.run", side_effect=FileNotFoundError("trivy")): + with patch("security_scan.runners.subprocess.run", side_effect=FileNotFoundError("trivy")): result = trivy_runner.run(tmp_path) assert not 
result.completed assert "binary not found" in (result.error or "") @@ -84,7 +84,7 @@ def test_trivy_binary_not_found(tmp_path): def test_trufflehog_runner_wraps_jsonl(tmp_path): jsonl = (FIXTURES / "trufflehog.jsonl").read_text() - with patch("secscan.runners.subprocess.run", return_value=_completed(0, jsonl, "")): + with patch("security_scan.runners.subprocess.run", return_value=_completed(0, jsonl, "")): result = trufflehog_runner.run(tmp_path) assert result.completed assert isinstance(result.sarif, dict) @@ -134,7 +134,7 @@ def test_trufflehog_skips_unparseable_lines(tmp_path, capsys): def test_trufflehog_exit_code_nonzero_is_failure(tmp_path): - with patch("secscan.runners.subprocess.run", return_value=_completed(2, "", "config error")): + with patch("security_scan.runners.subprocess.run", return_value=_completed(2, "", "config error")): result = trufflehog_runner.run(tmp_path) assert not result.completed assert "exit 2" in (result.error or "") @@ -157,7 +157,7 @@ def fake_run(cmd, **kw): p.stderr = "" return p - with patch("secscan.runners.subprocess.run", side_effect=fake_run): + with patch("security_scan.runners.subprocess.run", side_effect=fake_run): result = syft_runner.run(tmp_path, output_path=sbom_path) assert result.completed meta = result.sarif["_syft_sbom"] @@ -168,13 +168,13 @@ def fake_run(cmd, **kw): def test_syft_runner_failure_missing_output(tmp_path): sbom_path = tmp_path / "should-not-exist.json" - with patch("secscan.runners.subprocess.run", return_value=_completed(0, "", "")): + with patch("security_scan.runners.subprocess.run", return_value=_completed(0, "", "")): result = syft_runner.run(tmp_path, output_path=sbom_path) assert not result.completed def test_syft_binary_not_found(tmp_path): - with patch("secscan.runners.subprocess.run", side_effect=FileNotFoundError("syft")): + with patch("security_scan.runners.subprocess.run", side_effect=FileNotFoundError("syft")): result = syft_runner.run(tmp_path, output_path=tmp_path / "x.json") assert 
not result.completed assert "binary not found" in (result.error or "") diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 1619f18..e791fd0 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -3,8 +3,8 @@ import pytest -from secscan.fingerprint import resolve_fingerprint -from secscan.normalize import normalize_sarif +from security_scan.fingerprint import resolve_fingerprint +from security_scan.normalize import normalize_sarif FIXTURES = Path(__file__).parent / "fixtures" / "sarif" diff --git a/tests/test_notify.py b/tests/test_notify.py index c45378e..6d73466 100644 --- a/tests/test_notify.py +++ b/tests/test_notify.py @@ -1,9 +1,9 @@ from unittest.mock import MagicMock, patch -from secscan.config import SlackConfig -from secscan.models import Finding -from secscan.notify import _default_digest, post_digest -from secscan.sync import SyncResult +from security_scan.config import SlackConfig +from security_scan.models import Finding +from security_scan.notify import _default_digest, post_digest +from security_scan.sync import SyncResult def _f(sev): @@ -12,7 +12,7 @@ def _f(sev): def test_disabled_slack_is_noop(monkeypatch): slack = SlackConfig(enabled=False) - monkeypatch.setattr("secscan.notify.requests.post", lambda *a, **kw: (_ for _ in ()).throw(AssertionError("called"))) + monkeypatch.setattr("security_scan.notify.requests.post", lambda *a, **kw: (_ for _ in ()).throw(AssertionError("called"))) assert post_digest(slack, [], SyncResult(), "o/n", "main", "owner", 1) is False @@ -20,12 +20,12 @@ def test_webhook_called_with_text(monkeypatch): monkeypatch.setenv("SLACK_WEBHOOK_URL", "https://hooks.slack.test/x") slack = SlackConfig(enabled=True, webhook_url_env="SLACK_WEBHOOK_URL") resp = MagicMock(status_code=200) - with patch("secscan.notify.requests.post", return_value=resp) as mp: + with patch("security_scan.notify.requests.post", return_value=resp) as mp: ok = post_digest(slack, [_f("high")], SyncResult(created=[{"number": 
1}]), "o/n", "main", "owner", 42) assert ok is True args, kwargs = mp.call_args assert args[0] == "https://hooks.slack.test/x" - assert "secscan" in kwargs["json"]["text"] + assert "security_scan" in kwargs["json"]["text"] def test_webhook_missing_env_returns_false(monkeypatch, capsys): @@ -40,7 +40,7 @@ def test_chat_postmessage_used_when_channel_set(monkeypatch): slack = SlackConfig(enabled=True, channel_id_env="SLACK_CHANNEL_ID", bot_token_env="SLACK_BOT_TOKEN") resp = MagicMock(status_code=200) resp.json.return_value = {"ok": True} - with patch("secscan.notify.requests.post", return_value=resp) as mp: + with patch("security_scan.notify.requests.post", return_value=resp) as mp: ok = post_digest(slack, [], SyncResult(), "o/n", "main", "owner", 1) assert ok is True assert mp.call_args.args[0] == "https://slack.com/api/chat.postMessage" @@ -97,7 +97,7 @@ def test_default_digest_below_floor_only_says_so(): def test_default_digest_groups_by_category(): - from secscan.models import Finding + from security_scan.models import Finding findings = [ Finding("trivy", "dependency", "CVE-2024-1", "critical", "package-lock.json", 1, "t", "m", extra={"package": "left-pad", "installed_version": "1.0.0", @@ -123,7 +123,7 @@ def test_default_digest_groups_by_category(): def test_one_liner_does_not_repeat_rule_id_when_package_extra_missing(): """OSV often leaves extras empty; the message text has the package name. 
Don't render '`CVE-X` · `CVE-X` · no fix' — that's noise.""" - from secscan.models import Finding + from security_scan.models import Finding f = Finding( "osv", "dependency", "CVE-2026-33169", "medium", "Gemfile.lock", 1, "title", @@ -142,7 +142,7 @@ def test_one_liner_does_not_repeat_rule_id_when_package_extra_missing(): def test_default_digest_caps_per_section(): - from secscan.models import Finding + from security_scan.models import Finding findings = [ Finding("semgrep", "sast", f"rule-{i}", "medium", "f.js", i, f"t{i}", "m") for i in range(10) @@ -162,7 +162,7 @@ def test_intro_is_prepended_to_structured_digest(monkeypatch): slack = SlackConfig(enabled=True, webhook_url_env="SLACK_WEBHOOK_URL") resp = MagicMock(status_code=200) actionable = [_f("high"), _f("medium")] - with patch("secscan.notify.requests.post", return_value=resp) as mp: + with patch("security_scan.notify.requests.post", return_value=resp) as mp: post_digest( slack, actionable, SyncResult(created=[{"n": 1}, {"n": 2}], created_findings=actionable), @@ -180,7 +180,7 @@ def test_digest_text_legacy_param_still_overrides(monkeypatch): monkeypatch.setenv("SLACK_WEBHOOK_URL", "https://hooks.slack.test/x") slack = SlackConfig(enabled=True, webhook_url_env="SLACK_WEBHOOK_URL") resp = MagicMock(status_code=200) - with patch("secscan.notify.requests.post", return_value=resp) as mp: + with patch("security_scan.notify.requests.post", return_value=resp) as mp: post_digest(slack, [_f("high")], SyncResult(), "o/n", "main", "owner", 9, digest_text="exact replacement") assert mp.call_args.kwargs["json"]["text"] == "exact replacement" @@ -190,6 +190,6 @@ def test_failure_is_non_blocking(monkeypatch): import requests monkeypatch.setenv("SLACK_WEBHOOK_URL", "https://hooks.slack.test/x") slack = SlackConfig(enabled=True, webhook_url_env="SLACK_WEBHOOK_URL") - with patch("secscan.notify.requests.post", side_effect=requests.ConnectionError("down")): + with patch("security_scan.notify.requests.post", 
side_effect=requests.ConnectionError("down")): ok = post_digest(slack, [], SyncResult(), "o/n", "main", "owner", 1) assert ok is False # didn't raise diff --git a/tests/test_resolve_rules.py b/tests/test_resolve_rules.py index 9d2ca3d..d954250 100644 --- a/tests/test_resolve_rules.py +++ b/tests/test_resolve_rules.py @@ -8,7 +8,7 @@ from pathlib import Path from unittest.mock import patch -from secscan.config import ( +from security_scan.config import ( Config, PathsConfig, ProjectConfig, @@ -16,7 +16,7 @@ SlackConfig, TriageConfig, ) -from secscan.main import _has_rule_files, _resolve_semgrep_rules +from security_scan.main import _has_rule_files, _resolve_semgrep_rules def _cfg(rules=None): @@ -68,7 +68,7 @@ def test_resolver_skips_empty_rules_mount_falls_through_to_bundled(tmp_path: Pat bundled.mkdir() (bundled / "r.yaml").write_text("rules: []") - with patch("secscan.main.Path") as P: + with patch("security_scan.main.Path") as P: # `Path("/rules")` -> empty mount; bundled discovered via __file__ parent / "rules" def fake_path(arg): if arg == "/rules": @@ -76,7 +76,7 @@ def fake_path(arg): return Path(arg) P.side_effect = fake_path # Make `Path(__file__).parent / "rules"` resolve to our bundled stub. - monkeypatch.setattr("secscan.main.__file__", str(bundled / "main.py")) + monkeypatch.setattr("security_scan.main.__file__", str(bundled / "main.py")) # Re-patching Path through to real Path for the parent / "rules" computation # is fiddly; instead, call the resolver but verify behavior through _has_rule_files. 
@@ -90,7 +90,7 @@ def test_resolver_returns_auto_when_nothing_has_rules(tmp_path: Path, monkeypatc the resolver falls back to 'auto'.""" no_rules_pkg = tmp_path / "pkg" no_rules_pkg.mkdir() - monkeypatch.setattr("secscan.main.__file__", str(no_rules_pkg / "main.py")) + monkeypatch.setattr("security_scan.main.__file__", str(no_rules_pkg / "main.py")) # Force the /rules check to fail (typical host system has no /rules) # by relying on the real filesystem; if /rules exists on the test host that's still # fine because it would have to contain *.yaml/yml/json to count. diff --git a/tests/test_runners.py b/tests/test_runners.py index 271e3c8..536a10c 100644 --- a/tests/test_runners.py +++ b/tests/test_runners.py @@ -9,10 +9,10 @@ import pytest -from secscan.runners import RunnerResult, _run -from secscan.runners import gitleaks as gitleaks_runner -from secscan.runners import osv as osv_runner -from secscan.runners import semgrep as semgrep_runner +from security_scan.runners import RunnerResult, _run +from security_scan.runners import gitleaks as gitleaks_runner +from security_scan.runners import osv as osv_runner +from security_scan.runners import semgrep as semgrep_runner TINY_SARIF = { "version": "2.1.0", @@ -56,7 +56,7 @@ def _assert_no_execute_verbs(cmd: list[str]) -> None: # --- _run -------------------------------------------------------------------- def test_run_invokes_subprocess_with_cwd(tmp_path: Path): - with patch("secscan.runners.subprocess.run") as m: + with patch("security_scan.runners.subprocess.run") as m: m.return_value = _fake_completed(0, "hello", "") rc, out, err = _run(["echo", "hi"], cwd=tmp_path) assert (rc, out, err) == (0, "hello", "") @@ -78,7 +78,7 @@ def test_run_invokes_subprocess_with_cwd(tmp_path: Path): ], ) def test_runner_exit_zero_returns_parsed_sarif(module, kwargs, scanner, tmp_path: Path): - with patch("secscan.runners.subprocess.run", side_effect=_fake_side_effect(0, TINY_SARIF_JSON, "")) as m: + with 
patch("security_scan.runners.subprocess.run", side_effect=_fake_side_effect(0, TINY_SARIF_JSON, "")) as m: result: RunnerResult = module.run(tmp_path, **kwargs) assert result.completed is True assert result.scanner == scanner @@ -107,7 +107,7 @@ def test_runner_exit_zero_returns_parsed_sarif(module, kwargs, scanner, tmp_path def test_runner_vulns_found_exit_code_is_success( module, kwargs, scanner, vuln_rc, tmp_path: Path ): - with patch("secscan.runners.subprocess.run", side_effect=_fake_side_effect(vuln_rc, TINY_SARIF_JSON, "")) as m: + with patch("security_scan.runners.subprocess.run", side_effect=_fake_side_effect(vuln_rc, TINY_SARIF_JSON, "")) as m: result = module.run(tmp_path, **kwargs) assert m.called assert result.completed is True @@ -127,7 +127,7 @@ def test_runner_vulns_found_exit_code_is_success( ], ) def test_runner_binary_not_found(module, kwargs, binary_name, tmp_path: Path): - with patch("secscan.runners.subprocess.run", side_effect=FileNotFoundError(binary_name)): + with patch("security_scan.runners.subprocess.run", side_effect=FileNotFoundError(binary_name)): result = module.run(tmp_path, **kwargs) assert result.completed is False assert result.sarif is None @@ -146,7 +146,7 @@ def test_runner_binary_not_found(module, kwargs, binary_name, tmp_path: Path): ], ) def test_runner_unexpected_exit_code_is_failure(module, kwargs, tmp_path: Path): - with patch("secscan.runners.subprocess.run") as m: + with patch("security_scan.runners.subprocess.run") as m: m.return_value = _fake_completed(99, "", "boom") result = module.run(tmp_path, **kwargs) assert result.completed is False @@ -165,7 +165,7 @@ def test_runner_unexpected_exit_code_is_failure(module, kwargs, tmp_path: Path): ], ) def test_runner_unparseable_json_is_failure(module, kwargs, tmp_path: Path): - with patch("secscan.runners.subprocess.run", side_effect=_fake_side_effect(0, "not json at all <<<", "")): + with patch("security_scan.runners.subprocess.run", side_effect=_fake_side_effect(0, "not 
json at all <<<", "")): result = module.run(tmp_path, **kwargs) assert result.completed is False assert result.sarif is None @@ -184,7 +184,7 @@ def test_runner_unparseable_json_is_failure(module, kwargs, tmp_path: Path): ], ) def test_runner_cmd_has_no_execute_verbs(module, kwargs, tmp_path: Path): - with patch("secscan.runners.subprocess.run") as m: + with patch("security_scan.runners.subprocess.run") as m: m.return_value = _fake_completed(0, TINY_SARIF_JSON, "") module.run(tmp_path, **kwargs) cmd = m.call_args.args[0] @@ -202,7 +202,7 @@ def test_runner_cmd_has_no_execute_verbs(module, kwargs, tmp_path: Path): ], ) def test_runner_subprocess_cwd_is_set(module, kwargs, tmp_path: Path): - with patch("secscan.runners.subprocess.run") as m: + with patch("security_scan.runners.subprocess.run") as m: m.return_value = _fake_completed(0, TINY_SARIF_JSON, "") module.run(tmp_path, **kwargs) cwd = m.call_args.kwargs.get("cwd") @@ -216,7 +216,7 @@ def test_osv_does_not_pass_paths_to_ignore(tmp_path: Path): """osv-scanner's exclude flag name varies by version (and is unsupported on 1.9.2). 
We rely on post-hoc filtering in normalize.py instead — assert the flag is never passed even when excludes are configured.""" - with patch("secscan.runners.subprocess.run") as m: + with patch("security_scan.runners.subprocess.run") as m: m.return_value = _fake_completed(0, TINY_SARIF_JSON, "") osv_runner.run(tmp_path, exclude=["vendor/", "archive/"]) cmd = m.call_args.args[0] @@ -228,7 +228,7 @@ def test_osv_does_not_pass_paths_to_ignore(tmp_path: Path): # --- semgrep-specific: excludes + config wired in --------------------------- def test_semgrep_passes_config_and_excludes(tmp_path: Path): - with patch("secscan.runners.subprocess.run") as m: + with patch("security_scan.runners.subprocess.run") as m: m.return_value = _fake_completed(0, TINY_SARIF_JSON, "") semgrep_runner.run(tmp_path, rules_dir="/rules", exclude=["archive/", "vendor/"]) cmd = m.call_args.args[0] @@ -244,7 +244,7 @@ def test_semgrep_passes_config_and_excludes(tmp_path: Path): def test_gitleaks_writes_report_to_tempfile_in_root(tmp_path: Path): """v8 ignores `--report-path -` (silently writes 0 bytes to stdout). We must pass a real file path inside the scan root.""" - with patch("secscan.runners.subprocess.run", side_effect=_fake_side_effect(0, TINY_SARIF_JSON, "")) as m: + with patch("security_scan.runners.subprocess.run", side_effect=_fake_side_effect(0, TINY_SARIF_JSON, "")) as m: gitleaks_runner.run(tmp_path) cmd = m.call_args.args[0] assert "--report-format" in cmd @@ -267,7 +267,7 @@ def _capture(cmd, **kw): Path(cmd[idx + 1]).write_text(TINY_SARIF_JSON) return _fake_completed(0, "", "") - with patch("secscan.runners.subprocess.run", side_effect=_capture): + with patch("security_scan.runners.subprocess.run", side_effect=_capture): gitleaks_runner.run(tmp_path) assert not Path(captured_path["p"]).exists() @@ -276,7 +276,7 @@ def _capture(cmd, **kw): def test_gitleaks_accepts_any_exit_code_when_report_parses(rc, tmp_path: Path): """v7 used rc=77 for "leaks found"; v8 uses rc=1. 
We trust the SARIF parse, not the exit code: if the report file is valid SARIF the run was successful.""" - with patch("secscan.runners.subprocess.run", side_effect=_fake_side_effect(rc, TINY_SARIF_JSON, "")): + with patch("security_scan.runners.subprocess.run", side_effect=_fake_side_effect(rc, TINY_SARIF_JSON, "")): result = gitleaks_runner.run(tmp_path) assert result.completed is True assert result.sarif == TINY_SARIF @@ -284,7 +284,7 @@ def test_gitleaks_accepts_any_exit_code_when_report_parses(rc, tmp_path: Path): def test_gitleaks_no_report_file_written_is_failure(tmp_path: Path): """Genuine failure: scanner didn't write the report. Empty/missing file -> error.""" - with patch("secscan.runners.subprocess.run", return_value=_fake_completed(1, "", "config error")): + with patch("security_scan.runners.subprocess.run", return_value=_fake_completed(1, "", "config error")): result = gitleaks_runner.run(tmp_path) assert result.completed is False assert "no SARIF report written" in (result.error or "") or "exit 1" in (result.error or "") diff --git a/tests/test_sync.py b/tests/test_sync.py index 90358a6..ff1d638 100644 --- a/tests/test_sync.py +++ b/tests/test_sync.py @@ -1,9 +1,9 @@ from unittest.mock import MagicMock -from secscan.fingerprint import inject_marker, resolve_fingerprint -from secscan.github import ProjectContext, ProjectField -from secscan.models import Finding -from secscan.sync import default_issue, sync +from security_scan.fingerprint import inject_marker, resolve_fingerprint +from security_scan.github import ProjectContext, ProjectField +from security_scan.models import Finding +from security_scan.sync import default_issue, sync def _project(): @@ -112,7 +112,7 @@ def test_marker_is_always_injected_on_created_body(): gh = _gh(existing=[]) sync([f], gh, _project()) body = gh.create_issue.call_args.args[1] if gh.create_issue.call_args.args else gh.create_issue.call_args.kwargs["body"] - assert "