nanohype · stxkxs · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,24 @@
+# Keep the Docker build context minimal — the Dockerfile only needs
+# package*.json, tsconfig.json, src/, and sources.example.json. Excluding the
+# rest speeds builds and keeps local secrets/artifacts out of image layers.
+node_modules
+dist
+coverage
+.git
+.github
+.gitignore
+.gitleaks.toml
+.editorconfig
+.prettierrc
+.prettierignore
+eslint.config.js
+vitest.config.ts
+chart
+docs
+gitops
+*.md
+.env*
+platform.yaml
+renovate.json
+Taskfile.yaml
+src/**/*.test.ts
diff --git a/.env.example b/.env.example
@@ -20,9 +20,11 @@ AWS_REGION=us-east-1
 BEDROCK_LLM_MODEL=us.anthropic.claude-sonnet-4-20250514-v1:0
 BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
 
-# ─── Direct API keys (only if using anthropic/openai providers) ───
+# ─── Direct API keys + models (only if using anthropic/openai providers) ───
 # ANTHROPIC_API_KEY=
 # OPENAI_API_KEY=
+# ANTHROPIC_LLM_MODEL=claude-sonnet-4-6   # direct-Anthropic model (default shown)
+# OPENAI_LLM_MODEL=gpt-4o                  # direct-OpenAI model (default shown)
 
 # ─── Embeddings ───
 EMBEDDING_DIMENSIONS=1024          # 1024 for Titan v2, 1536 for OpenAI
@@ -37,6 +39,9 @@ VECTOR_PROVIDER=memory             # memory (data lost on restart) | pgvector (d
 # PGDATABASE=competitive_intelligence
 # PGUSER=
 # PGPASSWORD=
+# Path to a CA bundle (e.g. the Amazon RDS global CA) for verifying the pgvector
+# TLS connection. Unset → Node's built-in trust store (sufficient for RDS/Aurora).
+# PG_CA_PATH=/etc/ssl/rds/global-bundle.pem
 
 # ─── Crawler ───
 CRAWL_INTERVAL_MINUTES=60
@@ -58,6 +63,7 @@ SIGNIFICANCE_THRESHOLD=0.3        # 0–1, semantic change required to trigger a
 # Leave unset locally to fall back to the no-op API.
 # OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector.observability.svc.cluster.local:4318
 # OTEL_RESOURCE_ATTRIBUTES=service.name=competitive-intelligence,deployment.environment=dev,agents.tenant=protohype,agents.platform=competitive-intelligence
+# OTEL_SERVICE_NAME=competitive-intelligence  # sets service.name; takes precedence over service.name in OTEL_RESOURCE_ATTRIBUTES
 # OTEL_SDK_DISABLED=true            # set true to disable OTel entirely (e.g. local dev)
 
 # ─── Server ───

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -48,8 +48,9 @@ jobs:
       - name: Test
         # OTEL_SDK_DISABLED short-circuits the OTel SDK so the test run never
         # tries to reach the cluster collector. The metrics/tracing API
-        # degrades to a no-op without a registered provider.
-        run: npm test
+        # degrades to a no-op without a registered provider. --coverage enforces
+        # the thresholds in vitest.config.ts so coverage can't silently regress.
+        run: npm run test:coverage
         env:
           OTEL_SDK_DISABLED: "true"
 

diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml
@@ -37,6 +37,11 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
+      # Report step: ALWAYS produce + upload a clean SARIF (exit-code 0) so
+      # code scanning records a SUCCESSFUL analysis. Coupling the SARIF run with
+      # the build gate (exit-code 1) makes Trivy mark the SARIF invocation
+      # unsuccessful whenever a finding exists, which GitHub surfaces as
+      # "Trivy is reporting errors" / a code-scanning configuration error.
       - uses: aquasecurity/trivy-action@master
         with:
           scan-type: config
@@ -48,26 +53,34 @@ jobs:
           # them rather than surfacing real findings.
           scan-ref: Dockerfile
           severity: HIGH,CRITICAL
-          # The gate is HIGH/CRITICAL only. Without this, trivy-action builds
-          # the SARIF with all severities and exits non-zero on any of them —
-          # so MEDIUM/LOW lint (e.g. dockerfile best-practice notes) would fail
-          # the build. This keeps the gate to HIGH/CRITICAL while still
-          # surfacing lower-severity findings in the SARIF for code-scanning.
           limit-severities-for-sarif: true
-          exit-code: "1"
+          exit-code: "0"
           format: sarif
           output: trivy-config.sarif
       - uses: github/codeql-action/upload-sarif@v3
         if: always()
         with:
           sarif_file: trivy-config.sarif
           category: trivy-config
+      # Gate step: fail the build on HIGH/CRITICAL, reusing the DB the report
+      # step already downloaded (skip-db-update). Separate from the SARIF upload
+      # so a finding fails CI without poisoning the code-scanning analysis.
+      - name: gate on HIGH/CRITICAL
+        uses: aquasecurity/trivy-action@master
+        with:
+          scan-type: config
+          scan-ref: Dockerfile
+          severity: HIGH,CRITICAL
+          format: table
+          exit-code: "1"
+          skip-db-update: true
 
   trivy-fs:
     name: trivy (filesystem vuln scan — npm deps)
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
+      # Report step: always upload a clean SARIF (exit-code 0) — see trivy-config.
       - uses: aquasecurity/trivy-action@master
         with:
           scan-type: fs
@@ -77,10 +90,8 @@ jobs:
           # to actionable (patch-available) CVEs.
           scanners: vuln
           severity: HIGH,CRITICAL
-          # Gate on HIGH/CRITICAL only (see trivy-config note); lower-severity
-          # CVEs still upload to code-scanning as warnings.
           limit-severities-for-sarif: true
-          exit-code: "1"
+          exit-code: "0"
           ignore-unfixed: true
           format: sarif
           output: trivy-fs.sarif
@@ -90,3 +101,16 @@ jobs:
         with:
           sarif_file: trivy-fs.sarif
           category: trivy-fs
+      # Gate step: fail the build on HIGH/CRITICAL, reusing the cached DB.
+      - name: gate on HIGH/CRITICAL
+        uses: aquasecurity/trivy-action@master
+        with:
+          scan-type: fs
+          scan-ref: .
+          scanners: vuln
+          severity: HIGH,CRITICAL
+          ignore-unfixed: true
+          format: table
+          exit-code: "1"
+          skip-dirs: "node_modules"
+          skip-db-update: true
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 node_modules/
 dist/
+coverage/
 .env
 *.log
 .DS_Store
diff --git a/AGENTS.md b/AGENTS.md
@@ -128,10 +128,10 @@ The vector store is the durability seam. `VectorStore` (`src/providers/vectors.t
 - **Provider registry, not inline construction.** LLM / embeddings / vectors are each a `createRegistry<T>(kind)` returning typed `{ register, get, has, names }`. Pick the implementation by config; `src/index.ts` is the only place real clients are built. Swapping a backend is a one-file change to the bootstrap.
 - **Bedrock-default LLM.** Bedrock (Converse for the LLM, Titan for embeddings) is the default and runs on the AWS credential chain — IRSA on the cluster, no keys. Anthropic/OpenAI are alternates that only register when their key is present.
 - **Prompt caching.** The analysis system prompt is identical on every diff, so the Converse request marks a `cachePoint` after the system block. Cache hits are emitted as a metric — see `ARCHITECTURE.md` § Prompt caching.
-- **Circuit breakers on every external call** — per-host for the crawler's HTTP fetcher, per-provider for LLM + embeddings. Threshold-based, no library.
+- **Circuit breakers on every external call** — per-host for the crawler's HTTP fetcher, per-provider for LLM + embeddings, and around the Slack alert sink. Threshold-based, no library.
 - **Single-writer scheduler + crawl mutex.** `replicaCount: 1`. The scheduler runs one global crawl over all sources on an interval; an in-process mutex prevents the scheduler and a `/competitive-intelligence crawl` from overlapping. Scaling horizontally without leader election would double-crawl and race the differ — don't.
 - **SSRF-guarded crawling.** Every outbound crawl URL passes `guardUrl` (`src/crawler/url-guard.ts`) — rejects loopback, RFC1918, link-local, and cloud-metadata addresses before the fetch.
-- TypeScript strict, ESM NodeNext, Node ≥ 24. Zod at every boundary (config, sources, log level). Structured JSON logging to stderr via Pino; stdout is reserved for CLI output. Explicit timeouts on every external call.
+- TypeScript strict, ESM NodeNext, Node ≥ 24. Zod at every boundary (config, sources, log level, LLM analysis output). Structured JSON logging to stderr via a hand-rolled logger (`src/logger.ts`); stdout is reserved for CLI output. Explicit timeouts on every external call (Bedrock/Anthropic/OpenAI, pgvector, Slack).
 
 ## Pointers
 

diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
@@ -17,7 +17,7 @@ The system organizes around eight contexts. Cross-boundary services go through a
 | **scheduler**  | `src/scheduler/`  | `createScheduler` is a `setInterval`-based job runner. Runs one global crawl over all sources at `CRAWL_INTERVAL_MINUTES`. The crawl mutex (in `src/index.ts`) prevents the scheduler and a slash-command crawl from overlapping                                                         |
 | **resilience** | `src/resilience/` | `CircuitBreaker` — a threshold-based breaker used per-host by the fetcher and per-provider by the LLM/embeddings providers. Trip → fail fast → half-open probe → recover                                                                                                                 |
 
-Cross-cutting: `src/config.ts` (Zod env validation, fail-fast at boot), `src/logger.ts` (Pino JSON to stderr with OTel `trace_id`/`span_id` correlation), `src/metrics.ts` (OTel timing/counter surface), `src/cli.ts` (one-off `crawl`/`query`), `src/index.ts` (bootstrap + the `/health`+`/readyz` HTTP server).
+Cross-cutting: `src/config.ts` (Zod env validation, fail-fast at boot), `src/logger.ts` (hand-rolled structured JSON logging to stderr), `src/metrics.ts` (OTel timing/counter/gauge surface), `src/cli.ts` (one-off `crawl`/`query`) with `src/display.ts` (ANSI CLI presentation), `src/index.ts` (bootstrap + the `/health`+`/readyz` HTTP server). OTel is initialized by the Dockerfile's auto-instrumentations `--require` preload (env-driven config), not by app code.
 
 ## Key decisions
 
@@ -47,7 +47,7 @@ The analysis system prompt (`ANALYSIS_SYSTEM` in `src/intel/analysis.ts`) is ide
 
 The analysis system prompt is cached via a Converse `cachePoint` marker placed after the system block (and after any stable context prefix). Because that prefix is byte-identical across every diff analyzed within the cache TTL, the second and subsequent analyses in a crawl batch read the prompt from cache rather than re-billing it as input tokens.
 
-Cache effectiveness is **measured, not assumed**. The provider records Bedrock token usage split by kind, emitted as `bedrock.tokens{kind}` with `kind ∈ {input, output, cache_read, cache_write}`. The cache-hit ratio is `cache_read / (cache_read + cache_write)` over a window — high on a warm radar (the system prompt is reused across every source in a crawl) and zero only on the first analysis after a cache expiry. The Grafana dashboard plots the ratio and the token split; the LLM policy requires both the `cachePoint` marker and a measured ratio, which the metric satisfies.
+Cache effectiveness is **measured, not assumed**. `BedrockLlmProvider.chat` records token usage from every Converse response as four distinct counters — `bedrock.input_tokens`, `bedrock.output_tokens`, `bedrock.cache_read_tokens`, `bedrock.cache_write_tokens` (exported to Mimir as `competitive_intelligence_bedrock_*_tokens_total`). The cache-hit ratio is `cache_read_tokens / (cache_read_tokens + cache_write_tokens)` over a window — high on a warm radar (the system prompt is reused across every source in a crawl) and zero only on the first analysis after a cache expiry. The Grafana dashboard plots the ratio and the token split; the LLM policy requires both the `cachePoint` marker and a measured ratio, which these counters satisfy.
 
 ## Data flow: a single crawl
 
@@ -60,7 +60,8 @@ Cache effectiveness is **measured, not assumed**. The provider records Bedrock t
       b. embed chunks (Bedrock Titan, default)
       c. semantic diff: each chunk vs best same-source match (cosine < 0.85 → new)
          → cold-start guard: source count()==0 → baseline (ingest, suppress alerts)
-      d. replace history: deleteByMetadata(sourceId) → upsert new chunks
+      d. replace history: upsert new chunks → prune stale (deleteByMetadata, keeping new ids)
+         — ordered so a mid-write failure can't wipe a source's history
 5.  alertEngine.processDiffs(diffs): per diff with changeScore ≥ SIGNIFICANCE_THRESHOLD →
       a. LLM analysis (Bedrock Converse, cached system prompt) → summary + significance + signals
       b. format Block Kit → dispatch to the Slack alert sink (#competitive-intel)

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -24,16 +24,17 @@ Core insight: semantic diffing via embedding cosine similarity, not text compari
 
 ## Architecture
 
-- **src/providers/** — Self-registering provider registry. LLM (`llm.ts`: Bedrock/Anthropic/OpenAI), embeddings (`embeddings.ts`: Bedrock Titan/OpenAI), vector store (`vectors.ts`: `MemoryVectorStore` for dev/tests + `PgVectorStore` for durable production, both behind the `VectorStore` interface). All via `createRegistry<T>()`. The Bedrock LLM marks a Converse `cachePoint` after the static analysis system prompt — cache hits are emitted as `bedrock.tokens{kind:cache_read/cache_write}`.
+- **src/providers/** — Self-registering provider registry. LLM (`llm.ts`: Bedrock/Anthropic/OpenAI), embeddings (`embeddings.ts`: Bedrock Titan/OpenAI), vector store (`vectors.ts`: `MemoryVectorStore` for dev/tests + `PgVectorStore` for durable production, both behind the `VectorStore` interface). All via `createRegistry<T>()`. The Bedrock LLM marks a Converse `cachePoint` after the static analysis system prompt — token usage is emitted per kind as `bedrock.{input,output,cache_read,cache_write}_tokens` so cache effectiveness is measurable. Every external call carries an explicit timeout (Bedrock via `requestHandler` + an `AbortSignal.timeout` deadline, Anthropic/OpenAI via the SDK `timeout` option).
 - **src/crawler/** — HTTP fetcher with per-host circuit breakers, HTML→text via cheerio scoped by `selectors`. SSRF-guarded (`url-guard.ts`) — every outbound URL rejects loopback/RFC1918/link-local/metadata addresses before the fetch. Sequential crawling. `sources.ts` Zod-validates `sources.json` on load.
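-- **src/pipeline/** — Recursive text chunker with overlap → embed → semantic diff against stored vectors → `deleteByMetadata` old chunks → upsert new. Holds the cold-start baseline guard.
+- **src/pipeline/** — Recursive text chunker with overlap → embed → semantic diff against stored vectors → upsert new chunks → prune stale ones via `deleteByMetadata` (keeping the new ids), so a mid-write failure can't wipe a source's history. Holds the cold-start baseline guard.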
 - **src/intel/** — Query facade: embed question → vector search → LLM-generated answer with context. `analysis.ts` holds the LLM change analysis (significance + signal extraction) and the cached analysis/query system prompts.
 - **src/alerts/** — Threshold gating on change score → LLM analysis → Slack Block Kit formatting → dispatch through the alert sink. `formatDigest()` in `formatter.ts` exists for a future digest scheduler job.
 - **src/slack/** — `@slack/bolt` app. @mention + DM query handlers (`handlers.ts`), `/competitive-intelligence query|crawl|status` slash command (`commands.ts`). Socket Mode when `SLACK_APP_TOKEN` is set, HTTP mode otherwise.
 - **src/scheduler/** — `setInterval`-based job runner. One global crawl over all sources at a configurable interval. The crawl mutex (in `index.ts`) prevents the scheduler and a slash-command crawl from overlapping.
 - **src/index.ts** — Bootstrap. Wires config → providers → sources → crawl loop → intel/alert engines → Slack bot → scheduler. Runs a `node:http` server for `/health` (liveness) + `/readyz` (readiness — vector store reachable, Slack connected in Socket Mode) on `PORT`, independent of Slack transport. Runs an initial crawl on boot, then on interval. Graceful shutdown on SIGINT/SIGTERM.
-- **src/cli.ts** — One-off `crawl` and `query` commands for use without Slack.
-- **src/metrics.ts** — OTel metrics surface (`timing` → histogram, `counter` → monotonic counter). Exported OTLP by the auto-instrumentation runtime to the cluster OTel Collector. Degrades to a no-op when no provider is registered (tests).
+- **src/cli.ts** — One-off `crawl` and `query` commands for use without Slack. Reuses `crawlAll` (per-source progress via its `onResult` callback) and renders output through **src/display.ts** (ANSI CLI presentation layer).
+- **OTel init** — telemetry is started once, by the Dockerfile's `--require @opentelemetry/auto-instrumentations-node/register` preload (it must load before any instrumented module is imported, which app code cannot guarantee). All export config is env-driven (`OTEL_*` in the chart); there is no programmatic SDK in the app. `OTEL_SDK_DISABLED=true` short-circuits it for tests/CI/local.
+- **src/metrics.ts** — OTel metrics surface (`timing` → ms histogram, `distribution` → unitless histogram, `counter` → monotonic counter, plus an observable `circuit_breaker.open` gauge). Instrument names map to the `competitive_intelligence_*` series the Grafana dashboard + PrometheusRule query. Exported OTLP to the cluster OTel Collector. Degrades to a no-op when no provider is registered (tests).
 
 ## Commands
 
@@ -63,17 +64,21 @@ All config via env vars, validated by Zod in `src/config.ts`. See `.env.example`
 - `LLM_PROVIDER` — bedrock (default), anthropic, or openai
 - `EMBEDDING_PROVIDER` — bedrock (default) or openai
 - `AWS_REGION` — for Bedrock. Uses the AWS credential chain → IRSA on the cluster, no API keys
-- `BEDROCK_LLM_MODEL` / `BEDROCK_EMBEDDING_MODEL` — model IDs (LLM defaults to a current cross-region Claude Sonnet inference profile; embeddings to Titan Embed v2)
+- `BEDROCK_LLM_MODEL` / `BEDROCK_EMBEDDING_MODEL` — model IDs (LLM defaults to a cross-region Claude Sonnet inference profile; embeddings to Titan Embed v2)
 - `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` — only when using those providers directly
+- `ANTHROPIC_LLM_MODEL` / `OPENAI_LLM_MODEL` — direct-API model IDs (defaults `claude-sonnet-4-6` / `gpt-4o`)
+- `EMBEDDING_MODEL` / `EMBEDDING_DIMENSIONS` — OpenAI embedding model ID / embedding vector size for either provider (default 1024; use 1024 for Titan v2, 1536 for OpenAI)
 - `VECTOR_PROVIDER` — `pgvector` in cluster (durable, restart-safe), `memory` for local dev/tests
 - `DATABASE_URL` / `PG*` — Postgres connection for pgvector; in cluster these come from `competitive-intelligence/<env>/db-credentials`
+- `PG_CA_PATH` — optional CA bundle for verifying the pgvector TLS connection; unset → Node's built-in trust store
 - `SIGNIFICANCE_THRESHOLD` — 0–1, minimum change score to trigger an alert (default 0.3)
 - `CRAWL_INTERVAL_MINUTES` — default 60
 - `CRAWL_TIMEOUT_MS` — per-page fetch timeout (default 30000)
 - `SLACK_BOT_TOKEN` / `SLACK_SIGNING_SECRET` / `SLACK_APP_TOKEN` — Slack; absent → CLI-only
 - `SLACK_ALERT_CHANNEL` — alert channel (default `#competitive-intel`)
 - `USER_AGENT` — crawl request User-Agent (default `competitive-intelligence/0.1.0`)
-- `PORT` — HTTP health-server port (default 3000)
+- `PORT` — HTTP health-server port (default 3000); in Slack HTTP mode the Bolt receiver binds `PORT + 1`
+- `NODE_ENV` — development (default), production, or test
 - `LOG_LEVEL` — debug, info (default), warn, error. Zod-validated.
 
 Bedrock needs model access to Claude Sonnet and Titan Embed v2 in the deployment region. Sources are defined in `sources.json` (see `sources.example.json`), Zod-validated on load.

diff --git a/Dockerfile b/Dockerfile
@@ -18,10 +18,11 @@ RUN addgroup -g 1001 -S app && adduser -u 1001 -S app -G app
 COPY package.json package-lock.json ./
 RUN npm ci --omit=dev && npm cache clean --force
 
-# Compiled output + the example sources manifest (the bundled crawl-source
-# catalog the app reads when no sources.json is mounted).
+# Compiled output + the starter crawl-source catalog. The app reads
+# `sources.json` (src/index.ts, src/cli.ts), so ship the example as that path;
+# mount a curated sources.json over it per-env to monitor a real source list.
 COPY --from=builder /app/dist ./dist
-COPY sources.example.json ./
+COPY sources.example.json ./sources.json
 
 USER app