diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f039a61..b2c9064 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -51,6 +51,15 @@ jobs:
       - name: Run API-01 smoke
         run: make api01-smoke
 
+      - name: Run lifecycle check
+        run: make lifecycle-check
+
+      - name: Run mutation smoke
+        run: make mutation-smoke
+
+      - name: Run hardening check
+        run: make hardening-check
+
       - name: Run leak check
         run: make leak-check
 
diff --git a/Makefile b/Makefile
index e66fa0a..80dcc7a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 PYTHON ?= python3
 PYTHONPATH ?= src
 
-.PHONY: validate list smoke compare-smoke if01-smoke data01-smoke doc01-smoke sup01-smoke api01-smoke leak-check test
+.PHONY: validate list smoke compare-smoke if01-smoke data01-smoke doc01-smoke sup01-smoke api01-smoke lifecycle-check mutation-smoke hardening-check leak-check test
 
 validate:
 	PYTHONPATH=$(PYTHONPATH) $(PYTHON) -m agent_bench_lab.cli validate
@@ -58,6 +58,15 @@ api01-smoke:
 	$(PYTHON) scripts/create_api01_mutation.py --out artifacts/mutations/API-01/case_mutation_001
 	PYTHONPATH=$(PYTHONPATH) $(PYTHON) -m pytest -q tests/test_api01.py
 
+lifecycle-check: # Validate configs/task_lifecycle.json
+	$(PYTHON) scripts/check_lifecycle.py
+
+mutation-smoke: # Run the public mutation smoke gate
+	$(PYTHON) scripts/run_mutation_smoke.py
+
+hardening-check: # Validate configs/hardening_gates.json
+	$(PYTHON) scripts/check_hardening_gates.py
+
 leak-check:
 	$(PYTHON) scripts/public_leak_check.py .
 
diff --git a/README.md b/README.md
index ac4f2e9..e46cf79 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,20 @@ The Private Eval Layer holds hidden labels, private holdouts, answer keys, prote
 
 See [Private Eval Layer](docs/private-eval-layer.md), [Scorer type contracts](docs/scorer-types.md), and [Reporting and feedback](docs/reporting-and-feedback.md).
 
+## Benchmark lifecycle and hardening gates
+
+After the first five decision-grade public patterns, v0.6 adds standard-layer gates instead of another task family.
+
+Lifecycle metadata declares whether each task family is `experimental`, `decision-grade`, `verified`, or `deprecated`. Hardening metadata declares mutation smoke scripts and exploit smoke status for decision-grade families. No task is marked `verified` yet.
+
+```bash
+make lifecycle-check
+make mutation-smoke
+make hardening-check
+```
+
+See [Benchmark lifecycle](docs/16-benchmark-lifecycle.md), [Mutation and exploit gates](docs/17-mutation-and-exploit-gates.md), [Suite strategy](docs/18-suite-strategy.md), and [Report schema v1 guidance](docs/19-report-schema-v1.md).
+
 ## Current status
 
 This repository is a **v0 public starter**. It contains:
@@ -67,7 +81,7 @@ This repository is a **v0 public starter**. It contains:
 - minimal Python CLI scaffolding;
 - sample public fixtures;
 - sample scorers plus hardened IF-01, DATA-01, DOC-01, SUP-01, and API-01 artifact/state-based scorers;
-- documentation for benchmark design, metrics, and anti-overfitting.
+- documentation for benchmark design, metrics, anti-overfitting, lifecycle status, and hardening gates.
 
 It intentionally does **not** contain private holdout tasks, production secrets, personal data, or benchmark answers for real evaluation runs.
 
@@ -140,6 +154,9 @@ Without installing the package, use the source-tree Make targets:
 make validate
 make test
 make smoke
+make lifecycle-check
+make mutation-smoke
+make hardening-check
 make leak-check
 ```
 
diff --git a/configs/hardening_gates.json b/configs/hardening_gates.json
new file mode 100644
index 0000000..33a66a5
--- /dev/null
+++ b/configs/hardening_gates.json
@@ -0,0 +1,86 @@
+{
+  "version": "0.6.0",
+  "schema_version": 1,
+  "tasks": {
+    "IF-01": {
+      "task_id": "IF-01",
+      "mutation_smoke_required": true,
+      "mutation_script": "scripts/create_if01_mutation.py",
+      "mutation_output": "artifacts/mutation-smoke/IF-01/case_mutation_001",
+      "expected_output_files": [
+        "spec.md",
+        "check_config.json"
+      ],
+      "exploit_smoke_required": true,
+      "exploit_smoke_status": "planned",
+      "reason": "IF-01 already tests extra files, forbidden sections, and banned phrases; v0.6 records the gate and future standard exploit smoke shape.",
+      "public_safe": true
+    },
+    "DATA-01": {
+      "task_id": "DATA-01",
+      "mutation_smoke_required": true,
+      "mutation_script": "scripts/create_data01_mutation.py",
+      "mutation_output": "artifacts/mutation-smoke/DATA-01/case_mutation_001",
+      "expected_output_files": [
+        "spec.md",
+        "check_config.json",
+        "data/events.csv",
+        "data/customers.csv",
+        "data/analytics.db"
+      ],
+      "exploit_smoke_required": true,
+      "exploit_smoke_status": "planned",
+      "reason": "DATA-01 already checks unsupported metrics and invalid artifacts; v0.6 keeps exploit gate status explicit without adding private data.",
+      "public_safe": true
+    },
+    "DOC-01": {
+      "task_id": "DOC-01",
+      "mutation_smoke_required": true,
+      "mutation_script": "scripts/create_doc01_mutation.py",
+      "mutation_output": "artifacts/mutation-smoke/DOC-01/case_mutation_001",
+      "expected_output_files": [
+        "spec.md",
+        "check_config.json",
+        "corpus/product_policy.md"
+      ],
+      "exploit_smoke_required": true,
+      "exploit_smoke_status": "planned",
+      "reason": "DOC-01 already checks unsupported claims, stale sources, and citation evidence; standard exploit smoke remains a declared next hardening layer.",
+      "public_safe": true
+    },
+    "SUP-01": {
+      "task_id": "SUP-01",
+      "mutation_smoke_required": true,
+      "mutation_script": "scripts/create_sup01_mutation.py",
+      "mutation_output": "artifacts/mutation-smoke/SUP-01/case_mutation_001",
+      "expected_output_files": [
+        "spec.md",
+        "check_config.json",
+        "policy.md",
+        "customer_profile.json",
+        "inbox/email_001.eml"
+      ],
+      "exploit_smoke_required": true,
+      "exploit_smoke_status": "planned",
+      "reason": "SUP-01 already checks prohibited promises and scorer-only labels; private prompt-injection and canary gates stay outside the public repo.",
+      "public_safe": true
+    },
+    "API-01": {
+      "task_id": "API-01",
+      "mutation_smoke_required": true,
+      "mutation_script": "scripts/create_api01_mutation.py",
+      "mutation_output": "artifacts/mutation-smoke/API-01/case_mutation_001",
+      "expected_output_files": [
+        "spec.md",
+        "check_config.json",
+        "api_catalog.json",
+        "api_state.json",
+        "policy.md"
+      ],
+      "exploit_smoke_required": true,
+      "exploit_smoke_status": "planned",
+      "reason": "API-01 already checks forbidden endpoints and wrong state mutations; future private gates can add trap endpoints and canaries.",
+      "public_safe": true
+    }
+  }
+}
diff --git a/configs/task_lifecycle.json b/configs/task_lifecycle.json
new file mode 100644
index 0000000..3f8d6be
--- /dev/null
+++ b/configs/task_lifecycle.json
@@ -0,0 +1,224 @@
+{
+  "version": "0.6.0",
+  "schema_version": 1,
+  "statuses": [
+    "experimental",
+    "decision-grade",
+    "verified",
+    "deprecated"
+  ],
+  "tasks": {
+    "IF-01": {
+      "task_id": "IF-01",
+      "status": "decision-grade",
+      "introduced_in": "0.1.0",
+      "current_version": "0.1.0",
+      "suite_ids": [
+        "core-v0"
+      ],
+      "primary_oracle": "artifact_exact",
+      "scorer_contracts": [
+        "artifact_exact",
+        "schema_contract",
+        "mutation_robustness"
+      ],
+      "public_cases": true,
+      "docs_reference": "docs/11-if01-decision-grade.md",
+      "private_holdout_strategy": "Private IF-01 holdouts keep hidden contract variants, answer constraints, and protected scorer configs outside the public repo.",
+      "mutation_strategy": "Use create_if01_mutation.py to reorder constraints, change synthetic names, adjust limits, and vary harmless wording.",
+      "exploit_smoke_status": "planned",
+      "has_redacted_feedback": true,
+      "verified": false,
+      "notes": "First decision-grade public pattern for strict instruction following and artifact contracts."
+    },
+    "DATA-01": {
+      "task_id": "DATA-01",
+      "status": "decision-grade",
+      "introduced_in": "0.2.0",
+      "current_version": "0.2.0",
+      "suite_ids": [
+        "core-v0"
+      ],
+      "primary_oracle": "numeric_metric",
+      "scorer_contracts": [
+        "artifact_exact",
+        "schema_contract",
+        "numeric_metric",
+        "claim_rubric",
+        "mutation_robustness"
+      ],
+      "public_cases": true,
+      "docs_reference": "docs/12-data01-decision-grade.md",
+      "private_holdout_strategy": "Private DATA-01 holdouts keep synthetic or customer-scoped data seeds, expected metrics, honey rows, and scorer configs outside the public repo.",
+      "mutation_strategy": "Use create_data01_mutation.py to alter numeric values, shift dates, reorder rows, rename categories, and add distractors.",
+      "exploit_smoke_status": "planned",
+      "has_redacted_feedback": true,
+      "verified": false,
+      "notes": "Decision-grade public pattern for exact data work, factual memos, and chart specifications."
+    },
+    "DOC-01": {
+      "task_id": "DOC-01",
+      "status": "decision-grade",
+      "introduced_in": "0.3.0",
+      "current_version": "0.3.0",
+      "suite_ids": [
+        "core-v0"
+      ],
+      "primary_oracle": "claim_rubric",
+      "scorer_contracts": [
+        "artifact_exact",
+        "schema_contract",
+        "claim_rubric",
+        "mutation_robustness"
+      ],
+      "public_cases": true,
+      "docs_reference": "docs/13-doc01-decision-grade.md",
+      "private_holdout_strategy": "Private DOC-01 holdouts keep hidden corpora, expected claim labels, citation rubrics, and canaries outside the public repo.",
+      "mutation_strategy": "Use create_doc01_mutation.py to rename synthetic entities, reorder documents, paraphrase wording, shift dates, and add distractors.",
+      "exploit_smoke_status": "planned",
+      "has_redacted_feedback": true,
+      "verified": false,
+      "notes": "Decision-grade public pattern for fixed-corpus grounded answers and citation checks."
+    },
+    "SUP-01": {
+      "task_id": "SUP-01",
+      "status": "decision-grade",
+      "introduced_in": "0.4.0",
+      "current_version": "0.4.0",
+      "suite_ids": [
+        "ops-local-v0"
+      ],
+      "primary_oracle": "schema_contract",
+      "scorer_contracts": [
+        "artifact_exact",
+        "schema_contract",
+        "claim_rubric",
+        "trace_policy",
+        "mutation_robustness"
+      ],
+      "public_cases": true,
+      "docs_reference": "docs/14-sup01-decision-grade.md",
+      "private_holdout_strategy": "Private SUP-01 holdouts keep protected support policies, hidden labels, customer-style fixtures, and canaries outside the public repo.",
+      "mutation_strategy": "Use create_sup01_mutation.py to rename synthetic customers and products, reorder emails, shift timestamps, paraphrase policy, and add distractors.",
+      "exploit_smoke_status": "planned",
+      "has_redacted_feedback": true,
+      "verified": false,
+      "notes": "Decision-grade public pattern for support inbox triage, policy-grounded drafts, and escalations."
+    },
+    "API-01": {
+      "task_id": "API-01",
+      "status": "decision-grade",
+      "introduced_in": "0.5.0",
+      "current_version": "0.5.0",
+      "suite_ids": [
+        "tools-local-v0"
+      ],
+      "primary_oracle": "state_diff",
+      "scorer_contracts": [
+        "artifact_exact",
+        "schema_contract",
+        "state_diff",
+        "trace_policy",
+        "mutation_robustness"
+      ],
+      "public_cases": true,
+      "docs_reference": "docs/15-api01-decision-grade.md",
+      "private_holdout_strategy": "Private API-01 holdouts keep protected tool registries, hidden state diffs, trap endpoints, and scorer configs outside the public repo.",
+      "mutation_strategy": "Use create_api01_mutation.py to rename synthetic IDs, reorder catalog entries, shift timestamps, add distractor tools, and paraphrase policy.",
+      "exploit_smoke_status": "planned",
+      "has_redacted_feedback": true,
+      "verified": false,
+      "notes": "Decision-grade public pattern for local API/tool orchestration with scorer-side state simulation."
+    },
+    "CODE-01": {
+      "task_id": "CODE-01",
+      "status": "experimental",
+      "introduced_in": "0.0.1",
+      "current_version": "0.1.0",
+      "suite_ids": [
+        "core-v0"
+      ],
+      "primary_oracle": "artifact_exact",
+      "scorer_contracts": [
+        "artifact_exact",
+        "schema_contract"
+      ],
+      "public_cases": true,
+      "docs_reference": "tasks/CODE-01/task.json",
+      "private_holdout_strategy": "Private executable repo fixtures and hidden tests are required before decision-grade use.",
+      "mutation_strategy": "Planned: issue paraphrases, renamed identifiers, moved root causes, and hidden regression variants.",
+      "exploit_smoke_status": "planned",
+      "has_redacted_feedback": true,
+      "verified": false,
+      "notes": "Starter scorer only; not decision-grade."
+    },
+    "TERM-02": {
+      "task_id": "TERM-02",
+      "status": "experimental",
+      "introduced_in": "0.0.1",
+      "current_version": "0.1.0",
+      "suite_ids": [
+        "core-v0"
+      ],
+      "primary_oracle": "artifact_exact",
+      "scorer_contracts": [
+        "artifact_exact",
+        "state_diff"
+      ],
+      "public_cases": true,
+      "docs_reference": "tasks/TERM-02/task.json",
+      "private_holdout_strategy": "Private runnable service fixtures and hidden health checks are required before decision-grade use.",
+      "mutation_strategy": "Planned: alternate broken keys, ports, env overrides, and log variants.",
+      "exploit_smoke_status": "planned",
+      "has_redacted_feedback": true,
+      "verified": false,
+      "notes": "Starter scorer only; not decision-grade."
+    },
+    "APP-04": {
+      "task_id": "APP-04",
+      "status": "experimental",
+      "introduced_in": "0.0.1",
+      "current_version": "0.1.0",
+      "suite_ids": [
+        "core-v0"
+      ],
+      "primary_oracle": "state_diff",
+      "scorer_contracts": [
+        "artifact_exact",
+        "state_diff",
+        "trace_policy"
+      ],
+      "public_cases": true,
+      "docs_reference": "tasks/APP-04/task.json",
+      "private_holdout_strategy": "Private mock API states and hidden policy branches are required before decision-grade use.",
+      "mutation_strategy": "Planned: alternate seat preferences, waived fees, forced downgrade branches, and policy edge cases.",
+      "exploit_smoke_status": "planned",
+      "has_redacted_feedback": true,
+      "verified": false,
+      "notes": "Starter scorer only; not decision-grade."
+    },
+    "SEC-01": {
+      "task_id": "SEC-01",
+      "status": "experimental",
+      "introduced_in": "0.0.1",
+      "current_version": "0.1.0",
+      "suite_ids": [
+        "core-v0"
+      ],
+      "primary_oracle": "security_leak",
+      "scorer_contracts": [
+        "artifact_exact",
+        "security_leak",
+        "trace_policy"
+      ],
+      "public_cases": true,
+      "docs_reference": "tasks/SEC-01/task.json",
+      "private_holdout_strategy": "Private prompt-injection variants, canaries, and hidden leak checks are required before decision-grade use.",
+      "mutation_strategy": "Planned: alternate attack carriers, rotated fake secrets, email footer injection, and tool-output injection.",
+      "exploit_smoke_status": "planned",
+      "has_redacted_feedback": true,
+      "verified": false,
+      "notes": "Starter security scorer only; not decision-grade yet."
+    }
+  }
+}
diff --git a/docs/16-benchmark-lifecycle.md b/docs/16-benchmark-lifecycle.md
new file mode 100644
index 0000000..8108e8b
--- /dev/null
+++ b/docs/16-benchmark-lifecycle.md
@@ -0,0 +1,85 @@
+# Benchmark Lifecycle
+
+Agent Bench Lab task families move through explicit lifecycle statuses. The status is about
+benchmark readiness, not model quality.
+
+## Statuses
+
+| Status | Meaning | Suitable use |
+|---|---|---|
+| `experimental` | The task exists, but the oracle, fixtures, or tests may still be starter-grade. | Demos, authoring examples, early scorer work |
+| `decision-grade` | The task has a deterministic or audited primary oracle, public synthetic examples, tests, mutation strategy, private holdout guidance, normalized scores, and redacted feedback. | Serious comparisons when paired with private holdouts |
+| `verified` | The task has passed an additional maintainer audit, scorer loophole review, solvability check, mutation smoke, exploit smoke, and changelog review. | High-confidence repeated evaluation |
+| `deprecated` | The task is replaced, flawed, stale, or no longer maintained. | Historical comparison only |
+
+No task family is `verified` in v0.6. Verification is a later audit level.
+
+## Experimental
+
+Experimental means:
+
+- task metadata exists;
+- public examples may exist;
+- the scorer may be incomplete or sample-grade;
+- hidden checks and mutation coverage may be planned only;
+- the task is not suitable for decision-grade comparison.
+
+Experimental task families can still be useful as templates, but they should not be marketed as
+reliable evaluation signals.
+
+## Decision-Grade
+
+Decision-grade means:
+
+- primary oracle is deterministic or audited;
+- public synthetic cases exist;
+- private holdout strategy is documented;
+- mutation strategy is documented;
+- scorer output uses normalized score records;
+- redacted feedback is supported;
+- leak gates pass;
+- tests cover pass and fail cases;
+- no live dependency is required unless the environment is snapshotted or replayed.
+
+Decision-grade public cases are still examples and smoke tests. Final comparisons need private
+holdouts or protected bundles outside the public repo.
+
+## Verified
+
+Verified means all decision-grade criteria are met, plus:
+
+- maintainer audit completed;
+- scorer loophole review completed;
+- public cases are solvable;
+- mutation smoke passes;
+- exploit smoke passes or has an explicit not-applicable justification;
+- changelog and version policy are clean;
+- known limitations are documented.
+
+Verification should be conservative. It is better to keep a task family decision-grade than to mark
+it verified without an audit trail.
+
+## Deprecated
+
+Deprecated means the task family should not be used for new comparisons because it is:
+
+- replaced by a better task family;
+- known to be flawed;
+- stale or unsupported;
+- incompatible with current scorer contracts.
+
+Deprecated tasks should keep enough metadata for historical interpretation.
+
+## Config
+
+Lifecycle metadata lives in:
+
+```text
+configs/task_lifecycle.json
+```
+
+Validate it with:
+
+```bash
+make lifecycle-check
+```
diff --git a/docs/17-mutation-and-exploit-gates.md b/docs/17-mutation-and-exploit-gates.md
new file mode 100644
index 0000000..ef8704b
--- /dev/null
+++ b/docs/17-mutation-and-exploit-gates.md
@@ -0,0 +1,75 @@
+# Mutation And Exploit Gates
+
+Mutation and exploit gates keep Agent Bench Lab from becoming a collection of brittle public
+examples.
+
+## Mutation Smoke Gates
+
+Mutation smoke gates test whether a task family can generate safe public-style variants without
+committing private holdouts.
+
+Useful mutations include:
+
+- reorder inputs;
+- rename synthetic entities;
+- shift dates while preserving relative logic;
+- add distractors;
+- paraphrase policy;
+- shuffle tool catalogs;
+- perturb numeric values while preserving the intended answer logic.
+
+Mutation smoke does not prove decision-grade performance. It checks that the task family has a
+repeatable mutation path and that generated artifacts stay in ignored output paths.
+
+Run the public mutation gate with:
+
+```bash
+make mutation-smoke
+```
+
+## Exploit Smoke Gates
+
+Exploit smoke gates test obvious benchmark loopholes and unsafe shortcuts.
+
+Examples:
+
+- extra forbidden file;
+- hidden answer leakage attempt;
+- scorer-only label in an artifact;
+- forbidden endpoint use;
+- completion claim without required state;
+- unsupported claim with fake citation;
+- public report attempting to expose an expected value.
+
+Public exploit examples should be safe and synthetic. Private exploit checks, canaries, hidden
+labels, and protected scorer configs must stay outside the public repo.
+
+## Canaries
+
+Canaries are tripwires, not the main defense.
+
+A canary can show that private content leaked into an agent-visible packet, artifact, trace, or
+report. It does not replace:
+
+- private holdout isolation;
+- scorer-only visibility;
+- redacted feedback;
+- tracked-file leak gates;
+- deterministic or audited primary oracles.
+
+## Gate Declarations
+
+Hardening gate metadata lives in:
+
+```text
+configs/hardening_gates.json
+```
+
+Validate it with:
+
+```bash
+make hardening-check
+```
+
+In v0.6, exploit smoke status is declared per decision-grade family. Full private exploit suites can
+be added later without changing public task fixtures.
diff --git a/docs/18-suite-strategy.md b/docs/18-suite-strategy.md
new file mode 100644
index 0000000..70c878b
--- /dev/null
+++ b/docs/18-suite-strategy.md
@@ -0,0 +1,41 @@
+# Suite Strategy
+
+Suites are comparison bundles. They should be small enough to run repeatedly and clear enough to
+interpret.
+
+## Core Is Not All Tasks
+
+`core` is the fast general starter suite. It should not automatically absorb every new
+decision-grade task family.
+
+Core bloat makes routine regression checks slower and less diagnostic. New task families should
+propose a suite explicitly.
+
+## Current Suites
+
+| Suite | Purpose | Current scope |
+|---|---|---|
+| `core-v0` | Fast general local regression and smoke comparison | starter task mix plus IF-01, DATA-01, DOC-01 |
+| `ops-local-v0` | Operational and customer-style workflows | SUP-01 |
+| `tools-local-v0` | Local tool/API workflow evaluation | API-01 |
+
+## Future Suites
+
+Future suites may include:
+
+- `dev-local` for repository and terminal work;
+- `security-local` for prompt injection, leakage, and policy tasks;
+- `research-local` for fixed-corpus source-grounded research;
+- `browser-replay` for browser tasks over frozen snapshots;
+- `weekly-deep` for slower, broader regression runs.
+
+## Rule
+
+Every new task family should answer:
+
+```text
+Which suite owns this task, and why?
+```
+
+If the answer is "core", the task should be fast, broadly useful, and worth running in most local
+regression loops.
diff --git a/docs/19-report-schema-v1.md b/docs/19-report-schema-v1.md
new file mode 100644
index 0000000..b66d2e0
--- /dev/null
+++ b/docs/19-report-schema-v1.md
@@ -0,0 +1,58 @@
+# Report Schema V1 Guidance
+
+Report schema v1 is guidance for future generated reports. It is not a runtime rewrite in v0.6.
+
+Reports should make comparisons useful without exposing scorer-only or private evaluation content.
+
+## Recommended Fields
+
+| Field | Meaning |
+|---|---|
+| `run_id` | Stable run identifier |
+| `suite_id` | Suite used for the run |
+| `task_id` | Task family identifier |
+| `task_version` | Task version from task metadata |
+| `task_status` | Task implementation status from task metadata |
+| `lifecycle_status` | Lifecycle status from `configs/task_lifecycle.json` |
+| `score` | Normalized score from 0 to 1 |
+| `success` | Boolean pass/fail result |
+| `pass_threshold` | Threshold used for success |
+| `cost` | Cost field or explicit null |
+| `latency` | Runtime latency field or explicit null |
+| `tool_calls` | Tool-call count or summary |
+| `model_calls` | Model-call count or summary |
+| `policy_violations` | Redacted policy violation categories |
+| `leak_flags` | Redacted leak or canary categories |
+| `mutation_score` | Optional mutation robustness score |
+| `exploit_smoke_flags` | Optional exploit gate categories |
+| `redaction_applied` | Whether public feedback was redacted |
+| `private_bundle_ref_hash` | Optional hash reference, never raw private data |
+| `scorer_contracts` | Scorer contracts used by the task |
+| `diagnostics_redacted` | Public-safe diagnostic text |
+
+## Rules
+
+- Do not include raw private fixtures.
+- Do not include hidden labels or answer keys.
+- Do not include protected scorer configs.
+- Do not include exact hidden thresholds.
+- Do not include raw canary strings.
+- Do not include raw private traces in public reports.
+- Prefer redacted component-level diagnostics.
+
+## Missing Data
+
+Missing cost, latency, or tool-call data should be marked explicitly (for example, as `null`).
+Do not invent values for fields that were not captured.
+
+## Private Bundle References
+
+Private bundle references should be hashes or opaque IDs only:
+
+```text
+private_bundle_ref_hash
+fixture_hash
+scorer_config_hash
+```
+
+Reports may say that a private bundle was used. They must not reveal bundle contents.
diff --git a/docs/README.md b/docs/README.md
index 7668ebc..d8c06cb 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -21,6 +21,10 @@ Start here:
 17. [DOC-01 decision-grade pattern](13-doc01-decision-grade.md)
 18. [SUP-01 decision-grade pattern](14-sup01-decision-grade.md)
 19. [API-01 decision-grade pattern](15-api01-decision-grade.md)
-20. [v0 roadmap](roadmap-v0.md)
-21. [Public release checklist](public-release-checklist.md)
-22. [Decision log template](decision-log-template.md)
+20. [Benchmark lifecycle](16-benchmark-lifecycle.md)
+21. [Mutation and exploit gates](17-mutation-and-exploit-gates.md)
+22. [Suite strategy](18-suite-strategy.md)
+23. [Report schema v1 guidance](19-report-schema-v1.md)
+24. [v0 roadmap](roadmap-v0.md)
+25. [Public release checklist](public-release-checklist.md)
+26. [Decision log template](decision-log-template.md)
diff --git a/scripts/check_hardening_gates.py b/scripts/check_hardening_gates.py
new file mode 100644
index 0000000..364bbad
--- /dev/null
+++ b/scripts/check_hardening_gates.py
@@ -0,0 +1,109 @@
+from __future__ import annotations
+
+import argparse
+import json
+import subprocess
+from pathlib import Path
+
+VALID_EXPLOIT_STATUSES = {"implemented", "planned", "not_applicable"}
+
+
+def load_json(path: Path) -> dict:
+    """Read the UTF-8 encoded file at ``path`` and return its parsed JSON content."""
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def decision_grade_tasks(root: Path) -> set[str]:
+    """Return task ids whose lifecycle status is ``decision-grade`` or ``verified``."""
+    gated_statuses = {"decision-grade", "verified"}
+    lifecycle_path = root / "configs" / "task_lifecycle.json"
+    # A lifecycle file without a "tasks" key yields an empty set.
+    tasks = load_json(lifecycle_path).get("tasks", {})
+    return {name for name, meta in tasks.items() if meta.get("status") in gated_statuses}
+
+
+def tracked_files(root: Path) -> set[str]:
+    """Return the non-blank paths printed by ``git ls-files`` run inside ``root``.
+
+    Returns an empty set when git cannot be started or exits with an error.
+    """
+    command = ["git", "ls-files"]
+    try:
+        listing = subprocess.run(command, cwd=root, check=True, capture_output=True, text=True)
+    except (OSError, subprocess.CalledProcessError):
+        return set()
+    stripped = (raw.strip() for raw in listing.stdout.splitlines())
+    return {name for name in stripped if name}
+
+
+def check_hardening_gates(root: Path) -> list[str]:
+    """Validate configs/hardening_gates.json under ``root``; return error messages."""
+    config = load_json(root / "configs" / "hardening_gates.json")
+    entries = config.get("tasks", {})
+    errors: list[str] = []
+    if not isinstance(entries, dict):
+        return ["configs/hardening_gates.json: tasks must be an object"]
+
+    expected_tasks = decision_grade_tasks(root)
+    configured_tasks = set(entries)
+    for missing in sorted(expected_tasks - configured_tasks):
+        errors.append(f"{missing}: missing hardening gate entry")
+    for extra in sorted(configured_tasks - expected_tasks):
+        errors.append(f"{extra}: hardening gate entry is only expected for decision-grade tasks")
+
+    tracked = tracked_files(root)
+    generated_tracked = sorted(path for path in tracked if path.startswith("artifacts/"))
+    if generated_tracked:
+        errors.append(f"generated mutation output is tracked: {', '.join(generated_tracked)}")
+
+    for task_id, entry in sorted(entries.items()):
+        if not isinstance(entry, dict):
+            errors.append(f"{task_id}: hardening gate entry must be an object")
+            continue
+        if entry.get("task_id") != task_id:
+            errors.append(f"{task_id}: task_id must match config key")
+        mutation_script = entry.get("mutation_script")
+        if entry.get("mutation_smoke_required") is True:
+            if not isinstance(mutation_script, str) or not (root / mutation_script).exists():
+                errors.append(f"{task_id}: required mutation script does not exist")
+            expected_files = entry.get("expected_output_files")
+            if not isinstance(expected_files, list) or not expected_files:
+                errors.append(f"{task_id}: mutation smoke requires expected_output_files")
+
+        mutation_output = entry.get("mutation_output")
+        if not isinstance(mutation_output, str) or not mutation_output.startswith("artifacts/"):
+            errors.append(f"{task_id}: mutation_output must be under artifacts/")
+        elif any((p + "/").startswith(mutation_output.rstrip("/") + "/") for p in tracked):
+            # git ls-files reports files, so also match tracked paths inside the directory.
+            errors.append(f"{task_id}: mutation_output path is tracked")
+
+        exploit_status = entry.get("exploit_smoke_status")
+        if exploit_status not in VALID_EXPLOIT_STATUSES:
+            errors.append(f"{task_id}: invalid exploit_smoke_status {exploit_status!r}")
+        if exploit_status in {"planned", "not_applicable"}:
+            reason = entry.get("reason")
+            if not isinstance(reason, str) or not reason.strip():
+                errors.append(f"{task_id}: {exploit_status} exploit status requires a reason")
+        if entry.get("public_safe") is not True:
+            errors.append(f"{task_id}: public_safe must be true")
+
+    return errors
+
+
+def main() -> int:
+    """Run the hardening gate check from the command line and return the exit status."""
+    parser = argparse.ArgumentParser(description="Validate hardening gate declarations.")
+    parser.add_argument("--root", type=Path, default=Path(__file__).resolve().parents[1])
+    root = parser.parse_args().root.resolve()
+
+    problems = check_hardening_gates(root)
+    for problem in problems:
+        print(f"ERROR: {problem}")
+    if problems:
+        return 1
+    print("Hardening gate check passed.")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/check_lifecycle.py b/scripts/check_lifecycle.py
new file mode 100644
index 0000000..c2434c2
--- /dev/null
+++ b/scripts/check_lifecycle.py
@@ -0,0 +1,140 @@
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+
+VALID_STATUSES = {"experimental", "decision-grade", "verified", "deprecated"}  # accepted "status" values
+DECISION_READY_STATUSES = {"decision-grade", "verified"}  # statuses that also get check_decision_grade
+
+
+def load_json(path: Path) -> dict:
+    """Return the decoded contents of the UTF-8 JSON file at ``path``."""
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def task_dirs(root: Path) -> set[str]:
+    """Return the names of ``root/tasks`` subdirectories that contain a ``task.json``."""
+    def is_task(child: Path) -> bool:
+        return child.is_dir() and (child / "task.json").exists()
+
+    return {child.name for child in filter(is_task, (root / "tasks").iterdir())}
+
+
+def suite_ids(root: Path) -> set[str]:
+    """Collect the ``suite_id`` of every ``configs/suites/*.json`` file under ``root``.
+
+    Files whose ``suite_id`` is missing or not a string are ignored.
+    """
+    suite_files = sorted((root / "configs" / "suites").glob("*.json"))
+    declared = (load_json(suite_file).get("suite_id") for suite_file in suite_files)
+    return {value for value in declared if isinstance(value, str)}
+
+
+def require_text(entry: dict, field: str, errors: list[str]) -> None:
+    """Append an error unless ``entry[field]`` is a string with non-whitespace content."""
+    if not (isinstance(entry.get(field), str) and entry[field].strip()):
+        errors.append(f"{entry.get('task_id')}: missing non-empty {field}")
+
+
+def require_list(entry: dict, field: str, errors: list[str]) -> None:
+    """Append an error unless ``entry[field]`` is a list with at least one item."""
+    if not (isinstance(entry.get(field), list) and entry[field]):
+        errors.append(f"{entry.get('task_id')}: missing non-empty {field}")
+
+
+def check_decision_grade(root: Path, task_id: str, entry: dict, errors: list[str]) -> None:
+    """Append to ``errors`` every decision-grade requirement that ``task_id`` fails."""
+    task_dir = root / "tasks" / task_id
+    for filename in ("task.json", "prompt.md", "scorer.py"):
+        if not (task_dir / filename).exists():
+            errors.append(f"{task_id}: missing tasks/{task_id}/{filename}")
+    if not (root / "fixtures" / "public" / task_id).exists():
+        errors.append(f"{task_id}: missing public fixture directory")
+
+    docs_reference = entry.get("docs_reference")
+    # A blank reference must fail: ``root / ""`` is ``root`` itself, so it would "exist".
+    has_reference = isinstance(docs_reference, str) and bool(docs_reference.strip())
+    if not has_reference or not (root / docs_reference).exists():
+        errors.append(f"{task_id}: docs_reference does not exist")
+
+    for field in ("private_holdout_strategy", "mutation_strategy", "primary_oracle"):
+        require_text(entry, field, errors)
+    require_list(entry, "scorer_contracts", errors)
+
+    if entry.get("public_cases") is not True:
+        errors.append(f"{task_id}: decision-grade task must declare public_cases true")
+    if entry.get("has_redacted_feedback") is not True:
+        errors.append(f"{task_id}: decision-grade task must declare redacted feedback")
+    if not entry.get("exploit_smoke_status"):
+        errors.append(f"{task_id}: missing exploit_smoke_status")
+
+
+def check_lifecycle(root: Path) -> list[str]:
+    """Validate ``configs/task_lifecycle.json`` against the tasks and suites under ``root``.
+
+    Returns human-readable error strings; an empty list means every check passed.
+    """
+    config = load_json(root / "configs" / "task_lifecycle.json")
+    entries = config.get("tasks", {}) if isinstance(config, dict) else None
+    if not isinstance(entries, dict):
+        return ["configs/task_lifecycle.json: tasks must be an object"]
+
+    errors: list[str] = []
+    actual_tasks = task_dirs(root)
+    configured_tasks = set(entries)
+    valid_suite_ids = suite_ids(root)
+
+    for missing in sorted(actual_tasks - configured_tasks):
+        errors.append(f"{missing}: missing lifecycle entry")
+    for extra in sorted(configured_tasks - actual_tasks):
+        errors.append(f"{extra}: lifecycle entry has no matching task directory")
+
+    for task_id, entry in sorted(entries.items()):
+        if not isinstance(entry, dict):
+            errors.append(f"{task_id}: lifecycle entry must be an object")
+            continue
+        if entry.get("task_id") != task_id:
+            errors.append(f"{task_id}: task_id must match config key")
+        status = entry.get("status")
+        if not isinstance(status, str) or status not in VALID_STATUSES:
+            errors.append(f"{task_id}: invalid status {status!r}")
+            continue
+        for field in ("introduced_in", "current_version", "primary_oracle"):
+            require_text(entry, field, errors)
+        require_list(entry, "suite_ids", errors)
+        # require_list already reported a non-list value; iterating one would crash or misreport.
+        declared = entry.get("suite_ids")
+        for suite_id in declared if isinstance(declared, list) else []:
+            if not isinstance(suite_id, str) or suite_id not in valid_suite_ids:
+                errors.append(f"{task_id}: unknown suite_id {suite_id}")
+
+        if status in DECISION_READY_STATUSES:
+            check_decision_grade(root, task_id, entry, errors)
+
+        verified = entry.get("verified")
+        if status == "verified" and verified is not True:
+            errors.append(f"{task_id}: verified status requires verified=true")
+        if status != "verified" and verified is True:
+            errors.append(f"{task_id}: verified=true is only allowed for verified tasks")
+
+    return errors
+
+
+def main() -> int:
+    """Run the lifecycle check from the command line; return 0 on success, 1 on errors."""
+    cli = argparse.ArgumentParser(description="Validate task-family lifecycle metadata.")
+    # --root defaults to the parent of the directory containing this script.
+    cli.add_argument("--root", type=Path, default=Path(__file__).resolve().parents[1])
+    repo_root = cli.parse_args().root.resolve()
+    problems = check_lifecycle(repo_root)
+    if not problems:
+        print("Lifecycle check passed.")
+        return 0
+    for problem in problems:
+        print(f"ERROR: {problem}")
+    return 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # exit with the integer status returned by main()
diff --git a/scripts/run_mutation_smoke.py b/scripts/run_mutation_smoke.py
new file mode 100644
index 0000000..0260eb5
--- /dev/null
+++ b/scripts/run_mutation_smoke.py
@@ -0,0 +1,82 @@
+from __future__ import annotations
+
+import argparse
+import json
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+
+def load_json(path: Path) -> dict:
+    """Read the file at ``path`` as UTF-8 text and return the parsed JSON value."""
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def selected_entries(root: Path, task_id: str | None) -> list[tuple[str, dict]]:
+    """List ``(task_id, entry)`` pairs from ``configs/hardening_gates.json`` to smoke-test.
+
+    Only entries whose ``mutation_smoke_required`` is ``True`` are kept, in
+    sorted task-id order; a truthy ``task_id`` restricts the result to that key.
+    """
+    config = load_json(root / "configs" / "hardening_gates.json")
+    candidates = sorted(config.get("tasks", {}).items())
+    wanted = (pair for pair in candidates if not task_id or pair[0] == task_id)
+    return [pair for pair in wanted if pair[1].get("mutation_smoke_required") is True]
+
+
+def output_dir(root: Path, out_root: Path | None, task_id: str, entry: dict) -> Path:
+    """Pick the mutation output directory: under ``out_root`` if given, else from the entry."""
+    override = None if out_root is None else out_root / task_id / "case_mutation_001"
+    return root / entry["mutation_output"] if override is None else override
+
+
+def run_mutation(root: Path, task_id: str, entry: dict, out_root: Path | None) -> None:
+    """Regenerate one task's mutation case and verify the expected files exist.
+
+    Raises RuntimeError for an unsafe output path or missing generated output.
+    """
+    script = root / entry["mutation_script"]
+    output = output_dir(root, out_root, task_id, entry)
+    base = root if out_root is None else out_root
+    # The old output is deleted below, so it must sit strictly inside ``base``.
+    if base not in output.parents or ".." in output.relative_to(base).parts:
+        raise RuntimeError(f"{task_id}: refusing unsafe mutation output path: {output}")
+    if output.exists():
+        shutil.rmtree(output)
+    subprocess.run([sys.executable, str(script), "--out", str(output)], cwd=root, check=True)
+    if not output.exists():
+        raise RuntimeError(f"{task_id}: mutation output was not created: {output}")
+    for rel_path in entry.get("expected_output_files", []):
+        if not (output / rel_path).exists():
+            raise RuntimeError(f"{task_id}: expected mutation output missing: {output / rel_path}")
+    print(f"{task_id}: mutation smoke output ok at {output}")
+
+
+def main() -> int:
+    """CLI entry point: run each selected mutation smoke generator.
+
+    Returns 0 after every selected entry ran, or 1 when no entry was selected.
+    """
+    cli = argparse.ArgumentParser(description="Run public mutation smoke generators.")
+    cli.add_argument("--root", type=Path, default=Path(__file__).resolve().parents[1])
+    cli.add_argument("--task", help="Run one task family only.")
+    out_root_help = "Override output root. Defaults to each task mutation_output config."
+    cli.add_argument("--out-root", type=Path, help=out_root_help)
+    options = cli.parse_args()
+
+    repo_root = options.root.resolve()
+    override_root = None if not options.out_root else options.out_root.resolve()
+    chosen = selected_entries(repo_root, options.task)
+    if not chosen:
+        print("No mutation smoke entries selected.")
+        return 1
+
+    for name, gate in chosen:
+        run_mutation(repo_root, name, gate, override_root)
+    print("Mutation smoke passed.")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # exit with the integer status returned by main()
diff --git a/tests/test_lifecycle_hardening.py b/tests/test_lifecycle_hardening.py
new file mode 100644
index 0000000..9ca1060
--- /dev/null
+++ b/tests/test_lifecycle_hardening.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]  # parent of the directory containing this test file
+
+
+def run_script(*args: str) -> subprocess.CompletedProcess[str]:
+    """Run the current Python interpreter with ``args`` from ``ROOT`` and capture its output.
+
+    A non-zero exit status does not raise; callers inspect ``returncode`` themselves.
+    """
+    command = [sys.executable, *args]
+    options = {"cwd": ROOT, "capture_output": True, "text": True, "check": False}
+    return subprocess.run(command, **options)
+
+
+def test_lifecycle_check_passes():
+    """The lifecycle checker script exits 0 and prints its success line."""
+    completed = run_script("scripts/check_lifecycle.py")
+    assert completed.returncode == 0, completed.stdout + completed.stderr
+    assert "Lifecycle check passed" in completed.stdout
+
+
+def test_hardening_gate_check_passes():
+    """The hardening gate checker script exits 0 and prints its success line."""
+    completed = run_script("scripts/check_hardening_gates.py")
+    assert completed.returncode == 0, completed.stdout + completed.stderr
+    assert "Hardening gate check passed" in completed.stdout
+
+
+def test_mutation_smoke_writes_to_supplied_output_root(tmp_path):
+    """With ``--out-root``, each listed task gets a ``check_config.json`` under that root."""
+    completed = run_script("scripts/run_mutation_smoke.py", "--out-root", str(tmp_path))
+    assert completed.returncode == 0, completed.stdout + completed.stderr
+    for family in ("IF-01", "DATA-01", "DOC-01", "SUP-01", "API-01"):
+        assert tmp_path.joinpath(family, "case_mutation_001", "check_config.json").exists()
+
+
+def test_lifecycle_marks_no_task_verified():
+    """No entry is marked verified; a missing ``verified`` key counts as not verified."""
+    data = json.loads((ROOT / "configs" / "task_lifecycle.json").read_text(encoding="utf-8"))
+    assert all(not entry.get("verified") for entry in data["tasks"].values())
+    assert all(entry["status"] != "verified" for entry in data["tasks"].values())
+
+
+def test_hardening_gates_cover_decision_grade_tasks_only():
+    """The hardening gate keys are exactly the decision-grade/verified lifecycle task ids."""
+    def read_config(name: str) -> dict:
+        return json.loads((ROOT / "configs" / name).read_text(encoding="utf-8"))
+
+    lifecycle, gates = read_config("task_lifecycle.json"), read_config("hardening_gates.json")
+    ready_states = {"decision-grade", "verified"}
+    tasks = lifecycle["tasks"]
+    ready = {task_id for task_id in tasks if tasks[task_id]["status"] in ready_states}
+    assert set(gates["tasks"]) == ready