From 9cfb6e709702d8a1e02d249416942104bc9c6365 Mon Sep 17 00:00:00 2001
From: Bradley Taylor <bradtaylorsf@gmail.com>
Date: Thu, 21 May 2026 15:50:50 -0700
Subject: [PATCH 1/3] feat: batch implement issues #316, #317, #318, #319
 (#395)

---
 README.md                                     |  83 ++++-----
 docs/CONNECTOR_SKILL_TEMPLATE.md              |  70 ++++++++
 src/research_agent/doctor.py                  |  96 ++++++++--
 src/research_agent/observability/events.py    |   2 +
 src/research_agent/orchestrator/loop.py       |  74 ++++++--
 src/research_agent/orchestrator/plan.py       |  48 +++++
 src/research_agent/prompts/planner.md         |  12 +-
 src/research_agent/skills/connectors/bbb.md   |  74 ++++++++
 .../skills/connectors/calaccess.md            |  78 ++++++++
 src/research_agent/skills/connectors/gdelt.md |  74 ++++++++
 src/research_agent/skills/connectors/lda.md   |  78 ++++++++
 .../skills/connectors/licensing.md            |  82 +++++++++
 .../skills/connectors/littlesis.md            |  73 ++++++++
 .../skills/connectors/nonprofits.md           |  69 +++++++
 .../skills/connectors/opencorporates.md       |  76 ++++++++
 .../skills/connectors/usaspending.md          |  77 ++++++++
 src/research_agent/tools/_registry.py         | 168 +++++++++++++++++-
 src/research_agent/tools/bbb.py               |   2 +-
 src/research_agent/tools/calaccess.py         |   2 +-
 src/research_agent/tools/fec.py               | 132 ++++++++++++++
 src/research_agent/tools/gdelt.py             |   2 +-
 src/research_agent/tools/lda.py               |   2 +-
 src/research_agent/tools/licensing.py         |   2 +-
 src/research_agent/tools/linkedin.py          |   1 +
 src/research_agent/tools/littlesis.py         |   2 +-
 src/research_agent/tools/nonprofits.py        |   2 +-
 src/research_agent/tools/opencorporates.py    |   2 +-
 src/research_agent/tools/sanctions.py         |   1 +
 src/research_agent/tools/scholar.py           |   1 +
 src/research_agent/tools/sos.py               |   2 +-
 src/research_agent/tools/state_election.py    |  87 ++++++++-
 src/research_agent/tools/usaspending.py       |   2 +-
 tests/research_agent/tools/test_registry.py   |  83 ++++++++-
 tests/test_candidate_roster_backtest.py       |  12 +-
 tests/test_doctor.py                          | 142 ++++++++++++++-
 .../test_orchestrator_connector_contracts.py  | 168 ++++++++++++++++++
 tests/test_orchestrator_loop.py               |  53 ++++--
 tests/test_orchestrator_plan.py               | 111 ++++++++++++
 tests/test_prompts_loader.py                  |   4 +-
 tests/test_skills_content.py                  | 145 +++++++++++++++
 tests/test_tools_fec.py                       |  49 +++++
 tests/test_tools_state_election.py            |  43 +++++
 42 files changed, 2158 insertions(+), 128 deletions(-)
 create mode 100644 docs/CONNECTOR_SKILL_TEMPLATE.md
 create mode 100644 src/research_agent/skills/connectors/bbb.md
 create mode 100644 src/research_agent/skills/connectors/calaccess.md
 create mode 100644 src/research_agent/skills/connectors/gdelt.md
 create mode 100644 src/research_agent/skills/connectors/lda.md
 create mode 100644 src/research_agent/skills/connectors/licensing.md
 create mode 100644 src/research_agent/skills/connectors/littlesis.md
 create mode 100644 src/research_agent/skills/connectors/nonprofits.md
 create mode 100644 src/research_agent/skills/connectors/opencorporates.md
 create mode 100644 src/research_agent/skills/connectors/usaspending.md
 create mode 100644 tests/test_orchestrator_connector_contracts.py

diff --git a/README.md b/README.md
index 5835f2c..d141fda 100644
--- a/README.md
+++ b/README.md
@@ -107,51 +107,52 @@ cloud tiers (priced — see [Costs](#costs)). The full tier roster lives in
 The planner can dispatch directly to any of the connectors below instead
 of falling back to `web_search` with a `site:` operator. Each row here
 mirrors what the planner sees in its system prompt — same description,
-same optional payload knobs, same example query. The table is generated
-from `src/research_agent/tools/_registry.py` via
+same required payload fields, same optional payload knobs, same skill
+linkage, same example query. The table is generated from
+`src/research_agent/tools/_registry.py` via
 `scripts/regen_readme_kinds.py`; do not hand-edit between the sentinels.
 
 <!-- BEGIN: direct-connector-kinds (auto-generated) -->
 
-| Kind | What it covers | Optional payload knobs | Example query |
-|---|---|---|---|
-| `bbb_search` | Better Business Bureau profiles + ratings (Playwright, no auth) | — | `SBI Builders` |
-| `bne_search` | BNE Hemeroteca Digital Spanish historical press (Playwright scrape, no auth) | `max_results`, `fechaDesde`, `fechaHasta`, `localizacion` | `guerra civil 1936` |
-| `calaccess_search` | California Cal-Access campaign finance (Playwright) | `kind: contributions\|independent_expenditures` | `Newsom` |
-| `commons_search` | Wikimedia Commons free media files with imageinfo license, author, MIME type, original URL, and thumbnail metadata | `max_results` | `Algerian war photographs` |
-| `congress_search` | Bills, members, committees, hearings, congressional record (Congress.gov v3 API) | `kind: bill\|member\|committee\|hearing\|congressional-record` | `Inflation Reduction Act` |
-| `courtlistener_search` | Federal & state court opinions, dockets (RECAP), oral arguments — requires `COURTLISTENER_API_TOKEN` | `kind: opinions\|dockets\|oral_arguments` | `Schedule F appellate` |
-| `cspan_search` | C-SPAN Video Library US political broadcast video with transcripts (Playwright scrape, no auth) | `max_results`, `type=House\|Senate` | `Project 2025` |
-| `dpla_search` | Digital Public Library of America item metadata across US cultural institutions; requires DPLA_API_KEY | `max_results`, `provider` | `Maya land claims` |
-| `edgar_search` | SEC filings (10-K, 10-Q, 8-K, Form 4) — requires `RESEARCH_USER_AGENT` w/ contact email | `form_type: 10-K\|8-K\|...` | `Cisco cybersecurity` |
-| `europeana_search` | Europeana multilingual European cultural-heritage item metadata across museums, libraries, and archives; requires EUROPEANA_API_KEY | `max_results`, `lang` | `Algerian war 1954` |
-| `fec_search` | Candidates, committees, schedule A/E filings (OpenFEC) | `kind: candidates\|candidates_enumerate\|committees\|schedules/schedule_a\|schedules/schedule_e`, `cycle`, `office`, `state`, `district`, `party`, `candidate_status`, `max_rows` | `Trump 2024 committee` |
-| `fedregister_search` | Federal Register rules, proposed rules, agency notices since 1994 (no auth) | `since: YYYY-MM-DD`, `agencies: [...]` | `Schedule F` |
-| `gallica_search` | Gallica/BnF SRU XML search for French national-library newspapers, books, manuscripts, maps, and other digitized primary sources | `max_results` (SRU maximumRecords capped at 50) | `guerre d'Algerie` |
-| `gdelt_search` | GDELT — Global news event aggregator, no `site:` operator (no auth) | `since: YYYY-MM-DD`, `language: english` | `Project 2025 mainstream coverage` |
-| `iarchive_search` | Internet Archive texts, audio, movies, and web-archive collection metadata through advancedsearch.php | `mediatype: texts\|audio\|movies\|web`, `page: <int>` | `Pullman Strike` |
-| `iwm_search` | Imperial War Museums public collections: photographs, sound/oral histories, documents, film, objects (Playwright scrape, no auth) | `max_results`, `object_category`, `related_period`, `records_with_media`, `style`, `page_size` | `Battle of Britain` |
-| `lda_search` | Senate Lobbying Disclosure Act filings (registrants, contributions) | `kind: filings\|registrants\|contributions` | `Heritage Foundation` |
-| `licensing_search` | State contractor / licensing-board lookups (Playwright; CA wired, others stubs) | `state: CA\|TX\|FL\|NY` | `SBI Builders` |
-| `linkedin_search` | LinkedIn person/company lookup via Proxycurl or Lix — requires broker key | `kind: person\|company` | `Sundar Pichai` |
-| `littlesis_search` | Power-mapping database — entities, donations, board seats, family ties (lead, not evidence) | `kind: entities\|relationships` | `Peter Thiel` |
-| `loc_search` | Library of Congress digital collections, including Chronicling America through the unified loc.gov API | `collection: chronicling-america\|prints\|manuscripts\|recordings\|maps`, `page: <int>` | `battle of algiers` |
-| `nara_search` | US National Archives Catalog OPA v2 records, declassified federal records, military records, photos; requires NARA_API_KEY | `available_online`, `type_of_materials`, `result_types`, `record_group`, `page` | `Vietnam War declassified` |
-| `nonprofits_search` | ProPublica Nonprofit Explorer (Form 990 filings, no auth) | — | `Heritage Foundation` |
-| `openalex_search` | OpenAlex Works scholarly articles, abstracts, DOIs, citations, authors, venues, and open-access URLs | `max_results`, `filter`, `sort` | `Project 2025 unitary executive theory` |
-| `opencorporates_search` | Global company registry — requires `OPENCORPORATES_API_KEY` | `jurisdiction: us_ca\|gb\|...` | `Acme Holdings` |
-| `openlibrary_search` | Open Library book metadata, ISBN/OCLC/LCCN identifiers, and Internet Archive scan IDs through search.json | `max_results` | `Pullman Strike 1894` |
-| `persee_search` | Persee French academic journals in humanities and social sciences (Playwright scrape, no auth) | `max_results` | `guerre d'Algerie` |
-| `sanctions_search` | OFAC SDN + UK sanctions lists (local index, no auth) | — | `Wagner Group` |
-| `scholar_search` | Google Scholar via SerpAPI — requires `SERPAPI_KEY` | `kind: case_law\|articles` | `Section 230 appellate` |
-| `si_search` | Smithsonian Open Access digitized collection objects, museum artifacts, images, 3D assets, and object metadata via api.data.gov | `max_results` | `Apollo 11` |
-| `sos_search` | State Secretary-of-State business entity filings (Playwright; CA wired, others stubs) | `state: CA\|DE\|NV\|...` | `Acme Corp` |
-| `state_election_search` | Official state election candidate roster sources and portals | `state`, `office`, `cycle`, `max_results` | `2026 House candidates` |
-| `trove_search` | Trove / National Library of Australia metadata for newspapers, books, photos, magazines, oral histories; metadata-only default | `category`, `zone`, `sortby` | `White Australia Policy 1901` |
-| `ukna_search` | UK National Archives Discovery catalogue metadata for Foreign Office, War Office, Colonial Office, and other UK archival records (no auth) | `max_results`, `page` | `Mau Mau Kenya` |
-| `usaspending_search` | Federal contracts, grants, loans (award-level detail, no auth) | `award_type: contracts\|grants\|loans` | `Heritage Foundation contract` |
-| `wikidata_search` | Wikidata Query Service raw SPARQL for biographical, relational, occupational, place, and entity-ID data | `max_results` (client-side truncation; SPARQL should include `LIMIT`) | `SELECT ?item ?itemLabel WHERE { ?item wdt:P31 wd:Q5; wdt:P19 wd:Q90 . SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } } LIMIT 3` |
-| `wikisource_search` | Wikisource transcribed primary documents across per-language hosts; fetch returns the full source text in cleaned_text | `lang: en|fr|es|de|it|pt|nl|ru|zh|ja|ar`, `max_results` | `Treaty of Versailles` |
+| Kind | What it covers | Required payload fields | Optional payload knobs | Skill | Example query |
+|---|---|---|---|---|---|
+| `bbb_search` | Better Business Bureau profiles + ratings (Playwright, no auth) | — | — | `bbb` | `SBI Builders` |
+| `bne_search` | BNE Hemeroteca Digital Spanish historical press (Playwright scrape, no auth) | — | `max_results`, `fechaDesde`, `fechaHasta`, `localizacion` | `bne` | `guerra civil 1936` |
+| `calaccess_search` | California Cal-Access campaign finance (Playwright) | — | `kind: contributions\|independent_expenditures` | `calaccess` | `Newsom` |
+| `commons_search` | Wikimedia Commons free media files with imageinfo license, author, MIME type, original URL, and thumbnail metadata | — | `max_results` | `commons` | `Algerian war photographs` |
+| `congress_search` | Bills, members, committees, hearings, congressional record (Congress.gov v3 API) | — | `kind: bill\|member\|committee\|hearing\|congressional-record` | `congress` | `Inflation Reduction Act` |
+| `courtlistener_search` | Federal & state court opinions, dockets (RECAP), oral arguments — requires `COURTLISTENER_API_TOKEN` | — | `kind: opinions\|dockets\|oral_arguments` | `courtlistener` | `Schedule F appellate` |
+| `cspan_search` | C-SPAN Video Library US political broadcast video with transcripts (Playwright scrape, no auth) | — | `max_results`, `type=House\|Senate` | `cspan` | `Project 2025` |
+| `dpla_search` | Digital Public Library of America item metadata across US cultural institutions; requires DPLA_API_KEY | — | `max_results`, `provider` | `dpla` | `Maya land claims` |
+| `edgar_search` | SEC filings (10-K, 10-Q, 8-K, Form 4) — requires `RESEARCH_USER_AGENT` w/ contact email | — | `form_type: 10-K\|8-K\|...` | `edgar` | `Cisco cybersecurity` |
+| `europeana_search` | Europeana multilingual European cultural-heritage item metadata across museums, libraries, and archives; requires EUROPEANA_API_KEY | — | `max_results`, `lang` | `europeana` | `Algerian war 1954` |
+| `fec_search` | Candidates, committees, schedule A/E filings (OpenFEC) | — | `kind: candidates\|candidates_enumerate\|committees\|schedules/schedule_a\|schedules/schedule_e`, `cycle`, `office`, `state`, `district`, `party`, `candidate_status`, `max_rows` | `fec` | `Trump 2024 committee` |
+| `fedregister_search` | Federal Register rules, proposed rules, agency notices since 1994 (no auth) | — | `since: YYYY-MM-DD`, `agencies: [...]` | `fedregister` | `Schedule F` |
+| `gallica_search` | Gallica/BnF SRU XML search for French national-library newspapers, books, manuscripts, maps, and other digitized primary sources | — | `max_results` (SRU maximumRecords capped at 50) | `gallica` | `guerre d'Algerie` |
+| `gdelt_search` | GDELT — Global news event aggregator, no `site:` operator (no auth) | — | `since: YYYY-MM-DD`, `language: english` | `gdelt` | `Project 2025 mainstream coverage` |
+| `iarchive_search` | Internet Archive texts, audio, movies, and web-archive collection metadata through advancedsearch.php | — | `mediatype: texts\|audio\|movies\|web`, `page: <int>` | `iarchive` | `Pullman Strike` |
+| `iwm_search` | Imperial War Museums public collections: photographs, sound/oral histories, documents, film, objects (Playwright scrape, no auth) | — | `max_results`, `object_category`, `related_period`, `records_with_media`, `style`, `page_size` | `iwm` | `Battle of Britain` |
+| `lda_search` | Senate Lobbying Disclosure Act filings (registrants, contributions) | — | `kind: filings\|registrants\|contributions` | `lda` | `Heritage Foundation` |
+| `licensing_search` | State contractor / licensing-board lookups (Playwright; CA wired, others stubs) | — | `state: CA\|TX\|FL\|NY` | `licensing` | `SBI Builders` |
+| `linkedin_search` | LinkedIn person/company lookup via Proxycurl or Lix — requires broker key | — | `kind: person\|company` | exempt: #320 paid/gated connector skill backfill | `Sundar Pichai` |
+| `littlesis_search` | Power-mapping database — entities, donations, board seats, family ties (lead, not evidence) | — | `kind: entities\|relationships` | `littlesis` | `Peter Thiel` |
+| `loc_search` | Library of Congress digital collections, including Chronicling America through the unified loc.gov API | — | `collection: chronicling-america\|prints\|manuscripts\|recordings\|maps`, `page: <int>` | `loc` | `battle of algiers` |
+| `nara_search` | US National Archives Catalog OPA v2 records, declassified federal records, military records, photos; requires NARA_API_KEY | — | `available_online`, `type_of_materials`, `result_types`, `record_group`, `page` | `nara` | `Vietnam War declassified` |
+| `nonprofits_search` | ProPublica Nonprofit Explorer (Form 990 filings, no auth) | — | — | `nonprofits` | `Heritage Foundation` |
+| `openalex_search` | OpenAlex Works scholarly articles, abstracts, DOIs, citations, authors, venues, and open-access URLs | — | `max_results`, `filter`, `sort` | `openalex` | `Project 2025 unitary executive theory` |
+| `opencorporates_search` | Global company registry — requires `OPENCORPORATES_API_KEY` | — | `jurisdiction: us_ca\|gb\|...` | `opencorporates` | `Acme Holdings` |
+| `openlibrary_search` | Open Library book metadata, ISBN/OCLC/LCCN identifiers, and Internet Archive scan IDs through search.json | — | `max_results` | `openlibrary` | `Pullman Strike 1894` |
+| `persee_search` | Persee French academic journals in humanities and social sciences (Playwright scrape, no auth) | — | `max_results` | `persee` | `guerre d'Algerie` |
+| `sanctions_search` | OFAC SDN + UK sanctions lists (local index, no auth) | — | — | exempt: #320 paid/gated and sanctions connector skill backfill | `Wagner Group` |
+| `scholar_search` | Google Scholar via SerpAPI — requires `SERPAPI_KEY` | — | `kind: case_law\|articles` | exempt: #320 paid/gated connector skill backfill | `Section 230 appellate` |
+| `si_search` | Smithsonian Open Access digitized collection objects, museum artifacts, images, 3D assets, and object metadata via api.data.gov | — | `max_results` | `smithsonian` | `Apollo 11` |
+| `sos_search` | State Secretary-of-State business entity filings (Playwright; CA wired, others stubs) | — | `state: CA\|DE\|NV\|...` | `sos` | `Acme Corp` |
+| `state_election_search` | Official state election candidate roster sources and portals | `state` | `office`, `cycle`, `max_results` | `state_election` | `2026 House candidates` |
+| `trove_search` | Trove / National Library of Australia metadata for newspapers, books, photos, magazines, oral histories; metadata-only default | — | `category`, `zone`, `sortby` | `trove` | `White Australia Policy 1901` |
+| `ukna_search` | UK National Archives Discovery catalogue metadata for Foreign Office, War Office, Colonial Office, and other UK archival records (no auth) | — | `max_results`, `page` | `ukna` | `Mau Mau Kenya` |
+| `usaspending_search` | Federal contracts, grants, loans (award-level detail, no auth) | — | `award_type: contracts\|grants\|loans` | `usaspending` | `Heritage Foundation contract` |
+| `wikidata_search` | Wikidata Query Service raw SPARQL for biographical, relational, occupational, place, and entity-ID data | — | `max_results` (client-side truncation; SPARQL should include `LIMIT`) | `wikidata` | `SELECT ?item ?itemLabel WHERE { ?item wdt:P31 wd:Q5; wdt:P19 wd:Q90 . SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } } LIMIT 3` |
+| `wikisource_search` | Wikisource transcribed primary documents across per-language hosts; fetch returns the full source text in cleaned_text | — | `lang: en\|fr\|es\|de\|it\|pt\|nl\|ru\|zh\|ja\|ar`, `max_results` | `wikisource` | `Treaty of Versailles` |
 
 <!-- END: direct-connector-kinds -->
 
diff --git a/docs/CONNECTOR_SKILL_TEMPLATE.md b/docs/CONNECTOR_SKILL_TEMPLATE.md
new file mode 100644
index 0000000..9050a33
--- /dev/null
+++ b/docs/CONNECTOR_SKILL_TEMPLATE.md
@@ -0,0 +1,70 @@
+---
+name: connector-short-name
+description: "One-line planner routing signal for this connector."
+when_to_use: "Research situations where this connector is the right first source."
+when_not_to_use: "Nearby questions that should use another connector or generic web search."
+---
+
+# Connector display name
+
+Use `<short_name>_search` for the connector's authoritative scope. Keep usage
+guidance here; registry schemas enforce only the minimum payload contract.
+
+## Official documentation
+
+- API or site docs:
+- Terms, usage policy, or robots guidance:
+- Migration or maintenance notices to verify before changing code:
+
+## Auth and cost
+
+- Required env vars:
+- Free/paid constraints:
+- Anonymous fallback behavior:
+
+## Required payload fields
+
+- `query` - required common field.
+- `sub_question` - required common field.
+- Connector-specific required fields:
+
+## Knobs available
+
+- `kind` - valid modes and defaults.
+- `max_results` - default and cap.
+- Other connector-specific fields:
+
+## Valid payload examples
+
+```yaml
+kind: <short_name>_search
+payload:
+  query: "example query"
+  sub_question: "What should this connector prove?"
+```
+
+## Request and pagination pattern
+
+Describe endpoint/page entry, filters, pagination, detail-page fan-out, and
+rate limits. Include stable selectors only when they are reliable enough to
+survive ordinary site changes.
+
+## Failure modes
+
+- Missing credentials:
+- Rate limits/captcha/blocked access:
+- Maintenance windows:
+- True no-result behavior:
+- Retry/backoff guidance:
+
+## Evidence shape
+
+Describe expected `SearchResult` fields and which connector-specific values
+belong in `extras`. For fetch support, describe expected `Source.cleaned_text`
+sections and `metadata` fields.
+
+## Anti-patterns
+
+- Do not use this connector outside its source authority.
+- Do not treat third-party profile/context pages as official records unless
+  the connector itself is an official registry.
diff --git a/src/research_agent/doctor.py b/src/research_agent/doctor.py
index d646016..9050f4f 100644
--- a/src/research_agent/doctor.py
+++ b/src/research_agent/doctor.py
@@ -524,14 +524,59 @@ def check_task_kind_registry_coherence() -> CheckResult:
         )
 
 
+def check_registry_contract_coherence() -> CheckResult:
+    """Assert registered connector contracts are importable and planner-visible."""
+    name = "registry_contract_coherence"
+    try:
+        import importlib
+
+        import research_agent.tools  # noqa: F401 - populate the connector registry
+        from research_agent.tools._registry import iter_kinds
+
+        problems: list[str] = []
+        required_summaries: list[str] = []
+        for entry in iter_kinds():
+            fields = set(entry.payload_schema.model_fields)
+            missing_base = {"query", "sub_question"} - fields
+            if missing_base:
+                problems.append(
+                    f"{entry.name} payload schema missing base fields: {sorted(missing_base)}"
+                )
+            module = importlib.import_module(f"research_agent.tools.{entry.module_name}")
+            if not hasattr(module, "search"):
+                problems.append(f"{entry.name} module {entry.module_name} has no search()")
+            required = ", ".join(entry.required_payload_fields) or "none"
+            required_summaries.append(f"{entry.name} required={required}")
+
+        if problems:
+            return CheckResult(
+                name,
+                "fail",
+                required=True,
+                detail="; ".join(problems),
+            )
+        detail = (
+            f"{len(required_summaries)} connector contract(s) expose required fields; "
+            + "; ".join(required_summaries[:8])
+        )
+        if len(required_summaries) > 8:
+            detail += f"; +{len(required_summaries) - 8} more"
+        return CheckResult(name, "ok", required=True, detail=detail)
+    except Exception as exc:  # noqa: BLE001
+        return CheckResult(
+            name,
+            "fail",
+            required=True,
+            detail=f"coherence check raised {type(exc).__name__}: {exc}",
+        )
+
+
 def check_registry_skill_coherence() -> list[CheckResult]:
     """Assert each registered kind's skill file exists and parses.
 
     Issue #223: every connector PR ships a ``skills/connectors/<name>.md``
-    file (per #211/#212). Kinds with ``skill_name=None`` are grandfathered
-    from the existing-connector skills backfill — they ``skip`` rather than
-    fail. Kinds whose ``skill_name`` is set but the file is missing are a
-    hard ``fail`` (the planner would fall back to a description-only path).
+    file (per #211/#212). Issue #317 makes missing coverage a hard failure
+    unless the registry entry carries an explicit issue-linked exemption.
     """
     from research_agent.skills.loader import SkillParseError, _parse, _skills_dir
     from research_agent.tools._registry import iter_kinds
@@ -539,42 +584,58 @@ def check_registry_skill_coherence() -> list[CheckResult]:
     results: list[CheckResult] = []
     for entry in iter_kinds():
         row = f"registry_skill:{entry.name}"
+        expected_name = entry.expected_skill_name
+        expected_path = _skills_dir("connectors") / f"{expected_name}.md"
         if entry.skill_name is None:
+            base_detail = (
+                f"kind={entry.name}; short_name={entry.short_name}; "
+                f"module_name={entry.module_name}; expected="
+                f"skills/connectors/{expected_name}.md"
+            )
+            if entry.skill_exemption:
+                results.append(
+                    CheckResult(
+                        row,
+                        "skip",
+                        required=False,
+                        detail=f"{base_detail}; exemption={entry.skill_exemption}",
+                    )
+                )
+                continue
             results.append(
                 CheckResult(
                     row,
-                    "skip",
-                    required=False,
+                    "fail",
+                    required=True,
                     detail=(
-                        f"{entry.name} grandfathered (skill_name=None);"
-                        " backfill pending"
+                        f"{base_detail}; missing skill_name and no documented exemption"
                     ),
                 )
             )
             continue
-        path = _skills_dir("connectors") / f"{entry.skill_name}.md"
-        if not path.exists():
+        if not expected_path.exists():
             results.append(
                 CheckResult(
                     row,
                     "fail",
                     required=True,
                     detail=(
-                        f"missing skills/connectors/{entry.skill_name}.md"
-                        f" for kind {entry.name}"
+                        f"missing skills/connectors/{expected_name}.md"
+                        f" for kind {entry.name}; short_name={entry.short_name};"
+                        f" module_name={entry.module_name}; expected={expected_path}"
                     ),
                 )
             )
             continue
         try:
-            _parse("connectors", entry.skill_name, path)
+            _parse("connectors", expected_name, expected_path)
         except SkillParseError as exc:
             results.append(
                 CheckResult(
                     row,
                     "fail",
                     required=True,
-                    detail=f"{path}: {exc}",
+                    detail=f"{expected_path}: {exc}",
                 )
             )
             continue
@@ -583,7 +644,7 @@ def check_registry_skill_coherence() -> list[CheckResult]:
                 row,
                 "ok",
                 required=True,
-                detail=f"skills/connectors/{entry.skill_name}.md parses",
+                detail=f"skills/connectors/{expected_name}.md parses",
             )
         )
     return results
@@ -602,7 +663,7 @@ def check_registry_skill_summary_coherence(
     try:
         detail_rows = rows if rows is not None else check_registry_skill_coherence()
         failures = [row for row in detail_rows if row.required and row.status == "fail"]
-        skipped = [row for row in detail_rows if row.status == "skip"]
+        exempted = [row for row in detail_rows if row.status == "skip"]
         ok_count = sum(1 for row in detail_rows if row.status == "ok")
         if failures:
             failed_names = ", ".join(row.name for row in failures)
@@ -618,7 +679,7 @@ def check_registry_skill_summary_coherence(
             required=True,
             detail=(
                 f"{ok_count} connector skill file(s) parse;"
-                f" {len(skipped)} grandfathered skip(s)"
+                f" {len(exempted)} documented exemption(s)"
             ),
         )
     except Exception as exc:  # noqa: BLE001
@@ -680,6 +741,7 @@ def run_all_checks(
     results.extend(check_sanctions_refresh())
     results.append(check_planner_allowlist_coherence())
     results.append(check_task_kind_registry_coherence())
+    results.append(check_registry_contract_coherence())
     registry_skill_rows = check_registry_skill_coherence()
     results.append(check_registry_skill_summary_coherence(registry_skill_rows))
     results.extend(registry_skill_rows)
diff --git a/src/research_agent/observability/events.py b/src/research_agent/observability/events.py
index 4a025f9..e3313e9 100644
--- a/src/research_agent/observability/events.py
+++ b/src/research_agent/observability/events.py
@@ -89,6 +89,8 @@
     "cornerstone_followups_emitted",
     "translation_skipped_budget",
     "second_order_fanout",
+    "connector_contract_rejected",
+    "connector_contract_repaired",
     "source_list_reconciled",
     "synth_status_from_prose",
     "synth_status_missing",
diff --git a/src/research_agent/orchestrator/loop.py b/src/research_agent/orchestrator/loop.py
index 9eb328a..fbdf590 100644
--- a/src/research_agent/orchestrator/loop.py
+++ b/src/research_agent/orchestrator/loop.py
@@ -340,7 +340,7 @@ def _task_passthrough_payload(payload: dict[str, Any]) -> dict[str, Any]:
 
 def _deep_load_skills_for_connector(
     job: Job,
-    module_name: str,
+    skill_name: str | None,
     payload: dict[str, Any],
 ) -> None:
     """Deep-load the connector skill + any active strategy skills.
@@ -359,15 +359,16 @@ def _deep_load_skills_for_connector(
     connector path.
     """
     try:
-        load_skill("connectors", module_name, job=job)
+        if skill_name:
+            load_skill("connectors", skill_name, job=job)
         active = payload.get("_active_strategies")
         if isinstance(active, list) and active:
             load_strategies([s for s in active if isinstance(s, str)], job=job)
     except Exception:  # noqa: BLE001 — skills are auxiliary; never break the connector
-        logger.exception("skills_deep_load_failed module=%s", module_name)
+        logger.exception("skills_deep_load_failed skill=%s", skill_name)
 
 
-def _make_connector_search_handler(module_name: str) -> Handler:
+def _make_connector_search_handler(entry: Any) -> Handler:
     """Build a thin search-handler that dispatches to ``tools.<module_name>.search``.
 
     Converts the connector's :class:`MissingCredentialError` (raised by
@@ -387,12 +388,49 @@ def _make_connector_search_handler(module_name: str) -> Handler:
     planner's system prompt and is materialized exactly at task-emit time.
     """
 
+    kind_name = str(entry.name)
+    module_name = str(entry.module_name)
+    skill_name = entry.skill_name
+
     async def _handler(job: Job, task: dict[str, Any]) -> dict[str, Any]:
         from importlib import import_module
 
+        from research_agent.tools._registry import validate_payload_contract
+
+        payload = task["payload"] if isinstance(task.get("payload"), dict) else {}
+        contract = validate_payload_contract(kind_name, payload)
+        if not contract.valid:
+            emit(
+                job,
+                "ERROR",
+                "loop",
+                "connector_contract_rejected",
+                {
+                    "stage": "dispatch",
+                    "task_id": task.get("id"),
+                    "kind": kind_name,
+                    "errors": list(contract.errors),
+                    "message": contract.repair_message,
+                },
+            )
+            raise FatalError(contract.repair_message)
+        if contract.repaired:
+            emit(
+                job,
+                "INFO",
+                "loop",
+                "connector_contract_repaired",
+                {
+                    "stage": "dispatch",
+                    "task_id": task.get("id"),
+                    "kind": kind_name,
+                    "before": payload,
+                    "after": contract.payload,
+                },
+            )
+        payload = contract.payload
         mod = import_module(f"research_agent.tools.{module_name}")
-        payload = task["payload"]
-        _deep_load_skills_for_connector(job, module_name, payload)
+        _deep_load_skills_for_connector(job, skill_name, payload)
         kwargs = {
             k: v for k, v in payload.items() if k in _CONNECTOR_SEARCH_PASSTHROUGH
         }
@@ -406,7 +444,7 @@ async def _handler(job: Job, task: dict[str, Any]) -> dict[str, Any]:
     return _handler
 
 
-def _make_connector_fetch_handler(module_name: str) -> Handler:
+def _make_connector_fetch_handler(entry: Any) -> Handler:
     """Build a thin fetch-handler that dispatches to ``tools.<module_name>.fetch``.
 
     Mirrors :func:`_make_connector_search_handler` but for the single-URL
@@ -418,12 +456,15 @@ def _make_connector_fetch_handler(module_name: str) -> Handler:
     enters via the search or fetch side.
     """
 
+    module_name = str(entry.module_name)
+    skill_name = entry.skill_name
+
     async def _handler(job: Job, task: dict[str, Any]) -> dict[str, Any]:
         from importlib import import_module
 
         mod = import_module(f"research_agent.tools.{module_name}")
         payload = task["payload"]
-        _deep_load_skills_for_connector(job, module_name, payload)
+        _deep_load_skills_for_connector(job, skill_name, payload)
         url = payload.get("url")
         if not url:
             raise FatalError(f"{module_name}_fetch: missing url field")
@@ -436,16 +477,17 @@ async def _handler(job: Job, task: dict[str, Any]) -> dict[str, Any]:
     return _handler
 
 
-def _registered_connector_module_names() -> tuple[str, ...]:
-    """Return the connector ``module_name`` for every registered direct kind.
+def _registered_connector_entries() -> tuple[Any, ...]:
+    """Return every registered direct-connector entry.
 
     Replaces the hand-maintained ``_CONNECTOR_KINDS`` tuple. The order is
-    deterministic (alphabetical) per :func:`iter_kinds`. Used by the handler
-    registry below to wire one ``<x>_search``/``<x>_fetch`` pair per kind.
+    deterministic (alphabetical) per :func:`iter_kinds`. Handler registration
+    uses ``entry.name`` for task kinds and ``entry.module_name`` for imports,
+    so short-name/module-name mismatches do not break dispatch.
     """
     from research_agent.tools._registry import iter_kinds
 
-    return tuple(entry.short_name for entry in iter_kinds())
+    return tuple(iter_kinds())
 
 
 def default_handlers(router: Any) -> dict[str, Handler]:
@@ -637,9 +679,9 @@ async def _critique(job: Job, task: dict[str, Any]) -> dict[str, Any] | None:
     }
     _annotate_heuristic_handler(_synthesize, tier="frontier", router=router)
     _annotate_heuristic_handler(_critique, tier="frontier_alt", router=router)
-    for name in _registered_connector_module_names():
-        registry[f"{name}_search"] = _make_connector_search_handler(name)
-        registry[f"{name}_fetch"] = _make_connector_fetch_handler(name)
+    for entry in _registered_connector_entries():
+        registry[entry.name] = _make_connector_search_handler(entry)
+        registry[entry.name.replace("_search", "_fetch")] = _make_connector_fetch_handler(entry)
     return registry
 
 
diff --git a/src/research_agent/orchestrator/plan.py b/src/research_agent/orchestrator/plan.py
index 44e1441..3407d61 100644
--- a/src/research_agent/orchestrator/plan.py
+++ b/src/research_agent/orchestrator/plan.py
@@ -562,6 +562,50 @@ def _emit_plan_created(job: Job, plan: Plan, *, tier: str, kind: str) -> None:
     )
 
 
+def _validate_direct_connector_spec(job: Job, spec: TaskSpec, *, index: int) -> TaskSpec:
+    """Validate/normalize a planner-emitted direct connector task before enqueue."""
+    import research_agent.tools  # noqa: F401 - populate connector registry
+    from research_agent.tools._registry import is_registered, validate_payload_contract
+
+    if not is_registered(spec.kind):
+        return spec
+
+    result = validate_payload_contract(str(spec.kind), spec.payload)
+    if not result.valid:
+        emit(
+            job,
+            "ERROR",
+            "planner",
+            "connector_contract_rejected",
+            {
+                "stage": "pre_enqueue",
+                "plan_task_index": index,
+                "kind": spec.kind,
+                "errors": list(result.errors),
+                "message": result.repair_message,
+            },
+        )
+        raise PlanParseError(result.repair_message)
+
+    if result.repaired:
+        emit(
+            job,
+            "INFO",
+            "planner",
+            "connector_contract_repaired",
+            {
+                "stage": "pre_enqueue",
+                "plan_task_index": index,
+                "kind": spec.kind,
+                "before": spec.payload,
+                "after": result.payload,
+            },
+        )
+        return spec.model_copy(update={"payload": result.payload})
+
+    return spec
+
+
 def _enqueue_plan_tasks(job: Job, plan: Plan) -> list[int]:
     """Persist ``plan.task_template`` into the tasks queue.
 
@@ -594,6 +638,10 @@ def _enqueue_plan_tasks(job: Job, plan: Plan) -> list[int]:
         ]
     else:
         specs = list(plan.task_template)
+    specs = [
+        _validate_direct_connector_spec(job, spec, index=index)
+        for index, spec in enumerate(specs)
+    ]
     return enqueue(job, specs, plan.version)
 
 
diff --git a/src/research_agent/prompts/planner.md b/src/research_agent/prompts/planner.md
index 4eee1f4..f1a1ded 100644
--- a/src/research_agent/prompts/planner.md
+++ b/src/research_agent/prompts/planner.md
@@ -163,10 +163,12 @@ configured in this environment).
 #### Direct connector kinds
 
 Each kind dispatches to a dedicated `tools/<name>.py` module. Payload
-shape is `{ query: "…", sub_question: "…" }`; a few connectors take
-optional `kind`, `state`, `since`, or `max_results` knobs (noted
-below). The loop turns top hits into `web_fetch` follow-ups exactly as
-it does for `web_search`.
+shape always starts with `{ query: "…", sub_question: "…" }`. The table
+separates connector-specific required fields from optional knobs. Missing
+required connector fields are rejected before enqueue, so repair the next
+plan with the required payload shape rather than retrying the same task.
+The loop turns top hits into `web_fetch` follow-ups exactly as it does for
+`web_search`.
 
 {{direct_kinds_table}}
 
@@ -207,7 +209,7 @@ connector module on the fetch side.
 - `arxiv_search`: `{ query: "…", sub_question: "…", max_results: 10 }`
 - `local_corpus_query`: `{ query: "…", sub_question: "…", top_k: 10 }`
 - `cornerstone_query`: `{ sub_question: "…", cornerstone_url: "<URL>", top_k: 8 }` (replans only — the index does not exist on the initial plan)
-- direct connector kinds ({{kinds_allowlist}}): `{ query: "…", sub_question: "…" }` plus the optional knobs noted in the **Direct connector kinds** table above (e.g. `kind`, `state`, `since`, `max_results`).
+- direct connector kinds ({{kinds_allowlist}}): `{ query: "…", sub_question: "…" }` plus any required fields and optional knobs noted in the **Direct connector kinds** table above (e.g. `state` for `state_election_search`; `kind`, `since`, `max_results` where listed).
 
 ### When to use each search
 
diff --git a/src/research_agent/skills/connectors/bbb.md b/src/research_agent/skills/connectors/bbb.md
new file mode 100644
index 0000000..031dc23
--- /dev/null
+++ b/src/research_agent/skills/connectors/bbb.md
@@ -0,0 +1,74 @@
+---
+name: bbb
+description: "BBB business profiles, ratings, accreditation, complaints, and alerts; private nonprofit context, not government authority."
+when_to_use: "Business reputation context, complaint history, BBB accreditation/rating, and profile discovery for companies or contractors."
+when_not_to_use: "Official licensing, legal status, corporate filings, sanctions, or government enforcement records."
+---
+
+# BBB connector
+
+Use `bbb_search` for Better Business Bureau profile context. BBB is a private nonprofit
+marketplace-trust organization, not a government licensing authority.
+Treat BBB profiles as reputation/context evidence and cross-check official
+licensing or registry facts elsewhere.
+
+## Official documentation
+
+- BBB homepage/search entry: https://www.bbb.org/
+- About BBB and profile/rating/accreditation behavior: https://www.bbb.org/all/about-bbb/
+- Complaint process: https://www.bbb.org/process-of-complaints-and-reviews/complaints
+- BBB FAQ: https://www.bbb.org/frequently-asked-questions
+
+## Auth and cost
+
+No auth and no public API. The connector uses Playwright against public BBB
+pages with a conservative per-host rate gate.
+
+## Required payload fields
+
+- `query` - required common field; use business name plus city/state when known.
+- `sub_question` - required common field.
+
+## Knobs available
+
+- `max_results` - optional client cap.
+
+## Valid payload examples
+
+```yaml
+kind: bbb_search
+payload:
+  query: "SBI Builders San Jose"
+  sub_question: "What BBB profile, rating, accreditation, and complaint context exists for SBI Builders?"
+  max_results: 5
+```
+
+## Request and pagination pattern
+
+The connector starts at BBB search with business name/location terms, reads
+result cards, then fetches profile pages. Profiles can expose rating,
+accreditation, business details, complaint counts, complaint categories,
+reviews, alerts, and government-action notes. BBB pages are regional; include
+city/state to avoid merging branch profiles.
+
+## Failure modes
+
+- Captcha, bot-blocking, or a blank React render means blocked access, not a
+  true no-result.
+- Result cards without profile links should be skipped.
+- Ratings and accreditation are separate; accreditation is voluntary.
+- Complaint text can be behind reveal controls; fetch should click stable
+  "show more" buttons when possible.
+
+## Evidence shape
+
+`SearchResult.extras` should include rating, accreditation status, location,
+profile URL, complaint counts when visible, and `source_kind="bbb"`. `Source`
+metadata should preserve rating, accreditation, complaint counts, categories,
+and government-action snippets.
+
+## Anti-patterns
+
+- Do not cite BBB as proof that a contractor is licensed.
+- Do not treat a high rating as proof there are no legal or licensing issues.
+- Do not compare complaint counts without considering company size and volume.
diff --git a/src/research_agent/skills/connectors/calaccess.md b/src/research_agent/skills/connectors/calaccess.md
new file mode 100644
index 0000000..7833368
--- /dev/null
+++ b/src/research_agent/skills/connectors/calaccess.md
@@ -0,0 +1,78 @@
+---
+name: calaccess
+description: "California Cal-Access and Power Search campaign finance records for contributions and independent expenditures."
+when_to_use: "California state campaign contributions, candidate/committee campaign finance, ballot-measure committees, and independent expenditures."
+when_not_to_use: "Federal FEC filings, lobbying records not implemented by the connector, business entities, or non-California campaign finance."
+---
+
+# Cal-Access connector
+
+Use `calaccess_search` for California Secretary of State campaign-finance
+searches. Power Search and Cal-Access are official California disclosure
+surfaces, but the connector currently automates only contributions and
+independent expenditures.
+
+## Official documentation
+
+- California Power Search: https://powersearch.sos.ca.gov/
+- Power Search FAQ: https://powersearch.sos.ca.gov/frequently-asked-questions/
+- Power Search help: https://powersearch.sos.ca.gov/help/
+- Cal-Access FAQ for candidates, committees, and entities: https://www.sos.ca.gov/campaign-lobbying/cal-access-resources/cal-access-users-manual/campaign-finance-faq-table-contents-section-1/cal-access-faqs-s1-q7
+
+## Auth and cost
+
+No auth and no paid API. The connector uses Playwright against public
+Secretary of State pages with a conservative rate gate.
+
+## Required payload fields
+
+- `query` - required common field; use candidate, committee, donor, payee, or
+  ballot-measure terms.
+- `sub_question` - required common field.
+
+## Knobs available
+
+- `kind` - `contributions` (default) or `independent_expenditures`.
+- `max_results` - optional client cap.
+
+## Valid payload examples
+
+```yaml
+kind: calaccess_search
+payload:
+  query: "Newsom"
+  sub_question: "What California Power Search contribution records mention Newsom?"
+  kind: "contributions"
+  max_results: 10
+```
+
+## Request and pagination pattern
+
+Power Search contributions use server-rendered forms and result tables.
+Independent expenditures use a separate Power Search service. Cal-Access
+legacy pages cover more campaign-finance and lobbying data, but lobbying is
+not automated here. For candidate/committee/entity discovery, use Cal-Access
+or Power Search as the official source, then fetch detail pages for amounts,
+dates, parties, and filing references.
+
+## Failure modes
+
+- A maintenance page, frame-only legacy page, or empty SPA shell is not a true
+  no-result.
+- `kind=lobbying` is intentionally unsupported until a separate recipe lands.
+- Selector drift should produce diagnostics rather than fabricated rows.
+- Search results may omit rolled-up totals; fetch detail pages before citing.
+
+## Evidence shape
+
+`SearchResult.extras` should preserve contributor/payee, recipient/committee,
+amount, date, committee IDs, office/ballot-measure fields, filing reference,
+and `source_kind="calaccess"`. `Source.cleaned_text` should render the
+rolled-up record and detail-page provenance.
+
+## Anti-patterns
+
+- Do not use Cal-Access for federal candidate filing status; use `fec_search`.
+- Do not use `sos_search`; in this repo it means business filings, not
+  campaign finance.
+- Do not treat a selector miss as a confirmed disclosure gap.
diff --git a/src/research_agent/skills/connectors/gdelt.md b/src/research_agent/skills/connectors/gdelt.md
new file mode 100644
index 0000000..3409800
--- /dev/null
+++ b/src/research_agent/skills/connectors/gdelt.md
@@ -0,0 +1,74 @@
+---
+name: gdelt
+description: "GDELT DOC 2.0 news discovery for broad article/event coverage with no auth."
+when_to_use: "Broad news/event discovery, media coverage timelines, and finding article URLs across countries or languages."
+when_not_to_use: "Primary government records, court filings, company filings, or source-specific registries; use the direct official connector first."
+---
+
+# GDELT connector
+
+Use `gdelt_search` for broad news discovery when a query should surface many
+publishers quickly. Treat it as discovery and context, not as the final
+authority for a factual claim.
+
+## Official documentation
+
+- GDELT DOC 2.0 API overview: https://blog.gdeltproject.org/gdelt-doc-2-0-api-debuts/amp/
+- DOC API base used by the connector: `https://api.gdeltproject.org/api/v2/doc/doc`
+- GDELT data/codebooks landing pages: https://www.gdeltproject.org/data.html
+
+## Auth and cost
+
+No key is required. The connector rate-limits itself; keep broad sweeps polite
+because DOC API traffic goes to public GDELT infrastructure.
+
+## Required payload fields
+
+- `query` - required common field; use a compact news query.
+- `sub_question` - required common field; state what the coverage should prove.
+
+## Knobs available
+
+- `since` - optional `YYYY-MM-DD` lower bound; narrows the DOC API time window.
+- `language` - optional language filter such as `english`.
+- `max_results` - optional client cap.
+
+## Valid payload examples
+
+```yaml
+kind: gdelt_search
+payload:
+  query: "Project 2025 mainstream coverage"
+  sub_question: "Which mainstream outlets covered Project 2025 recently?"
+  since: "2026-01-01"
+  language: "english"
+  max_results: 10
+```
+
+## Request and pagination pattern
+
+The connector uses DOC 2.0 article-list style requests and returns article
+URLs. Use GDELT query operators only when they are necessary: quoted phrases,
+`nearN:"word otherword"` proximity, domain/source filters, and language/date
+filters. Keep the first query broad, then replan with publisher names or
+specific claims discovered in results.
+
+## Failure modes
+
+- Empty results often mean the query is too narrative; shorten it.
+- HTTP 429/5xx or timeouts should be retried with backoff.
+- DOC results can include syndication, duplicates, and secondary writeups.
+- GDELT does not guarantee archival; fetch article URLs promptly.
+
+## Evidence shape
+
+`SearchResult` rows should contain the article URL, title, snippet or source
+metadata, published timestamp when available, `source_kind="gdelt"`, and
+extras such as source country/language/domain when returned. Fetch follows the
+normal `web_fetch` path; cite the fetched article, not just the GDELT row.
+
+## Anti-patterns
+
+- Do not use GDELT when an official connector exists for the same fact.
+- Do not treat GDELT mention volume as proof of event truth.
+- Do not use long natural-language paragraphs as DOC queries.
diff --git a/src/research_agent/skills/connectors/lda.md b/src/research_agent/skills/connectors/lda.md
new file mode 100644
index 0000000..776ba83
--- /dev/null
+++ b/src/research_agent/skills/connectors/lda.md
@@ -0,0 +1,78 @@
+---
+name: lda
+description: "Lobbying Disclosure Act filings, registrants, clients, and LD-203 contributions from LDA.gov."
+when_to_use: "Federal lobbying registrations, quarterly LD-2 activity, registrant/client lookup, and LD-203 political contribution filings."
+when_not_to_use: "Campaign finance transactions outside LD-203, state lobbying, or federal contracts; use fec_search, state portals, or usaspending_search."
+---
+
+# LDA connector
+
+Use `lda_search` for the federal Lobbying Disclosure Act database. It covers
+LD-1 registrations, LD-2 quarterly lobbying activity, and LD-203 contribution
+reports. LDA rows are disclosures, not proof that a policy position succeeded.
+
+## Official documentation
+
+- LDA.gov Download API page: https://lda.gov/api/
+- LDA.gov public search home: https://lda.gov/
+- Senate legacy API documentation and migration notice: https://lda.senate.gov/api/redoc/v1/
+
+The Senate page says the legacy site will no longer be available after
+2026-06-30 and directs systems to LDA.gov.
+
+## Auth and cost
+
+The API key registration link is on the LDA.gov API page. Anonymous access can
+work at low volume, but authenticated access should be used for long jobs when
+`LDA_API_KEY` is configured. The connector sends a token when available.
+
+## Required payload fields
+
+- `query` - required common field; use a registrant, client, lobbyist, issue,
+  or organization name.
+- `sub_question` - required common field.
+
+## Knobs available
+
+- `kind` - `filings` (default), `registrants`, or `contributions`.
+- `max_results` - optional client cap.
+
+## Valid payload examples
+
+```yaml
+kind: lda_search
+payload:
+  query: "Heritage Foundation"
+  sub_question: "What federal lobbying filings or registrants mention Heritage Foundation?"
+  kind: "filings"
+  max_results: 10
+```
+
+## Request and pagination pattern
+
+Use `kind=registrants` to resolve a formal registrant/client identity, then
+`kind=filings` for LD-1/LD-2 activity and `kind=contributions` for LD-203.
+The API is paginated; keep `max_results` bounded and fan out only from rows
+with stable filing or registrant URLs/IDs.
+
+## Failure modes
+
+- 401/403 means an API token is missing, invalid, or rate-limited.
+- Legacy `lda.senate.gov` links should be treated as transitional because of
+  the 2026-06-30 migration notice.
+- Names are formal and can differ from common names; try affiliates and legal
+  entities before declaring a gap.
+
+## Evidence shape
+
+`SearchResult.extras` should preserve filing IDs, registrant/client names,
+filing type, filing year/period, amount fields for LD-203, and source URLs.
+`fetch()` should return a `Source` with `source_kind="lda"` and markdown
+sections that make LD-1/LD-2/LD-203 distinctions clear.
+
+## Anti-patterns
+
+- Do not use LDA for FEC donor transactions; use `fec_search`.
+- Do not merge LD-1 registration, LD-2 activity, and LD-203 contributions into
+  a single undifferentiated claim.
+- Do not rely on legacy Senate URLs without checking LDA.gov availability.
diff --git a/src/research_agent/skills/connectors/licensing.md b/src/research_agent/skills/connectors/licensing.md
new file mode 100644
index 0000000..bcf5a96
--- /dev/null
+++ b/src/research_agent/skills/connectors/licensing.md
@@ -0,0 +1,82 @@
+---
+name: licensing
+description: "State contractor/professional licensing lookup; CA CSLB is wired, TX/FL/NY are documented stubs."
+when_to_use: "Checking contractor or professional license status, classifications, business identity, complaint disclosure, and license detail pages."
+when_not_to_use: "BBB reputation, business entity filings, campaign finance, or legal case records."
+---
+
+# Licensing connector
+
+Use `licensing_search` for official licensing-board lookups. The current
+implementation supports California CSLB. Texas, Florida, and New York entries
+are registry stubs and should return unsupported until selectors are built.
+
+## Official documentation
+
+- California CSLB Check a License: https://cslb.ca.gov/OnlineServices/CheckLicenseII/CheckLicense.aspx
+- CSLB Online Services: https://www.cslb.ca.gov/OnlineService.aspx
+- Texas TDLR license search entry: https://license.state.tx.us/search/
+- Florida DBPR license search: https://www.myfloridalicense.com/wl11.asp?SID+=&mode=0
+- Florida verify-a-license instructions: https://www2.myfloridalicense.com/how-to-verify-a-license/
+- New York DOS Licensing Services: https://dos.ny.gov/licensing/index.html
+- New York DOS business-name license search: https://appext20.dos.ny.gov/lcns_public/bus_name_search_frm
+
+## Auth and cost
+
+No auth and no paid API for the supported CSLB workflow. The connector uses
+Playwright and should keep conservative per-host pacing.
+
+## Required payload fields
+
+- `query` - required common field; use license number or business/person name.
+- `sub_question` - required common field.
+
+## Knobs available
+
+- `state` - `CA` is supported; `TX`, `FL`, and `NY` are stubs until recipes are
+  implemented.
+- `max_results` - optional client cap.
+
+## Valid payload examples
+
+```yaml
+kind: licensing_search
+payload:
+  query: "SBI Builders"
+  sub_question: "What official CSLB license status and classifications exist for SBI Builders?"
+  state: "CA"
+  max_results: 5
+```
+
+## Request and pagination pattern
+
+For California, the connector uses CSLB Check a License tabs for license
+number, business-name, and personnel-name searches. Detail pages can expose
+license status, classifications, business type/address, bonds, workers'
+compensation, personnel, and complaint disclosure links. CSLB notes that the
+database is unavailable Sundays 8 p.m. through Monday 6 a.m. for maintenance.
+
+TX/FL/NY official lookup entry points are documented above but not automated.
+Do not emit them as successful evidence until state recipes exist.
+
+## Failure modes
+
+- CSLB maintenance window, blocked Playwright access, or missing ASP.NET
+  postback fields should be reported as blocked/unsupported, not no-result.
+- Searches returning more than CSLB's visible row cap need narrower queries.
+- TX/FL/NY stubs are unsupported implementation gaps.
+- License status is not a performance review; cross-check BBB/reviews only as
+  separate context.
+
+## Evidence shape
+
+`SearchResult.extras` should include license number, business name, state,
+status, classification, profile URL, and `source_kind="licensing"`. `Source`
+metadata should preserve status, classifications, bond/workers-comp fields,
+complaint disclosure notes, and the official board URL.
+
+## Anti-patterns
+
+- Do not use BBB as a substitute for official licensing records.
+- Do not treat a missing CSLB result during maintenance as proof of no license.
+- Do not claim TX/FL/NY support until the connector has working selectors.
diff --git a/src/research_agent/skills/connectors/littlesis.md b/src/research_agent/skills/connectors/littlesis.md
new file mode 100644
index 0000000..a22e109
--- /dev/null
+++ b/src/research_agent/skills/connectors/littlesis.md
@@ -0,0 +1,73 @@
+---
+name: littlesis
+description: "LittleSis power-map entities and relationships; useful leads, not final evidence."
+when_to_use: "Discovering people, organizations, board seats, donations, family ties, and relationship leads to verify elsewhere."
+when_not_to_use: "When the task needs primary-source proof, official filings, sanctions, contracts, or exhaustive rosters."
+---
+
+# LittleSis connector
+
+Use `littlesis_search` to discover relationship leads between people and
+organizations. LittleSis is a power-mapping database with useful provenance,
+but rows should be verified against original filings or pages before final
+synthesis.
+
+## Official documentation
+
+- LittleSis API documentation: https://dev.littlesis.org/api/
+- Public API entry: https://littlesis.org/api
+- Bulk data note: https://littlesis.org/bulk_data
+
+## Auth and cost
+
+No API key is required for normal API use. Requests may be rate-limited. Bulk
+dataset access is separate from this connector.
+
+## Required payload fields
+
+- `query` - required common field; use a person, organization, or formal name.
+- `sub_question` - required common field.
+
+## Knobs available
+
+- `kind` - `entities` (default) or `relationships`.
+- `max_results` - optional client cap.
+
+## Valid payload examples
+
+```yaml
+kind: littlesis_search
+payload:
+  query: "Peter Thiel"
+  sub_question: "What LittleSis entity or relationship leads exist for Peter Thiel?"
+  kind: "entities"
+  max_results: 10
+```
+
+## Request and pagination pattern
+
+Start with `kind=entities` to resolve the LittleSis ID and canonical page.
+Use relationship endpoints for known IDs when a follow-up needs board seats,
+donations, family ties, ownership, or position links. Relationship categories
+carry IDs and names; preserve both because category semantics matter.
+
+## Failure modes
+
+- Ambiguous names can return many similarly named entities; use location,
+  employer, or aliases in follow-up queries.
+- Relationship direction can vary; check whether the target is `entity1` or
+  `entity2` before labeling counterparties.
+- Missing data is not a confirmed absence of a relationship.
+
+## Evidence shape
+
+`SearchResult` rows should include LittleSis URLs, entity or relationship IDs,
+relationship category, counterpart names, dates, amount when present, and
+provenance hints in `extras`. `Source.metadata` should preserve the API
+resource type, IDs, and LittleSis page URL.
+
+## Anti-patterns
+
+- Do not cite LittleSis alone for a high-stakes allegation.
+- Do not assume all relationship counterparts are organizations.
+- Do not infer current status when `is_current` or dates are null.
diff --git a/src/research_agent/skills/connectors/nonprofits.md b/src/research_agent/skills/connectors/nonprofits.md
new file mode 100644
index 0000000..604b53c
--- /dev/null
+++ b/src/research_agent/skills/connectors/nonprofits.md
@@ -0,0 +1,69 @@
+---
+name: nonprofits
+description: "ProPublica Nonprofit Explorer API v2 for IRS Form 990 nonprofit profiles and filings."
+when_to_use: "US nonprofit lookup by name or EIN, Form 990 summaries, revenue/assets/compensation context, and filing PDF/XML links."
+when_not_to_use: "For-profit companies, state charity registrations, campaign committees, or lobbying filings."
+---
+
+# Nonprofits connector
+
+Use `nonprofits_search` for IRS-recognized nonprofit organizations in
+ProPublica Nonprofit Explorer. It is best for Form 990 profile and filing
+context, not real-time corporate or campaign-finance records.
+
+## Official documentation
+
+- Nonprofit Explorer API v2: https://projects.propublica.org/nonprofits/api/
+- Nonprofit Explorer product page: https://projects.propublica.org/nonprofits/
+- ProPublica data terms: https://www.propublica.org/about/propublica-data-terms-of-use
+
+## Auth and cost
+
+No key is required. The API is free subject to ProPublica terms. It accepts
+GET requests under `https://projects.propublica.org/nonprofits/api/v2`.
+
+## Required payload fields
+
+- `query` - required common field; use organization name or EIN.
+- `sub_question` - required common field.
+
+## Knobs available
+
+- `max_results` - optional client cap.
+
+## Valid payload examples
+
+```yaml
+kind: nonprofits_search
+payload:
+  query: "Heritage Foundation"
+  sub_question: "What Nonprofit Explorer profile and recent Form 990 filings exist for Heritage Foundation?"
+  max_results: 10
+```
+
+## Request and pagination pattern
+
+Search uses `/search.json` and can return paginated organization results.
+Profile fetches use `/organizations/:ein.json`. The API supports state and
+taxonomy filters, but the current connector exposes only the common query and
+result cap. Replan with an EIN when a search result needs exact disambiguation.
+
+## Failure modes
+
+- Nonprofit Explorer excludes some very small e-Postcard organizations.
+- Data reflects IRS processing and ProPublica updates; it is not real-time.
+- Name search can match alternate names and city text; verify EIN before
+  merging entities.
+
+## Evidence shape
+
+`SearchResult.extras` should preserve EIN, organization name, city/state,
+NTEE/category, ruling/subsection codes, latest tax period, and filing links.
+`fetch()` should return `Source.cleaned_text` summarizing profile and filings,
+with metadata containing EIN and source URLs.
+
+## Anti-patterns
+
+- Do not use this connector for PACs or candidate committees; use `fec_search`.
+- Do not infer current operations solely from an old Form 990.
+- Do not merge organizations with similar names without matching EINs.
diff --git a/src/research_agent/skills/connectors/opencorporates.md b/src/research_agent/skills/connectors/opencorporates.md
new file mode 100644
index 0000000..5e36925
--- /dev/null
+++ b/src/research_agent/skills/connectors/opencorporates.md
@@ -0,0 +1,76 @@
+---
+name: opencorporates
+description: "OpenCorporates company registry lookup with API-token-gated live access."
+when_to_use: "Company/entity lookup across jurisdictions, officers, filings, registered addresses, and registry identifiers."
+when_not_to_use: "Campaign committees, nonprofits, sanctions, securities filings, or state portals already covered by a more authoritative connector."
+---
+
+# OpenCorporates connector
+
+Use `opencorporates_search` for corporate registry discovery across many
+jurisdictions. It is a registry aggregator; when possible, follow links back
+to the original jurisdiction record for final citation.
+
+## Official documentation
+
+- API landing page: https://api.opencorporates.com/
+- API reference v0.4.8: https://api.opencorporates.com/documentation/API-Reference
+- API FAQ: https://api.opencorporates.com/documentation/FAQs
+
+## Auth and cost
+
+`OPENCORPORATES_API_KEY` is required for live requests. The API reference says
+an API key is required and is submitted as a query parameter. Open-data uses
+may qualify for free access under the OpenCorporates license; paid plans remove
+some share-alike restrictions.
+
+## Required payload fields
+
+- `query` - required common field; use company name or officer/entity name.
+- `sub_question` - required common field.
+
+## Knobs available
+
+- `jurisdiction` - optional OpenCorporates jurisdiction code such as `us_ca`
+  or `gb`.
+- `max_results` - optional client cap.
+
+## Valid payload examples
+
+```yaml
+kind: opencorporates_search
+payload:
+  query: "Acme Holdings"
+  sub_question: "Which OpenCorporates company records match Acme Holdings?"
+  jurisdiction: "us_ca"
+  max_results: 10
+```
+
+## Request and pagination pattern
+
+Start with company search and add `jurisdiction` when the target state/country
+is known. Company detail pages may expose company number, jurisdiction code,
+current status, officers, registered address, and filing links. Preserve
+OpenCorporates IDs and original registry links for follow-up.
+
+## Failure modes
+
+- Missing `OPENCORPORATES_API_KEY` should skip or fail cleanly, not fall back
+  to scraping.
+- 401/403 means token/access level problems.
+- Jurisdiction codes are exact; a US state name is not a jurisdiction code.
+- Aggregated records can lag or omit filings from the source registry.
+
+## Evidence shape
+
+`SearchResult.extras` should include jurisdiction code, company number,
+company status, incorporation/current status dates when available, officers,
+registered address, OpenCorporates URL, and original registry URL. `Source`
+metadata should keep the same identifiers.
+
+## Anti-patterns
+
+- Do not treat OpenCorporates as the final authority when a state registry
+  source URL is available.
+- Do not use business entity filings as proof of candidate status.
+- Do not run live jobs without confirming the API key/license constraints.
diff --git a/src/research_agent/skills/connectors/usaspending.md b/src/research_agent/skills/connectors/usaspending.md
new file mode 100644
index 0000000..403e4ca
--- /dev/null
+++ b/src/research_agent/skills/connectors/usaspending.md
@@ -0,0 +1,77 @@
+---
+name: usaspending
+description: "USAspending.gov API v2 federal award search for contracts, grants, and loans."
+when_to_use: "Federal awards, recipient spending, agency funding, contract/grant/loan discovery, and award-detail follow-up."
+when_not_to_use: "Lobbying disclosures, campaign finance, state/local awards, or procurement opportunities before award."
+---
+
+# USAspending connector
+
+Use `usaspending_search` for federal award data after money has been obligated.
+It is useful for recipient, agency, NAICS/PSC, and award-detail research.
+
+## Official documentation
+
+- API documentation index: https://api.usaspending.gov/docs/
+- Endpoint list: https://api.usaspending.gov/docs/endpoints
+- USAspending site: https://www.usaspending.gov/
+
+## Auth and cost
+
+The public API docs currently list no authorization requirement. The
+connector uses polite rate limiting.
+
+## Required payload fields
+
+- `query` - required common field; use recipient, agency, award term, UEI, or
+  contract/grant keyword.
+- `sub_question` - required common field.
+
+## Knobs available
+
+- `award_type` - `contracts` (default), `grants`, or `loans`.
+- `max_results` - optional client cap.
+
+## Valid payload examples
+
+```yaml
+kind: usaspending_search
+payload:
+  query: "Heritage Foundation contract"
+  sub_question: "What federal USAspending awards mention Heritage Foundation?"
+  award_type: "contracts"
+  max_results: 10
+```
+
+## Request and pagination pattern
+
+The connector uses API v2 award search endpoints such as
+`/api/v2/search/spending_by_award/`, which accept POST filter bodies. Keep
+filters explicit: award type bucket, recipient/keyword search text, time
+period, agency, NAICS/PSC, or location. Paginated responses should preserve
+award IDs so `fetch()` can retrieve `/api/v2/awards/<AWARD_ID>/`.
+
+## Failure modes
+
+- 400 usually means a malformed POST filter, often a missing time period or
+  an invalid award-type code.
+- 500/timeout should be retried with backoff.
+- Award records can be revised; preserve the retrieval timestamp and
+  last-updated fields when available.
+- Recipient names and UEIs can change; avoid merging recipients by name alone.
+
+## Evidence shape
+
+`SearchResult.extras` should include award ID/PIID/FAIN/URI, recipient name,
+UEI when present, awarding/funding agency, award type, obligation amount,
+period of performance, NAICS/PSC, and generated USAspending URL. `fetch()`
+should render a `Source` with award detail metadata and raw API fields needed
+for audit.
+
+## Anti-patterns
+
+- Do not use USAspending for open solicitations; use SAM.gov or a future
+  procurement connector.
+- Do not equate obligations with payments.
+- Do not treat a recipient-name match as entity resolution without UEI/CAGE or
+  address confirmation.
diff --git a/src/research_agent/tools/_registry.py b/src/research_agent/tools/_registry.py
index ab107ad..b4f47a6 100644
--- a/src/research_agent/tools/_registry.py
+++ b/src/research_agent/tools/_registry.py
@@ -26,8 +26,7 @@
 from dataclasses import dataclass, field
 from typing import Any
 
-from pydantic import BaseModel, ConfigDict
-
+from pydantic import BaseModel, ConfigDict, ValidationError
 
 SearchFn = Callable[..., Awaitable[Any]]
 FetchFn = Callable[..., Awaitable[Any]]
@@ -68,12 +67,37 @@ class KindEntry:
     optional_payload_knobs: str
     example_query: str
     module_name: str = field(default="")
+    skill_exemption: str | None = None
 
     @property
     def short_name(self) -> str:
         """``congress`` for ``congress_search``."""
         return self.name.removesuffix("_search")
 
+    @property
+    def required_payload_fields(self) -> tuple[str, ...]:
+        """Required payload fields beyond common ``query``/``sub_question``."""
+        return required_payload_fields(self.payload_schema, include_common=False)
+
+    @property
+    def optional_payload_fields(self) -> tuple[str, ...]:
+        """Optional schema fields beyond common ``query``/``sub_question``."""
+        return optional_payload_fields(self.payload_schema, include_common=False)
+
+    @property
+    def expected_skill_name(self) -> str:
+        """Skill filename expected for this kind, even when not wired yet."""
+        return self.skill_name or self.short_name
+
+    @property
+    def skill_status_label(self) -> str:
+        """Compact skill coverage label for planner/README tables."""
+        if self.skill_name:
+            return f"`{self.skill_name}`"
+        if self.skill_exemption:
+            return f"exempt: {self.skill_exemption}"
+        return "missing"
+
 
 _REGISTRY: dict[str, KindEntry] = {}
 
@@ -82,6 +106,24 @@ class RegistryError(ValueError):
     """Raised when registry operations violate invariants."""
 
 
+@dataclass(frozen=True)
+class PayloadContractResult:
+    """Outcome of validating and normalizing a connector task payload."""
+
+    kind: str
+    valid: bool
+    payload: dict[str, Any]
+    repaired: bool = False
+    error: str | None = None
+    errors: tuple[dict[str, str], ...] = ()
+
+    @property
+    def repair_message(self) -> str:
+        if self.valid:
+            return ""
+        return self.error or f"{self.kind} payload failed connector contract validation"
+
+
 def register_kind(
     name: str,
     *,
@@ -90,6 +132,7 @@ def register_kind(
     fetch_fn: FetchFn | None = None,
     host_patterns: tuple[str, ...] = (),
     skill_name: str | None = _SKILL_NAME_UNSET,
+    skill_exemption: str | None = None,
     description: str = "",
     optional_payload_knobs: str = "",
     example_query: str = "",
@@ -123,6 +166,7 @@ def register_kind(
         fetch_fn=fetch_fn,
         host_patterns=tuple(host_patterns),
         skill_name=resolved_skill,
+        skill_exemption=skill_exemption,
         description=description,
         optional_payload_knobs=optional_payload_knobs,
         example_query=example_query,
@@ -165,30 +209,134 @@ def validate_payload(name: str, payload: dict[str, Any]) -> BaseModel:
     return entry.payload_schema.model_validate(payload)
 
 
+def required_payload_fields(
+    schema: type[BaseModel],
+    *,
+    include_common: bool = True,
+) -> tuple[str, ...]:
+    """Return statically-required fields from a connector payload schema."""
+    common = set() if include_common else set(BaseSearchPayload.model_fields)
+    return tuple(
+        name
+        for name, field_info in schema.model_fields.items()
+        if name not in common and field_info.is_required()
+    )
+
+
+def optional_payload_fields(
+    schema: type[BaseModel],
+    *,
+    include_common: bool = True,
+) -> tuple[str, ...]:
+    """Return optional fields from a connector payload schema."""
+    common = set() if include_common else set(BaseSearchPayload.model_fields)
+    return tuple(
+        name
+        for name, field_info in schema.model_fields.items()
+        if name not in common and not field_info.is_required()
+    )
+
+
+def _validation_errors(exc: ValidationError) -> tuple[dict[str, str], ...]:
+    out: list[dict[str, str]] = []
+    for err in exc.errors(include_url=False):
+        loc = ".".join(str(part) for part in err.get("loc", ())) or "<root>"
+        out.append(
+            {
+                "loc": loc,
+                "msg": str(err.get("msg", "")),
+                "type": str(err.get("type", "")),
+            }
+        )
+    return tuple(out)
+
+
+def _format_payload_error(
+    kind: str,
+    entry: KindEntry,
+    errors: tuple[dict[str, str], ...],
+) -> str:
+    rendered = "; ".join(
+        f"{err['loc']}: {err['msg']}" for err in errors
+    ) or "unknown validation error"
+    required = ", ".join(required_payload_fields(entry.payload_schema)) or "none"
+    optional = ", ".join(optional_payload_fields(entry.payload_schema)) or "none"
+    return (
+        f"{kind} payload rejected by connector contract: {rendered}. "
+        f"Required fields: {required}. Optional fields: {optional}."
+    )
+
+
+def validate_payload_contract(name: str, payload: dict[str, Any]) -> PayloadContractResult:
+    """Validate a connector payload and return a normalized, merged payload.
+
+    The normalized payload keeps orchestrator/private extras from ``payload``
+    but overwrites schema-known fields with Pydantic-normalized values. That
+    lets connector schemas repair values such as ``state='California'`` →
+    ``state='CA'`` without dropping internal fields like ``_active_strategies``.
+    An explicit ``None`` on a schema field counts as absent unless a validator
+    replaced it. Raises ``RegistryError`` when ``name`` is not registered.
+    """
+    entry = _REGISTRY.get(name)
+    if entry is None:
+        raise RegistryError(f"validate_payload_contract: unknown kind {name!r}")
+    try:
+        parsed = entry.payload_schema.model_validate(payload)
+    except ValidationError as exc:
+        errors = _validation_errors(exc)
+        return PayloadContractResult(
+            kind=name,
+            valid=False,
+            payload=dict(payload),
+            error=_format_payload_error(name, entry, errors),
+            errors=errors,
+        )
+
+    parsed_payload = parsed.model_dump(mode="json", exclude_none=True)
+    normalized = {**payload, **parsed_payload}
+    comparable_original = dict(payload)
+    for key in entry.payload_schema.model_fields:
+        if key in payload and payload[key] is None:
+            comparable_original.pop(key, None)
+            if key not in parsed_payload:  # keep a validator-supplied replacement
+                normalized.pop(key, None)
+
+    return PayloadContractResult(
+        kind=name,
+        valid=True,
+        payload=normalized,
+        repaired=normalized != comparable_original,
+    )
+
+
 # ---------------------------------------------------------------------------
 # Planner-prompt rendering helpers.
 # ---------------------------------------------------------------------------
 
 _TABLE_HEADER = (
-    "| Kind | What it covers | Optional payload knobs | Example query |\n"
-    "|---|---|---|---|"
+    "| Kind | What it covers | Required payload fields | Optional payload knobs |"
+    " Skill | Example query |\n"
+    "|---|---|---|---|---|---|"
 )
 
 
 def render_direct_kinds_table() -> str:
     """Render the **Direct connector kinds** markdown table.
 
-    Each row is ``| `<kind>` | <description> | <optional knobs> | `<example>` |``.
-    Missing knobs render as ``—`` so the column stays visually balanced
-    without leaving an empty cell that confuses model parsers.
+    Each row separates connector-specific required fields from optional knobs.
+    Common ``query``/``sub_question`` fields are documented once in the
+    planner prose and omitted from the table to keep it scannable.
     """
     rows: list[str] = [_TABLE_HEADER]
     for entry in iter_kinds():
+        required = ", ".join(f"`{field}`" for field in entry.required_payload_fields) or "—"
         knobs = entry.optional_payload_knobs.strip() or "—"
+        skill = entry.skill_status_label
         example = entry.example_query.strip() or ""
         example_cell = f"`{example}`" if example else "—"
         rows.append(
-            f"| `{entry.name}` | {entry.description} | {knobs} | {example_cell} |"
+            f"| `{entry.name}` | {entry.description} | {required} | {knobs} |"
+            f" {skill} | {example_cell} |"
         )
     return "\n".join(rows)
 
@@ -240,14 +388,18 @@ def _reset_for_tests() -> None:
 __all__ = [
     "BaseSearchPayload",
     "KindEntry",
+    "PayloadContractResult",
     "RegistryError",
     "get_kind",
     "is_registered",
     "iter_kinds",
+    "optional_payload_fields",
     "register_kind",
     "registered_skill_pairs",
     "render_direct_kinds_table",
     "render_kinds_allowlist",
     "render_tactical_replan_kinds",
+    "required_payload_fields",
     "validate_payload",
+    "validate_payload_contract",
 ]
diff --git a/src/research_agent/tools/bbb.py b/src/research_agent/tools/bbb.py
index a6e2a2d..a9b263b 100644
--- a/src/research_agent/tools/bbb.py
+++ b/src/research_agent/tools/bbb.py
@@ -442,7 +442,7 @@ class _PayloadSchema(_BaseSearchPayload):
     search_fn=search,
     fetch_fn=fetch,
     host_patterns=("www.bbb.org", "bbb.org"),
-    skill_name=None,
+    skill_name="bbb",
     description=(
         "Better Business Bureau profiles + ratings (Playwright, no auth)"
     ),
diff --git a/src/research_agent/tools/calaccess.py b/src/research_agent/tools/calaccess.py
index 5f4c26b..04837c8 100644
--- a/src/research_agent/tools/calaccess.py
+++ b/src/research_agent/tools/calaccess.py
@@ -440,7 +440,7 @@ class _PayloadSchema(_BaseSearchPayload):
     search_fn=search,
     fetch_fn=fetch,
     host_patterns=("powersearch.sos.ca.gov",),
-    skill_name=None,
+    skill_name="calaccess",
     description="California Cal-Access campaign finance (Playwright)",
     optional_payload_knobs="`kind: contributions\\|independent_expenditures`",
     example_query="Newsom",
diff --git a/src/research_agent/tools/fec.py b/src/research_agent/tools/fec.py
index 5a92163..e6204c5 100644
--- a/src/research_agent/tools/fec.py
+++ b/src/research_agent/tools/fec.py
@@ -36,6 +36,7 @@
 from urllib.parse import urljoin, urlparse
 
 import httpx
+from pydantic import field_validator, model_validator
 
 from research_agent import config
 from research_agent.tools._registry import (
@@ -65,6 +66,77 @@
     "schedules/schedule_a",
     "schedules/schedule_e",
 }
+_OFFICE_ALIASES = {
+    "h": "H",
+    "house": "H",
+    "representative": "H",
+    "representatives": "H",
+    "u s house": "H",
+    "us house": "H",
+    "united states house": "H",
+    "s": "S",
+    "senate": "S",
+    "senator": "S",
+    "u s senate": "S",
+    "us senate": "S",
+    "united states senate": "S",
+    "p": "P",
+    "president": "P",
+    "presidential": "P",
+}
+_STATE_NAME_TO_POSTAL = {
+    "alabama": "AL",
+    "alaska": "AK",
+    "arizona": "AZ",
+    "arkansas": "AR",
+    "california": "CA",
+    "colorado": "CO",
+    "connecticut": "CT",
+    "delaware": "DE",
+    "district of columbia": "DC",
+    "florida": "FL",
+    "georgia": "GA",
+    "hawaii": "HI",
+    "idaho": "ID",
+    "illinois": "IL",
+    "indiana": "IN",
+    "iowa": "IA",
+    "kansas": "KS",
+    "kentucky": "KY",
+    "louisiana": "LA",
+    "maine": "ME",
+    "maryland": "MD",
+    "massachusetts": "MA",
+    "michigan": "MI",
+    "minnesota": "MN",
+    "mississippi": "MS",
+    "missouri": "MO",
+    "montana": "MT",
+    "nebraska": "NE",
+    "nevada": "NV",
+    "new hampshire": "NH",
+    "new jersey": "NJ",
+    "new mexico": "NM",
+    "new york": "NY",
+    "north carolina": "NC",
+    "north dakota": "ND",
+    "ohio": "OH",
+    "oklahoma": "OK",
+    "oregon": "OR",
+    "pennsylvania": "PA",
+    "rhode island": "RI",
+    "south carolina": "SC",
+    "south dakota": "SD",
+    "tennessee": "TN",
+    "texas": "TX",
+    "utah": "UT",
+    "vermont": "VT",
+    "virginia": "VA",
+    "washington": "WA",
+    "west virginia": "WV",
+    "wisconsin": "WI",
+    "wyoming": "WY",
+}
 
 # Candidate IDs: letter prefix (H/S/P) + 8 digits typically; allow alphanumeric.
 # Committee IDs: C + 8 digits typically; allow alphanumeric to be tolerant.
@@ -1150,6 +1222,66 @@ class _PayloadSchema(_BaseSearchPayload):
     per_page: int | None = None
     page: int | None = None
 
+    @field_validator("kind", mode="before")
+    @classmethod
+    def _normalize_kind(cls, value: Any) -> str | None:
+        if value is None:
+            return None
+        text = str(value).strip()
+        if not text:
+            return None
+        return text
+
+    @field_validator("office", mode="before")
+    @classmethod
+    def _normalize_office(cls, value: Any) -> str | None:
+        if value is None:
+            return None
+        text = re.sub(r"[^a-z0-9]+", " ", str(value).strip().lower()).strip()
+        if not text:
+            return None
+        return _OFFICE_ALIASES.get(text, str(value).strip().upper())
+
+    @field_validator("state", mode="before")
+    @classmethod
+    def _normalize_state(cls, value: Any) -> str | None:
+        if value is None:
+            return None
+        text = str(value).strip()
+        if not text:
+            return None
+        normalized_name = re.sub(r"[^a-z]+", " ", text.lower()).strip()
+        return _STATE_NAME_TO_POSTAL.get(normalized_name, text.upper())
+
+    @model_validator(mode="after")
+    def _validate_fec_contract(self) -> _PayloadSchema:
+        kind = self.kind or "candidates"
+        if kind not in _VALID_KINDS:
+            raise ValueError(
+                f"kind must be one of {', '.join(sorted(_VALID_KINDS))}"
+            )
+
+        query = self.query.strip()
+        if kind in {None, "candidates"} and not query and self.cycle and self.office:
+            kind = "candidates_enumerate"
+            self.kind = kind
+
+        if kind == "candidates_enumerate":
+            if self.cycle is None:
+                raise ValueError("kind=candidates_enumerate requires cycle")
+            if self.office is None:
+                raise ValueError("kind=candidates_enumerate requires office")
+            if self.office not in {"H", "S", "P"}:
+                raise ValueError("kind=candidates_enumerate office must be H, S, or P")
+            return self
+
+        if not query:
+            raise ValueError(
+                "query must be non-empty unless kind=candidates_enumerate "
+                "with cycle and office"
+            )
+        return self
+
 
 _register_kind(
     KIND,
diff --git a/src/research_agent/tools/gdelt.py b/src/research_agent/tools/gdelt.py
index b061b51..261bb04 100644
--- a/src/research_agent/tools/gdelt.py
+++ b/src/research_agent/tools/gdelt.py
@@ -368,7 +368,7 @@ class _PayloadSchema(_BaseSearchPayload):
     search_fn=search,
     fetch_fn=fetch,
     host_patterns=("api.gdeltproject.org",),
-    skill_name=None,
+    skill_name="gdelt",
     description=(
         "GDELT — Global news event aggregator, no `site:` operator (no auth)"
     ),
diff --git a/src/research_agent/tools/lda.py b/src/research_agent/tools/lda.py
index bdd48ba..e70043f 100644
--- a/src/research_agent/tools/lda.py
+++ b/src/research_agent/tools/lda.py
@@ -625,7 +625,7 @@ class _PayloadSchema(_BaseSearchPayload):
     search_fn=search,
     fetch_fn=fetch,
     host_patterns=("lda.senate.gov", "lda.gov", "www.lda.gov"),
-    skill_name=None,
+    skill_name="lda",
     description=(
         "Senate Lobbying Disclosure Act filings (registrants, contributions)"
     ),
diff --git a/src/research_agent/tools/licensing.py b/src/research_agent/tools/licensing.py
index b18da75..17142b2 100644
--- a/src/research_agent/tools/licensing.py
+++ b/src/research_agent/tools/licensing.py
@@ -688,7 +688,7 @@ class _PayloadSchema(_BaseSearchPayload):
         "www.myfloridalicense.com",
         "www.dos.ny.gov",
     ),
-    skill_name=None,
+    skill_name="licensing",
     description=(
         "State contractor / licensing-board lookups (Playwright; CA wired,"
         " others stubs)"
diff --git a/src/research_agent/tools/linkedin.py b/src/research_agent/tools/linkedin.py
index 792e8dc..74cf37c 100644
--- a/src/research_agent/tools/linkedin.py
+++ b/src/research_agent/tools/linkedin.py
@@ -910,6 +910,7 @@ class _PayloadSchema(_BaseSearchPayload):
     fetch_fn=fetch,
     host_patterns=("linkedin.com", "www.linkedin.com"),
     skill_name=None,
+    skill_exemption="#320 paid/gated connector skill backfill",
     description=(
         "LinkedIn person/company lookup via Proxycurl or Lix — requires"
         " broker key"
diff --git a/src/research_agent/tools/littlesis.py b/src/research_agent/tools/littlesis.py
index 2d1802f..2730c86 100644
--- a/src/research_agent/tools/littlesis.py
+++ b/src/research_agent/tools/littlesis.py
@@ -623,7 +623,7 @@ class _PayloadSchema(_BaseSearchPayload):
     search_fn=search,
     fetch_fn=fetch,
     host_patterns=("littlesis.org", "www.littlesis.org"),
-    skill_name=None,
+    skill_name="littlesis",
     description=(
         "Power-mapping database — entities, donations, board seats, family"
         " ties (lead, not evidence)"
diff --git a/src/research_agent/tools/nonprofits.py b/src/research_agent/tools/nonprofits.py
index d540687..3bece77 100644
--- a/src/research_agent/tools/nonprofits.py
+++ b/src/research_agent/tools/nonprofits.py
@@ -556,7 +556,7 @@ class _PayloadSchema(_BaseSearchPayload):
     search_fn=search,
     fetch_fn=fetch,
     host_patterns=("projects.propublica.org",),
-    skill_name=None,
+    skill_name="nonprofits",
     description="ProPublica Nonprofit Explorer (Form 990 filings, no auth)",
     optional_payload_knobs="—",
     example_query="Heritage Foundation",
diff --git a/src/research_agent/tools/opencorporates.py b/src/research_agent/tools/opencorporates.py
index 64f8542..7d461ca 100644
--- a/src/research_agent/tools/opencorporates.py
+++ b/src/research_agent/tools/opencorporates.py
@@ -571,7 +571,7 @@ class _PayloadSchema(_BaseSearchPayload):
     search_fn=search,
     fetch_fn=fetch,
     host_patterns=("opencorporates.com", "www.opencorporates.com"),
-    skill_name=None,
+    skill_name="opencorporates",
     description="Global company registry — requires `OPENCORPORATES_API_KEY`",
     optional_payload_knobs="`jurisdiction: us_ca\\|gb\\|...`",
     example_query="Acme Holdings",
diff --git a/src/research_agent/tools/sanctions.py b/src/research_agent/tools/sanctions.py
index 192aef3..0409f6a 100644
--- a/src/research_agent/tools/sanctions.py
+++ b/src/research_agent/tools/sanctions.py
@@ -1108,6 +1108,7 @@ class _PayloadSchema(_BaseSearchPayload):
         "webgate.ec.europa.eu",
     ),
     skill_name=None,
+    skill_exemption="#320 paid/gated and sanctions connector skill backfill",
     description="OFAC SDN + UK sanctions lists (local index, no auth)",
     optional_payload_knobs="—",
     example_query="Wagner Group",
diff --git a/src/research_agent/tools/scholar.py b/src/research_agent/tools/scholar.py
index 7aa66f1..0725856 100644
--- a/src/research_agent/tools/scholar.py
+++ b/src/research_agent/tools/scholar.py
@@ -445,6 +445,7 @@ class _PayloadSchema(_BaseSearchPayload):
     fetch_fn=fetch,
     host_patterns=("scholar.google.com",),
     skill_name=None,
+    skill_exemption="#320 paid/gated connector skill backfill",
     description="Google Scholar via SerpAPI — requires `SERPAPI_KEY`",
     optional_payload_knobs="`kind: case_law\\|articles`",
     example_query="Section 230 appellate",
diff --git a/src/research_agent/tools/sos.py b/src/research_agent/tools/sos.py
index 9958b1f..ea4c383 100644
--- a/src/research_agent/tools/sos.py
+++ b/src/research_agent/tools/sos.py
@@ -723,7 +723,7 @@ class _PayloadSchema(_BaseSearchPayload):
         "search.sunbiz.org",
         "apps.dos.ny.gov",
     ),
-    skill_name=None,
+    skill_name="sos",
     description=(
         "State Secretary-of-State business entity filings (Playwright; CA"
         " wired, others stubs)"
diff --git a/src/research_agent/tools/state_election.py b/src/research_agent/tools/state_election.py
index 2b79fc6..2e3981c 100644
--- a/src/research_agent/tools/state_election.py
+++ b/src/research_agent/tools/state_election.py
@@ -13,6 +13,7 @@
 
 import httpx
 import yaml  # type: ignore[import-untyped]
+from pydantic import field_validator
 
 from research_agent.tools import browser
 from research_agent.tools._registry import (
@@ -28,6 +29,59 @@
 _CONFIG_PATH = Path("config/state_election_recipes.yaml")
 _DIAGNOSTICS_DIR = Path("data/diagnostics/state_election")
 _DEFAULT_TIMEOUT = 20.0
+_US_STATE_NAME_TO_POSTAL = {
+    "alabama": "AL",
+    "alaska": "AK",
+    "arizona": "AZ",
+    "arkansas": "AR",
+    "california": "CA",
+    "colorado": "CO",
+    "connecticut": "CT",
+    "delaware": "DE",
+    "district of columbia": "DC",
+    "florida": "FL",
+    "georgia": "GA",
+    "hawaii": "HI",
+    "idaho": "ID",
+    "illinois": "IL",
+    "indiana": "IN",
+    "iowa": "IA",
+    "kansas": "KS",
+    "kentucky": "KY",
+    "louisiana": "LA",
+    "maine": "ME",
+    "maryland": "MD",
+    "massachusetts": "MA",
+    "michigan": "MI",
+    "minnesota": "MN",
+    "mississippi": "MS",
+    "missouri": "MO",
+    "montana": "MT",
+    "nebraska": "NE",
+    "nevada": "NV",
+    "new hampshire": "NH",
+    "new jersey": "NJ",
+    "new mexico": "NM",
+    "new york": "NY",
+    "north carolina": "NC",
+    "north dakota": "ND",
+    "ohio": "OH",
+    "oklahoma": "OK",
+    "oregon": "OR",
+    "pennsylvania": "PA",
+    "rhode island": "RI",
+    "south carolina": "SC",
+    "south dakota": "SD",
+    "tennessee": "TN",
+    "texas": "TX",
+    "utah": "UT",
+    "vermont": "VT",
+    "virginia": "VA",
+    "washington": "WA",
+    "west virginia": "WV",
+    "wisconsin": "WI",
+    "wyoming": "WY",
+}
 
 _NAME_KEYS = (
     "candidate_name",
@@ -72,6 +126,24 @@ def _clean(value: Any) -> str:
     return re.sub(r"\s+", " ", text).strip()
 
 
+def _normalize_state(value: Any) -> str:
+    text = _clean(value)
+    if not text:
+        raise ValueError("state is required")
+    normalized_name = re.sub(r"[^a-z]+", " ", text.lower()).strip()
+    code = _US_STATE_NAME_TO_POSTAL.get(normalized_name, text.upper())
+    if not re.fullmatch(r"[A-Z]{2}", code):
+        raise ValueError(
+            "state must be a two-letter postal abbreviation or full US state name"
+        )
+    if code not in _RECIPES:
+        supported = ", ".join(sorted(_RECIPES)) or "none"
+        raise ValueError(
+            f"state {code} is not supported by state_election recipes; supported: {supported}"
+        )
+    return code
+
+
 def _lookup(row: dict[str, Any], keys: tuple[str, ...]) -> str:
     normalized = {str(k).strip().lower(): v for k, v in row.items()}
     for key in keys:
@@ -354,14 +426,18 @@ async def _portal_search(
 async def search(
     query: str,
     *,
-    state: str,
+    state: str | None = None,
     office: str | None = None,
     cycle: int | None = None,
     max_results: int = 50,
     timeout: float = _DEFAULT_TIMEOUT,
 ) -> list[SearchResult]:
     """Search official state-election candidate roster sources."""
-    state_norm = state.strip().upper()
+    try:
+        state_norm = _normalize_state(state)
+    except ValueError as exc:
+        logger.warning("state_election: invalid state %r: %s", state, exc)
+        return []
     recipe = _RECIPES.get(state_norm)
     if recipe is None:
         logger.warning("state_election: no recipe for state=%s", state_norm)
@@ -438,6 +514,11 @@ class _PayloadSchema(_BaseSearchPayload):
     cycle: int | None = None
     max_results: int | None = None
 
+    @field_validator("state", mode="before")
+    @classmethod
+    def _normalize_state_field(cls, value: Any) -> str:
+        return _normalize_state(value)
+
 
 _register_kind(
     KIND,
@@ -458,7 +539,7 @@ class _PayloadSchema(_BaseSearchPayload):
     ),
     skill_name="state_election",
     description="Official state election candidate roster sources and portals",
-    optional_payload_knobs="`state`, `office`, `cycle`, `max_results`",
+    optional_payload_knobs="`office`, `cycle`, `max_results`",
     example_query="2026 House candidates",
     module_name="state_election",
 )
diff --git a/src/research_agent/tools/usaspending.py b/src/research_agent/tools/usaspending.py
index 864ae96..da33528 100644
--- a/src/research_agent/tools/usaspending.py
+++ b/src/research_agent/tools/usaspending.py
@@ -759,7 +759,7 @@ class _PayloadSchema(_BaseSearchPayload):
     search_fn=search,
     fetch_fn=fetch,
     host_patterns=("usaspending.gov", "www.usaspending.gov", "api.usaspending.gov"),
-    skill_name=None,
+    skill_name="usaspending",
     description=(
         "Federal contracts, grants, loans (award-level detail, no auth)"
     ),
diff --git a/tests/research_agent/tools/test_registry.py b/tests/research_agent/tools/test_registry.py
index 06f3628..3148a60 100644
--- a/tests/research_agent/tools/test_registry.py
+++ b/tests/research_agent/tools/test_registry.py
@@ -5,7 +5,7 @@
 from typing import Any
 
 import pytest
-from pydantic import ValidationError
+from pydantic import ValidationError, field_validator
 
 from research_agent.tools import _registry as registry
 from research_agent.tools._registry import (
@@ -16,7 +16,9 @@
     register_kind,
     render_direct_kinds_table,
     render_kinds_allowlist,
+    required_payload_fields,
     validate_payload,
+    validate_payload_contract,
 )
 
 
@@ -76,7 +78,7 @@ def test_skill_name_defaults_to_short_name(
     assert entry.skill_name == "loc"
 
 
-def test_skill_name_explicit_none_grandfathers(
+def test_skill_name_explicit_none_can_carry_exemption(
     empty_registry: dict[str, KindEntry],
 ) -> None:
     entry = register_kind(
@@ -84,8 +86,12 @@ def test_skill_name_explicit_none_grandfathers(
         payload_schema=_DummyPayload,
         search_fn=_dummy_search,
         skill_name=None,
+        skill_exemption="#999 tracked elsewhere",
     )
     assert entry.skill_name is None
+    assert entry.skill_exemption == "#999 tracked elsewhere"
+    assert entry.expected_skill_name == "loc"
+    assert entry.skill_status_label == "exempt: #999 tracked elsewhere"
 
 
 def test_skill_name_can_be_overridden(
@@ -181,6 +187,56 @@ def test_validate_payload_ignores_orchestrator_extras(
     assert parsed.query == "abc"
 
 
+def test_validate_payload_contract_returns_normalized_payload(
+    empty_registry: dict[str, KindEntry],
+) -> None:
+    class _PS(BaseSearchPayload):
+        state: str
+
+        @field_validator("state", mode="before")
+        @classmethod
+        def _normalize_state(cls, value: Any) -> str:
+            return "CA" if value == "California" else str(value)
+
+    register_kind("x_search", payload_schema=_PS, search_fn=_dummy_search)
+
+    result = validate_payload_contract(
+        "x_search",
+        {
+            "query": "abc",
+            "sub_question": "what?",
+            "state": "California",
+            "_active_strategies": ["triangulation"],
+        },
+    )
+
+    assert result.valid is True
+    assert result.repaired is True
+    assert result.payload["state"] == "CA"
+    assert result.payload["_active_strategies"] == ["triangulation"]
+
+
+def test_validate_payload_contract_returns_actionable_error(
+    empty_registry: dict[str, KindEntry],
+) -> None:
+    register_kind("x_search", payload_schema=_DummyPayload, search_fn=_dummy_search)
+
+    result = validate_payload_contract("x_search", {"query": "abc"})
+
+    assert result.valid is False
+    assert result.errors[0]["loc"] == "sub_question"
+    assert "Required fields: query, sub_question" in result.repair_message
+
+
+def test_required_payload_fields_can_hide_common_base_fields() -> None:
+    class _PS(BaseSearchPayload):
+        state: str
+        max_results: int | None = None
+
+    assert required_payload_fields(_PS) == ("query", "sub_question", "state")
+    assert required_payload_fields(_PS, include_common=False) == ("state",)
+
+
 def test_render_direct_kinds_table_one_row_per_kind(
     empty_registry: dict[str, KindEntry],
 ) -> None:
@@ -199,11 +255,16 @@ def test_render_direct_kinds_table_one_row_per_kind(
         description="Beta description",
     )
     rendered = render_direct_kinds_table()
-    assert "| `alpha_search` | Alpha description | `kind: a\\|b` | `alpha example` |" in rendered
+    assert (
+        "| `alpha_search` | Alpha description | — | `kind: a\\|b` | `alpha` |"
+        " `alpha example` |"
+    ) in rendered
     # No knobs / example for beta — table renders ``—`` to keep cells aligned.
-    assert "| `beta_search` | Beta description | — | — |" in rendered
+    assert "| `beta_search` | Beta description | — | — | `beta` | — |" in rendered
     # Header is intact.
-    assert rendered.startswith("| Kind | What it covers | Optional payload knobs | Example query |")
+    assert rendered.startswith(
+        "| Kind | What it covers | Required payload fields | Optional payload knobs"
+    )
 
 
 def test_render_kinds_allowlist_alphabetical(
@@ -288,9 +349,11 @@ def test_live_registry_skill_name_assignment() -> None:
         if entry.skill_name is not None
     }
     assert skilled == {
-        "congress_search": "congress",
+        "bbb_search": "bbb",
         "bne_search": "bne",
+        "calaccess_search": "calaccess",
         "commons_search": "commons",
+        "congress_search": "congress",
         "courtlistener_search": "courtlistener",
         "cspan_search": "cspan",
         "dpla_search": "dpla",
@@ -299,17 +362,25 @@ def test_live_registry_skill_name_assignment() -> None:
         "fec_search": "fec",
         "fedregister_search": "fedregister",
         "gallica_search": "gallica",
+        "gdelt_search": "gdelt",
         "iarchive_search": "iarchive",
         "iwm_search": "iwm",
+        "lda_search": "lda",
+        "licensing_search": "licensing",
+        "littlesis_search": "littlesis",
         "loc_search": "loc",
         "nara_search": "nara",
+        "nonprofits_search": "nonprofits",
         "openalex_search": "openalex",
+        "opencorporates_search": "opencorporates",
         "openlibrary_search": "openlibrary",
         "persee_search": "persee",
         "si_search": "smithsonian",
+        "sos_search": "sos",
         "state_election_search": "state_election",
         "trove_search": "trove",
         "ukna_search": "ukna",
+        "usaspending_search": "usaspending",
         "wikidata_search": "wikidata",
         "wikisource_search": "wikisource",
     }
diff --git a/tests/test_candidate_roster_backtest.py b/tests/test_candidate_roster_backtest.py
index 443ed40..53f04f4 100644
--- a/tests/test_candidate_roster_backtest.py
+++ b/tests/test_candidate_roster_backtest.py
@@ -132,7 +132,13 @@ def _patch_state_election(monkeypatch: pytest.MonkeyPatch) -> None:
                 "source_type": "csv",
                 "retrieval_method": "static_fetch",
                 "cycle_coverage": [2026],
-            }
+            },
+            "MD": {
+                "source_url": "https://fixtures.example.test/md_2026.csv",
+                "source_type": "csv",
+                "retrieval_method": "static_fetch",
+                "cycle_coverage": [2026],
+            },
         },
     )
 
@@ -195,6 +201,7 @@ def _make_job(tmp_path: Path) -> tuple[Job, Plan]:
                 kind="fec_search",
                 payload={
                     "query": "",
+                    "sub_question": "Enumerate 2026 California House candidates from FEC",
                     "kind": "candidates_enumerate",
                     "cycle": 2026,
                     "office": "H",
@@ -208,6 +215,7 @@ def _make_job(tmp_path: Path) -> tuple[Job, Plan]:
                 kind="fec_search",
                 payload={
                     "query": "",
+                    "sub_question": "Enumerate 2026 Florida Senate candidates from FEC",
                     "kind": "candidates_enumerate",
                     "cycle": 2026,
                     "office": "S",
@@ -220,6 +228,7 @@ def _make_job(tmp_path: Path) -> tuple[Job, Plan]:
                 kind="state_election_search",
                 payload={
                     "query": "House",
+                    "sub_question": "Find Colorado state-election House candidate rows",
                     "cycle": 2026,
                     "state": "CO",
                     "office": "House",
@@ -230,6 +239,7 @@ def _make_job(tmp_path: Path) -> tuple[Job, Plan]:
                 kind="state_election_search",
                 payload={
                     "query": "2026 U.S. House candidates",
+                    "sub_question": "Find Maryland state-election House candidate rows",
                     "cycle": 2026,
                     "state": "MD",
                     "office": "House",
diff --git a/tests/test_doctor.py b/tests/test_doctor.py
index e2178ca..7df1251 100644
--- a/tests/test_doctor.py
+++ b/tests/test_doctor.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import json
+from pathlib import Path
 
 import pytest
 
@@ -446,6 +447,13 @@ def test_check_task_kind_registry_coherence_passes_for_live_registry() -> None:
     assert result.required is True
 
 
+def test_check_registry_contract_coherence_passes_for_live_registry() -> None:
+    check = doctor.check_registry_contract_coherence()  # registry is not monkeypatched
+    assert check.status == "ok", check.detail
+    assert check.required is True
+    assert "connector contract(s) expose required fields" in check.detail
+
+
 def test_check_planner_allowlist_coherence_flags_orphan(monkeypatch) -> None:
     """A kind in the allowlist that isn't registered is a hard fail.
 
@@ -480,8 +488,9 @@ def test_check_planner_allowlist_coherence_flags_missing_table_row(monkeypatch)
     def _truncated() -> dict[str, str]:
         out = real()
         out["direct_kinds_table"] = (
-            "| Kind | What it covers | Optional payload knobs | Example query |\n"
-            "|---|---|---|---|"
+            "| Kind | What it covers | Required payload fields | Optional payload knobs |"
+            " Skill | Example query |\n"
+            "|---|---|---|---|---|---|"
         )  # header only, zero rows
         return out
 
@@ -492,14 +501,9 @@ def _truncated() -> dict[str, str]:
     assert "no Direct-kinds-table row" in result.detail
 
 
-def test_check_registry_skill_coherence_skips_grandfathered() -> None:
-    """Kinds with ``skill_name=None`` produce ``skip`` rows, not ``fail``."""
+def test_check_registry_skill_coherence_reports_live_registry() -> None:
     rows = doctor.check_registry_skill_coherence()
     by_name = {r.name: r for r in rows}
-    # ``bbb_search`` is grandfathered (skill_name=None in the live registry).
-    skipped = by_name["registry_skill:bbb_search"]
-    assert skipped.status == "skip"
-    assert "grandfathered" in skipped.detail
     # ``congress_search`` ships a skill — must be ok.
     ok_row = by_name["registry_skill:congress_search"]
     assert ok_row.status == "ok"
@@ -513,6 +517,76 @@ def test_check_registry_skill_coherence_skips_grandfathered() -> None:
     assert openlibrary_row.status == "ok"
 
 
+def test_check_registry_skill_coherence_skips_documented_exemption(
+    monkeypatch, tmp_path
+) -> None:
+    """A skill-less kind carrying ``skill_exemption`` yields an optional ``skip`` row."""
+    from research_agent.skills import loader as skills_loader
+    from research_agent.tools import _registry
+
+    skills_root = tmp_path / "skills"
+    (skills_root / "connectors").mkdir(parents=True)
+    monkeypatch.setattr(skills_loader, "_skills_dir", lambda category: skills_root / category)
+
+    exempt_entry = _registry.KindEntry(
+        name="pending_search",
+        payload_schema=_registry.BaseSearchPayload,
+        search_fn=lambda *a, **kw: None,
+        fetch_fn=None,
+        host_patterns=(),
+        skill_name=None,
+        description="",
+        optional_payload_knobs="",
+        example_query="",
+        module_name="pending_module",
+        skill_exemption="#999 pending skill",
+    )
+    monkeypatch.setattr(_registry, "iter_kinds", lambda: [exempt_entry])
+
+    (report_row,) = doctor.check_registry_skill_coherence()
+    assert report_row.status == "skip"
+    assert report_row.required is False
+    assert "kind=pending_search" in report_row.detail
+    assert "short_name=pending" in report_row.detail
+    assert "module_name=pending_module" in report_row.detail
+    assert "skills/connectors/pending.md" in report_row.detail
+    assert "#999 pending skill" in report_row.detail
+
+
+def test_check_registry_skill_coherence_fails_without_skill_or_exemption(
+    monkeypatch, tmp_path
+) -> None:
+    """A kind with neither ``skill_name`` nor an exemption yields a required ``fail`` row."""
+    from research_agent.skills import loader as skills_loader
+    from research_agent.tools import _registry
+
+    skills_root = tmp_path / "skills"
+    (skills_root / "connectors").mkdir(parents=True)
+    monkeypatch.setattr(skills_loader, "_skills_dir", lambda category: skills_root / category)
+
+    orphan_entry = _registry.KindEntry(
+        name="ghost_search",
+        payload_schema=_registry.BaseSearchPayload,
+        search_fn=lambda *a, **kw: None,
+        fetch_fn=None,
+        host_patterns=(),
+        skill_name=None,
+        description="",
+        optional_payload_knobs="",
+        example_query="",
+        module_name="ghost_module",
+    )
+    monkeypatch.setattr(_registry, "iter_kinds", lambda: [orphan_entry])
+
+    (report_row,) = doctor.check_registry_skill_coherence()
+    assert report_row.status == "fail"
+    assert report_row.required is True
+    assert "missing skill_name and no documented exemption" in report_row.detail
+    assert "kind=ghost_search" in report_row.detail
+    assert "module_name=ghost_module" in report_row.detail
+    assert "skills/connectors/ghost.md" in report_row.detail
+
+
 def test_check_registry_skill_summary_coherence_passes() -> None:
     rows = [
         doctor.CheckResult("registry_skill:ok_search", "ok", required=True, detail="ok"),
@@ -520,7 +594,7 @@ def test_check_registry_skill_summary_coherence_passes() -> None:
             "registry_skill:old_search",
             "skip",
             required=False,
-            detail="grandfathered",
+            detail="exempted",
         ),
     ]
 
@@ -530,6 +604,7 @@ def test_check_registry_skill_summary_coherence_passes() -> None:
     assert result.status == "ok"
     assert result.required is True
     assert "1 connector skill file(s) parse" in result.detail
+    assert "1 documented exemption(s)" in result.detail
 
 
 def test_check_registry_skill_summary_coherence_fails_on_required_row() -> None:
@@ -584,6 +659,55 @@ def test_check_registry_skill_coherence_fails_when_skill_file_missing(
     assert row.status == "fail"
     assert row.required is True
     assert "missing skills/connectors/ghost.md" in row.detail
+    assert "kind ghost_search" in row.detail
+    assert "module_name=ghost" in row.detail
+
+
+def test_check_registry_skill_coherence_fails_on_malformed_frontmatter(
+    monkeypatch, tmp_path
+) -> None:
+    """A skill file without YAML frontmatter makes its kind's row ``fail``."""
+    from research_agent.skills import loader as skills_loader
+    from research_agent.tools import _registry
+
+    skills_root = tmp_path / "skills"
+    connectors_dir = skills_root / "connectors"
+    connectors_dir.mkdir(parents=True)
+    (connectors_dir / "bad.md").write_text("no frontmatter\n", encoding="utf-8")
+    monkeypatch.setattr(skills_loader, "_skills_dir", lambda category: skills_root / category)
+
+    malformed_entry = _registry.KindEntry(
+        name="bad_search",
+        payload_schema=_registry.BaseSearchPayload,
+        search_fn=lambda *a, **kw: None,
+        fetch_fn=None,
+        host_patterns=(),
+        skill_name="bad",
+        description="",
+        optional_payload_knobs="",
+        example_query="",
+        module_name="bad",
+    )
+    monkeypatch.setattr(_registry, "iter_kinds", lambda: [malformed_entry])
+
+    (report_row,) = doctor.check_registry_skill_coherence()
+    assert report_row.status == "fail"
+    assert "missing a YAML frontmatter" in report_row.detail
+
+
+def test_connector_skill_template_documents_required_sections() -> None:
+    template = (Path(__file__).parents[1] / "docs/CONNECTOR_SKILL_TEMPLATE.md").read_text("utf-8")
+    for section in (  # template is located relative to this file, not the working directory
+        "## Official documentation",
+        "## Auth and cost",
+        "## Required payload fields",
+        "## Knobs available",
+        "## Valid payload examples",
+        "## Request and pagination pattern",
+        "## Failure modes",
+        "## Evidence shape",
+    ):
+        assert section in template, f"template is missing the {section!r} heading"
 
 
 def test_run_all_checks_includes_registry_coherence(tmp_path) -> None:
diff --git a/tests/test_orchestrator_connector_contracts.py b/tests/test_orchestrator_connector_contracts.py
new file mode 100644
index 0000000..4e90e5f
--- /dev/null
+++ b/tests/test_orchestrator_connector_contracts.py
@@ -0,0 +1,168 @@
+"""Connector contract enforcement at orchestrator dispatch."""
+
+from __future__ import annotations
+
+import json
+import sys
+import types
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+from research_agent.orchestrator.loop import default_handlers, run_loop
+from research_agent.orchestrator.plan import Plan, Subgoal, TaskSpec
+from research_agent.storage import db
+from research_agent.storage.jobs import Job
+from research_agent.storage.markdown import write_plan
+from research_agent.storage.tasks import enqueue
+from research_agent.tools import _registry
+from research_agent.tools._registry import BaseSearchPayload, KindEntry
+from research_agent.tools.models import SearchResult
+
+
+@pytest.fixture
+def db_path(tmp_path: Path) -> Path:
+    """Run ``db.migrate`` on ``tmp_path / "index.sqlite"`` and return that path."""
+    db.migrate(path=(sqlite_file := tmp_path / "index.sqlite")).close()
+    return sqlite_file
+
+
+@pytest.fixture
+def job(tmp_path: Path, db_path: Path) -> Job:
+    """Create a job with a fixed goal, rooted at ``tmp_path / "jobs"``."""
+    goal_spec = {"goal": "Investigate connector contracts"}
+    jobs_dir = tmp_path / "jobs"
+    created = Job.create(goal_spec, jobs_root=jobs_dir, db_path=db_path)
+    return created
+
+
+def _open_plan(job: Job) -> Plan:
+    seeded = Plan(
+        version=1,
+        objective="Investigate connector contracts",
+        subgoals=[Subgoal(id=1, description="Run connector", done=False)],  # not done yet
+        task_template=[],  # empty: the dispatch test enqueues its task itself
+        expected_iterations=1,
+    )
+    write_plan(job, seeded.model_dump())  # stored through the markdown plan writer
+    return seeded
+
+
+def _task_rows(db_path: Path, job_id: str) -> list[dict[str, Any]]:
+    """Return ``kind``, ``status`` and ``error`` of the job's tasks, by ascending id."""
+    sql = "SELECT kind, status, error FROM tasks WHERE job_id = ? ORDER BY id ASC"
+    connection = db.connect(db_path)
+    try:
+        cursor = connection.execute(sql, (job_id,))
+        fetched = cursor.fetchall()
+    finally:
+        connection.close()
+    return [dict(record) for record in fetched]
+
+
+def _event_payloads(db_path: Path, job_id: str, kind: str) -> list[dict[str, Any]]:
+    """Return the decoded ``payload_json`` of the job's ``kind`` events, by ascending id."""
+    sql = "SELECT payload_json FROM events WHERE job_id = ? AND kind = ? ORDER BY id ASC"
+    connection = db.connect(db_path)
+    try:
+        cursor = connection.execute(sql, (job_id, kind))
+        fetched = cursor.fetchall()
+    finally:
+        # Always release the connection, even when the query raises.
+        connection.close()
+    return [json.loads(record["payload_json"]) for record in fetched]
+
+
+@pytest.mark.asyncio
+async def test_dispatch_rejects_invalid_direct_connector_before_call(
+    job: Job,
+    db_path: Path,
+) -> None:
+    """A queued task whose payload lacks ``state`` fails at dispatch with a rejection event."""
+    plan = _open_plan(job)
+    # ``state`` is deliberately absent from this ``state_election_search`` payload.
+    stateless_payload = {
+        "query": "2026 House candidates",
+        "sub_question": "Find state candidate rows",
+    }
+    enqueue(
+        job,
+        [TaskSpec(kind="state_election_search", payload=stateless_payload)],
+        plan.version,
+    )
+
+    # Bare ``object()`` instances stand in for the router in this run.
+    await run_loop(
+        job,
+        router=object(),
+        plan=plan,
+        handlers=default_handlers(object()),
+        max_tasks=1,
+        retry_waits=(0,),
+        retry_max_attempts=1,
+    )
+
+    # Exactly one task row and one rejection event are expected.
+    (task_row,) = _task_rows(db_path, job.id)
+    assert task_row["status"] == "failed"
+    assert "state" in task_row["error"]
+    (rejection,) = _event_payloads(db_path, job.id, "connector_contract_rejected")
+    assert rejection["stage"] == "dispatch"
+    assert rejection["kind"] == "state_election_search"
+    assert "state" in rejection["message"]
+
+
+@pytest.mark.asyncio
+async def test_default_handlers_import_registered_module_name(
+    job: Job,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A kind whose ``module_name`` differs from its prefix still reaches the fake search."""
+    called: dict[str, Any] = {}  # filled in by the fake ``_search`` below
+
+    class _Payload(BaseSearchPayload):
+        pass
+
+    async def _search(query: str) -> list[SearchResult]:
+        called["query"] = query
+        return [
+            SearchResult(
+                url="https://example.test/result",
+                title="Alias result",
+                snippet="ok",
+                source_kind="web",
+            )
+        ]
+
+    async def _fetch(url: str) -> None:
+        return None
+
+    module = types.SimpleNamespace(search=_search, fetch=_fetch)  # fake tools module
+    monkeypatch.setitem(sys.modules, "research_agent.tools.alias_module", module)
+    entry = KindEntry(
+        name="alias_search",
+        payload_schema=_Payload,
+        search_fn=_search,
+        fetch_fn=_fetch,
+        host_patterns=(),
+        skill_name=None,
+        description="Alias connector",
+        optional_payload_knobs="",
+        example_query="alias",
+        module_name="alias_module",  # differs from the kind's ``alias`` prefix
+    )
+    monkeypatch.setitem(_registry._REGISTRY, "alias_search", entry)  # noqa: SLF001
+    handlers = default_handlers(object())  # built after the fake kind is registered
+    result = await handlers["alias_search"](
+        job,
+        {
+            "id": 1,
+            "kind": "alias_search",
+            "payload": {"query": "needle", "sub_question": "needle"},
+        },
+    )
+
+    assert called == {"query": "needle"}
+    assert result is not None
+    assert result["results"][0]["title"] == "Alias result"
diff --git a/tests/test_orchestrator_loop.py b/tests/test_orchestrator_loop.py
index bc032a6..1b33abb 100644
--- a/tests/test_orchestrator_loop.py
+++ b/tests/test_orchestrator_loop.py
@@ -759,6 +759,28 @@ def test_module_constants_match_spec() -> None:
 _CONNECTOR_SOURCE_KIND["bne"] = "bne_search"
 
 
+def _connector_module(prefix: str) -> Any:
+    """Import and return the tools module behind the ``<prefix>_search`` kind."""
+    import importlib
+
+    import research_agent.tools  # noqa: F401 — ensure registration ran
+    from research_agent.tools._registry import get_kind
+
+    registered = get_kind(f"{prefix}_search")
+    assert registered is not None
+    return importlib.import_module(f"research_agent.tools.{registered.module_name}")
+
+
+def _valid_connector_search_payload(prefix: str) -> dict[str, Any]:
+    """Return a payload with the common fields, plus ``state`` for ``state_election``."""
+    common: dict[str, Any] = {
+        "query": "needle",
+        "sub_question": "What relevant records mention needle?",
+    }
+    extra: dict[str, Any] = {"state": "CA"} if prefix == "state_election" else {}
+    return {**common, **extra}
+
+
 def test_default_handlers_covers_every_task_kind() -> None:
     handlers = default_handlers(router=None)
     expected = {
@@ -987,9 +1009,7 @@ async def test_connector_search_handler_dispatches_to_module(
     """Each ``<prefix>_search`` handler must call ``tools.<prefix>.search`` and
     expand top hits into ``web_fetch`` follow-ups via the standard helper.
     """
-    import importlib
-
-    mod = importlib.import_module(f"research_agent.tools.{prefix}")
+    mod = _connector_module(prefix)
     captured: dict[str, Any] = {}
 
     sk = _CONNECTOR_SOURCE_KIND[prefix]
@@ -1018,12 +1038,10 @@ async def fake_search(query: str, **kwargs: Any) -> list[SearchResult]:
     handler = handlers[f"{prefix}_search"]
     out = await handler(
         job,
-        {"kind": f"{prefix}_search", "payload": {"query": "needle", "kind": "x"}},
+        {"kind": f"{prefix}_search", "payload": _valid_connector_search_payload(prefix)},
     )
 
     assert captured["query"] == "needle"
-    # ``kind`` is in the passthrough allowlist so it reaches the connector.
-    assert captured["kwargs"].get("kind") == "x"
     assert isinstance(out, dict)
     assert "results" in out
     assert "follow_up_tasks" in out
@@ -1040,12 +1058,11 @@ async def test_connector_fetch_handler_dispatches_to_module(
     """Each ``<prefix>_fetch`` handler must call ``tools.<prefix>.fetch`` and
     persist the returned :class:`Source` via the shared helper.
     """
-    import importlib
     from datetime import UTC, datetime
 
     from research_agent.tools.models import Source
 
-    mod = importlib.import_module(f"research_agent.tools.{prefix}")
+    mod = _connector_module(prefix)
     captured: dict[str, Any] = {}
 
     sk = _CONNECTOR_SOURCE_KIND[prefix]
@@ -1100,7 +1117,14 @@ async def fake_search(query: str, **kwargs: Any) -> list[SearchResult]:
     handler = default_handlers(router=None)["linkedin_search"]
     with pytest.raises(FatalError, match="LINKEDIN_DATA_API_KEY"):
         await handler(
-            job, {"kind": "linkedin_search", "payload": {"query": "Sundar Pichai"}}
+            job,
+            {
+                "kind": "linkedin_search",
+                "payload": {
+                    "query": "Sundar Pichai",
+                    "sub_question": "What LinkedIn profile evidence exists?",
+                },
+            },
         )
 
 
@@ -1152,7 +1176,14 @@ async def fake_search(query: str, **kwargs: Any) -> list[SearchResult]:
     handler = default_handlers(router=None)["congress_search"]
     with pytest.raises(RuntimeError) as excinfo:
         await handler(
-            job, {"kind": "congress_search", "payload": {"query": "needle"}}
+            job,
+            {
+                "kind": "congress_search",
+                "payload": {
+                    "query": "needle",
+                    "sub_question": "What congressional records mention needle?",
+                },
+            },
         )
     assert not isinstance(excinfo.value, FatalError)
     assert bug_message in str(excinfo.value)
@@ -1441,6 +1472,7 @@ async def fake_search(
             "kind": "edgar_search",
             "payload": {
                 "query": "cybersecurity",
+                "sub_question": "What SEC filings mention cybersecurity?",
                 "kind": "should-be-dropped",  # edgar takes form_type, not kind
                 "form_type": "8-K",
                 "max_results": 5,
@@ -1482,6 +1514,7 @@ async def fake_search(query: str, **kwargs: Any) -> list[SearchResult]:
             "kind": "loc_search",
             "payload": {
                 "query": "pullman strike",
+                "sub_question": "What LOC records mention the Pullman strike?",
                 "collection": "chronicling-america",
                 "page": 2,
                 "max_results": 5,
diff --git a/tests/test_orchestrator_plan.py b/tests/test_orchestrator_plan.py
index 7b6f630..3126a69 100644
--- a/tests/test_orchestrator_plan.py
+++ b/tests/test_orchestrator_plan.py
@@ -20,6 +20,7 @@
     MAX_PLAN_VERSIONS,
     MAX_RECENT_RESULTS_FOR_REPLAN,
     Plan,
+    PlanParseError,
     PlanVersionCapExceeded,
     ScopeClass,
     Subgoal,
@@ -505,6 +506,116 @@ def _read_event_kinds(db_path: Path, job_id: str) -> list[str]:
     return [r["kind"] for r in rows]
 
 
+def _read_event_payloads(db_path: Path, job_id: str, kind: str) -> list[dict[str, Any]]:
+    """Return the decoded ``payload_json`` of the job's ``kind`` events, by ascending id."""
+    sql = "SELECT payload_json FROM events WHERE job_id = ? AND kind = ? ORDER BY id ASC"
+    connection = db.connect(db_path)
+    try:
+        cursor = connection.execute(sql, (job_id, kind))
+        fetched = cursor.fetchall()
+    finally:
+        # Always release the connection, even when the query raises.
+        connection.close()
+    return [json.loads(record["payload_json"]) for record in fetched]
+
+
+def _read_task_payloads(db_path: Path, job_id: str) -> list[dict[str, Any]]:
+    """Return the decoded ``payload_json`` of every task of the job, by ascending id."""
+    sql = "SELECT payload_json FROM tasks WHERE job_id = ? ORDER BY id ASC"
+    connection = db.connect(db_path)
+    try:
+        cursor = connection.execute(sql, (job_id,))
+        fetched = cursor.fetchall()
+    finally:
+        connection.close()
+    return [json.loads(record["payload_json"]) for record in fetched]
+
+
+def test_enqueue_rejects_direct_connector_missing_required_payload(
+    job: Job,
+    db_path: Path,
+) -> None:
+    """A plan task lacking ``state`` raises, enqueues nothing, and emits a rejection event."""
+    # ``state`` is deliberately left out of the ``state_election_search`` payload.
+    stateless_payload = {
+        "query": "2026 House candidates",
+        "sub_question": "Find state candidate rows",
+    }
+    invalid_task = TaskSpec(
+        kind="state_election_search",
+        payload=stateless_payload,
+    )
+    bad_plan = _sample_plan(task_template=[invalid_task])
+
+    with pytest.raises(PlanParseError, match="state"):
+        plan_module._enqueue_plan_tasks(job, bad_plan)  # noqa: SLF001
+
+    # No task row was written, and exactly one rejection event was recorded.
+    assert _read_task_payloads(db_path, job.id) == []
+    rejections = _read_event_payloads(db_path, job.id, "connector_contract_rejected")
+    (rejection,) = rejections
+    assert rejection["stage"] == "pre_enqueue"
+    assert rejection["kind"] == "state_election_search"
+    assert rejection["plan_task_index"] == 0
+    assert "state" in rejection["message"]
+
+
+def test_enqueue_repairs_state_election_full_state_name(
+    job: Job,
+    db_path: Path,
+) -> None:
+    """A spelled-out ``state`` is stored as its two-letter code and a repair event is emitted."""
+    # The payload names the state in full rather than by postal code.
+    full_name_payload = {
+        "query": "2026 House candidates",
+        "sub_question": "Find California state candidate rows",
+        "state": "California",
+    }
+    repairable_task = TaskSpec(
+        kind="state_election_search",
+        payload=full_name_payload,
+    )
+    plan = _sample_plan(task_template=[repairable_task])
+
+    plan_module._enqueue_plan_tasks(job, plan)  # noqa: SLF001
+
+    (stored_payload,) = _read_task_payloads(db_path, job.id)
+    assert stored_payload["state"] == "CA"
+    # The repair event carries both the original and the rewritten payload.
+    repairs = _read_event_payloads(db_path, job.id, "connector_contract_repaired")
+    (repair,) = repairs
+    assert repair["stage"] == "pre_enqueue"
+    assert repair["before"]["state"] == "California"
+    assert repair["after"]["state"] == "CA"
+
+
+def test_enqueue_repairs_fec_empty_query_candidate_enumeration(
+    job: Job,
+    db_path: Path,
+) -> None:
+    """An empty-query FEC task is stored with ``kind``, ``office`` and ``state`` repaired."""
+    # No ``kind`` is supplied; ``office`` and ``state`` are spelled out in full.
+    loose_payload = {
+        "query": "",
+        "sub_question": "Enumerate 2026 California House candidates",
+        "cycle": 2026,
+        "office": "House",
+        "state": "California",
+    }
+    enumeration_task = TaskSpec(
+        kind="fec_search",
+        payload=loose_payload,
+    )
+    plan = _sample_plan(task_template=[enumeration_task])
+
+    plan_module._enqueue_plan_tasks(job, plan)  # noqa: SLF001
+
+    (stored_payload,) = _read_task_payloads(db_path, job.id)
+    assert stored_payload["kind"] == "candidates_enumerate"
+    assert stored_payload["office"] == "H"
+    assert stored_payload["state"] == "CA"
+
+
 def test_initial_plan_writes_v1_row_and_emits_event(
     job: Job,
     db_path: Path,
diff --git a/tests/test_prompts_loader.py b/tests/test_prompts_loader.py
index 90fe886..e36a17e 100644
--- a/tests/test_prompts_loader.py
+++ b/tests/test_prompts_loader.py
@@ -420,8 +420,8 @@ async def _noop_search(query: str, **_: object) -> list[object]:
     assert "`alpha_search`, `beta_search`" in rendered
     # tactical_replan list mirrors the allowlist.
     assert rendered.count("`alpha_search`, `beta_search`") >= 2
-    # Stub knob renders as ``—``.
-    assert "| `beta_search` | Beta desc. | — | `beta example` |" in rendered
+    # Stub required/knob cells render as ``—``.
+    assert "| `beta_search` | Beta desc. | — | — | missing | `beta example` |" in rendered
 
 
 def test_load_planner_caller_can_override_registry_vars(
diff --git a/tests/test_skills_content.py b/tests/test_skills_content.py
index 49dc9d2..37d749b 100644
--- a/tests/test_skills_content.py
+++ b/tests/test_skills_content.py
@@ -14,11 +14,21 @@
 import re
 
 import pytest
+import yaml
 
 from research_agent.skills import loader as skills_loader
 from research_agent.skills.loader import clear_cache, list_skills, load_skill
 
 CONNECTOR_SKILLS = ("congress", "edgar", "fedregister", "courtlistener", "fec")
+ISSUE_318_CONNECTOR_SKILLS = (  # parametrizes ``test_issue_318_*``
+    "gdelt",
+    "lda",
+    "littlesis",
+    "nonprofits",
+    "opencorporates",
+    "usaspending",
+)
+ISSUE_319_CONNECTOR_SKILLS = ("bbb", "calaccess", "licensing")  # parametrizes ``test_issue_319_*``
 
 STRATEGY_SKILLS = (
     "modern-policy-era-filtering",
@@ -28,6 +38,17 @@
 )
 
 REQUIRED_BODY_SECTIONS = ("Knobs available", "Anti-patterns")
+ISSUE_318_REQUIRED_SECTIONS = (  # ``## <title>`` headings the 318/319 section tests require
+    "Official documentation",
+    "Auth and cost",
+    "Required payload fields",
+    "Knobs available",
+    "Valid payload examples",
+    "Request and pagination pattern",
+    "Failure modes",
+    "Evidence shape",
+    "Anti-patterns",
+)
 
 
 @pytest.fixture(autouse=True)
@@ -70,6 +91,15 @@ def _knobs_section_identifiers(body: str) -> set[str]:
     return knobs
 
 
+def _skill_payload_examples(body: str) -> list[dict[str, object]]:
+    """Collect fenced ``yaml`` blocks that parse to mappings with ``kind`` and ``payload``."""
+    fenced_yaml = re.compile(r"```yaml\s+(?P<body>.*?)```", re.DOTALL)
+    parsed = (yaml.safe_load(hit.group("body")) for hit in fenced_yaml.finditer(body))
+    return [
+        doc for doc in parsed if isinstance(doc, dict) and "kind" in doc and "payload" in doc
+    ]
+
+
 @pytest.mark.parametrize("name", CONNECTOR_SKILLS)
 def test_connector_skill_loads_with_non_empty_body(name: str) -> None:
     body = load_skill("connectors", name)
@@ -111,6 +141,121 @@ def test_connector_skill_knobs_match_search_signature(name: str) -> None:
     )
 
 
+@pytest.mark.parametrize("name", ISSUE_318_CONNECTOR_SKILLS)
+def test_issue_318_connector_skill_loads(name: str) -> None:
+    """The skill body exceeds 500 characters and its listing metadata is non-empty."""
+    assert len(load_skill("connectors", name)) > 500
+    listing = {skill["name"]: skill for skill in list_skills("connectors")}
+    metadata = listing[name]
+    for field in ("description", "when_to_use", "when_not_to_use"):
+        assert metadata[field]
+
+
+@pytest.mark.parametrize("name", ISSUE_318_CONNECTOR_SKILLS)
+@pytest.mark.parametrize("section", ISSUE_318_REQUIRED_SECTIONS)
+def test_issue_318_connector_skill_has_required_sections(name: str, section: str) -> None:
+    """The skill body has ``section`` as a level-2 (``##``) heading on its own line."""
+    assert re.search(rf"^##\s+{re.escape(section)}\s*$", load_skill("connectors", name), re.M)
+
+
+@pytest.mark.parametrize("name", ISSUE_318_CONNECTOR_SKILLS)
+def test_issue_318_connector_skill_examples_validate(name: str) -> None:
+    """Every YAML payload example in the skill passes its kind's payload contract."""
+    from research_agent.tools._registry import get_kind, validate_payload_contract
+
+    registered = get_kind(f"{name}_search")
+    assert registered is not None
+    assert registered.skill_name == name
+
+    examples = _skill_payload_examples(load_skill("connectors", name))
+    assert examples, f"{name}: expected at least one YAML payload example"
+    for sample in examples:
+        assert sample["kind"] == registered.name
+        sample_payload = sample["payload"]
+        assert isinstance(sample_payload, dict)
+        verdict = validate_payload_contract(registered.name, sample_payload)
+        assert verdict.valid, verdict.repair_message
+
+
+def test_issue_318_connector_skills_cite_official_docs() -> None:
+    official_urls = {  # skill name -> documentation URL its body must contain
+        "gdelt": "https://blog.gdeltproject.org/gdelt-doc-2-0-api-debuts/amp/",
+        "lda": "https://lda.gov/api/",
+        "littlesis": "https://dev.littlesis.org/api/",
+        "nonprofits": "https://projects.propublica.org/nonprofits/api/",
+        "opencorporates": "https://api.opencorporates.com/documentation/API-Reference",
+        "usaspending": "https://api.usaspending.gov/docs/endpoints",
+    }
+    for skill, doc_url in official_urls.items():
+        assert doc_url in load_skill("connectors", skill)
+
+
+@pytest.mark.parametrize("name", ISSUE_319_CONNECTOR_SKILLS)
+def test_issue_319_connector_skill_loads(name: str) -> None:
+    """The skill body exceeds 500 characters and its listing metadata is non-empty."""
+    assert len(load_skill("connectors", name)) > 500
+    listing = {skill["name"]: skill for skill in list_skills("connectors")}
+    metadata = listing[name]
+    for field in ("description", "when_to_use", "when_not_to_use"):
+        assert metadata[field]
+
+
+@pytest.mark.parametrize("name", ISSUE_319_CONNECTOR_SKILLS)
+@pytest.mark.parametrize("section", ISSUE_318_REQUIRED_SECTIONS)
+def test_issue_319_connector_skill_has_required_sections(name: str, section: str) -> None:
+    """The skill body has ``section`` as a level-2 (``##``) heading on its own line."""
+    assert re.search(rf"^##\s+{re.escape(section)}\s*$", load_skill("connectors", name), re.M)
+
+
+@pytest.mark.parametrize("name", ISSUE_319_CONNECTOR_SKILLS)
+def test_issue_319_connector_skill_examples_validate(name: str) -> None:
+    """Every YAML payload example in the skill passes its kind's payload contract."""
+    from research_agent.tools._registry import get_kind, validate_payload_contract
+
+    registered = get_kind(f"{name}_search")
+    assert registered is not None
+    assert registered.skill_name == name
+
+    examples = _skill_payload_examples(load_skill("connectors", name))
+    assert examples, f"{name}: expected at least one YAML payload example"
+    for sample in examples:
+        assert sample["kind"] == registered.name
+        sample_payload = sample["payload"]
+        assert isinstance(sample_payload, dict)
+        verdict = validate_payload_contract(registered.name, sample_payload)
+        assert verdict.valid, verdict.repair_message
+
+
+def test_issue_319_connector_skills_cite_official_pages() -> None:
+    official_urls = {  # skill name -> official page URL its body must contain
+        "bbb": "https://www.bbb.org/all/about-bbb/",
+        "calaccess": "https://powersearch.sos.ca.gov/frequently-asked-questions/",
+        "licensing": "https://cslb.ca.gov/OnlineServices/CheckLicenseII/CheckLicense.aspx",
+    }
+    for skill, page_url in official_urls.items():
+        assert page_url in load_skill("connectors", skill)
+
+
+def test_issue_319_site_navigation_source_boundaries() -> None:
+    """Each site-navigation skill body contains its scope and limitation phrases."""
+    required_phrases = {
+        "bbb": ("private nonprofit", "not a government licensing authority"),
+        "calaccess": ("Power Search", "Cal-Access", "kind=lobbying", "unsupported"),
+        "licensing": (
+            "California CSLB",
+            "TX",
+            "FL",
+            "NY",
+            "stubs",
+            "unsupported",
+        ),
+    }
+    for skill, phrases in required_phrases.items():
+        body = load_skill("connectors", skill)
+        for phrase in phrases:
+            assert phrase in body
+
+
 def test_congress_skill_carries_canonical_motivator() -> None:
     """The 110th-Congress / IRA relevance trap is the headline reason this
     skill exists; the body must keep that example intact."""
diff --git a/tests/test_tools_fec.py b/tests/test_tools_fec.py
index c466c71..4dbcf91 100644
--- a/tests/test_tools_fec.py
+++ b/tests/test_tools_fec.py
@@ -13,6 +13,7 @@
 import pytest
 
 from research_agent.tools import fec
+from research_agent.tools._registry import validate_payload_contract
 
 # ---------------------------------------------------------------------------
 # Fixtures
@@ -40,6 +41,54 @@ def cache_dir(tmp_path: Path, monkeypatch) -> Path:
     return target
 
 
+def test_payload_contract_repairs_empty_query_candidate_enumeration() -> None:
+    result = validate_payload_contract(
+        "fec_search",
+        {
+            "query": "",
+            "sub_question": "Enumerate 2026 California House candidates",
+            "cycle": 2026,
+            "office": "House",
+            "state": "California",
+        },
+    )
+
+    assert result.valid is True
+    assert result.repaired is True
+    assert result.payload["kind"] == "candidates_enumerate"
+    assert result.payload["office"] == "H"
+    assert result.payload["state"] == "CA"
+
+
+def test_payload_contract_rejects_empty_query_non_enumeration() -> None:
+    result = validate_payload_contract(
+        "fec_search",
+        {
+            "query": "",
+            "sub_question": "Search FEC candidates",
+            "kind": "candidates",
+        },
+    )
+
+    assert result.valid is False
+    assert "query must be non-empty" in result.repair_message
+
+
+def test_payload_contract_rejects_candidate_enumeration_without_cycle() -> None:
+    result = validate_payload_contract(
+        "fec_search",
+        {
+            "query": "",
+            "sub_question": "Enumerate House candidates",
+            "kind": "candidates_enumerate",
+            "office": "H",
+        },
+    )
+
+    assert result.valid is False
+    assert "requires cycle" in result.repair_message
+
+
 # ---------------------------------------------------------------------------
 # Test payloads
 # ---------------------------------------------------------------------------
diff --git a/tests/test_tools_state_election.py b/tests/test_tools_state_election.py
index c579f98..3e6cfa8 100644
--- a/tests/test_tools_state_election.py
+++ b/tests/test_tools_state_election.py
@@ -8,6 +8,7 @@
 import pytest
 
 from research_agent.tools import state_election
+from research_agent.tools._registry import validate_payload_contract
 
 
 def _patch_httpx(monkeypatch: pytest.MonkeyPatch, text_by_url: dict[str, str]) -> None:
@@ -34,6 +35,48 @@ def _set_recipe(monkeypatch: pytest.MonkeyPatch, state: str, recipe: dict[str, A
     monkeypatch.setattr(state_election, "_RECIPES", recipes)
 
 
+def test_payload_contract_normalizes_full_state_name() -> None:
+    result = validate_payload_contract(
+        "state_election_search",
+        {
+            "query": "House candidates",
+            "sub_question": "Find California House candidates",
+            "state": "California",
+        },
+    )
+
+    assert result.valid is True
+    assert result.repaired is True
+    assert result.payload["state"] == "CA"
+
+
+def test_payload_contract_rejects_missing_state() -> None:
+    result = validate_payload_contract(
+        "state_election_search",
+        {
+            "query": "House candidates",
+            "sub_question": "Find House candidates",
+        },
+    )
+
+    assert result.valid is False
+    assert "state" in result.repair_message
+
+
+def test_payload_contract_rejects_unsupported_state() -> None:
+    result = validate_payload_contract(
+        "state_election_search",
+        {
+            "query": "House candidates",
+            "sub_question": "Find Alabama House candidates",
+            "state": "Alabama",
+        },
+    )
+
+    assert result.valid is False
+    assert "state AL is not supported" in result.repair_message
+
+
 @pytest.mark.asyncio
 async def test_static_csv_parses_co_candidate_rows(monkeypatch: pytest.MonkeyPatch) -> None:
     url = "https://example.test/co.csv"

From 5cf24c4840554e6663b8947b5a849d37f5e6f26b Mon Sep 17 00:00:00 2001
From: Bradley Taylor <bradtaylorsf@gmail.com>
Date: Thu, 21 May 2026 16:25:24 -0700
Subject: [PATCH 2/3] feat: batch implement issues #320, #321

---
 README.md                                     |  12 +-
 docs/API_KEYS.md                              |  14 +-
 docs/CONFIG.md                                |  38 +++
 .../skills/connectors/linkedin.md             | 122 ++++++++
 .../skills/connectors/sanctions.md            | 129 ++++++++
 .../skills/connectors/scholar.md              | 111 +++++++
 src/research_agent/tools/linkedin.py          |  12 +-
 src/research_agent/tools/sanctions.py         |  12 +-
 src/research_agent/tools/scholar.py           |   8 +-
 tests/research_agent/tools/test_registry.py   |   3 +
 tests/test_candidate_roster_backtest.py       | 282 +++++++++++++++++-
 tests/test_skills_content.py                  | 117 ++++++++
 12 files changed, 828 insertions(+), 32 deletions(-)
 create mode 100644 src/research_agent/skills/connectors/linkedin.md
 create mode 100644 src/research_agent/skills/connectors/sanctions.md
 create mode 100644 src/research_agent/skills/connectors/scholar.md

diff --git a/README.md b/README.md
index d141fda..f37fa69 100644
--- a/README.md
+++ b/README.md
@@ -134,7 +134,7 @@ linkage, same example query. The table is generated from
 | `iwm_search` | Imperial War Museums public collections: photographs, sound/oral histories, documents, film, objects (Playwright scrape, no auth) | — | `max_results`, `object_category`, `related_period`, `records_with_media`, `style`, `page_size` | `iwm` | `Battle of Britain` |
 | `lda_search` | Senate Lobbying Disclosure Act filings (registrants, contributions) | — | `kind: filings\|registrants\|contributions` | `lda` | `Heritage Foundation` |
 | `licensing_search` | State contractor / licensing-board lookups (Playwright; CA wired, others stubs) | — | `state: CA\|TX\|FL\|NY` | `licensing` | `SBI Builders` |
-| `linkedin_search` | LinkedIn person/company lookup via Proxycurl or Lix — requires broker key | — | `kind: person\|company` | exempt: #320 paid/gated connector skill backfill | `Sundar Pichai` |
+| `linkedin_search` | LinkedIn person/company lookup via broker; paid/TOS-sensitive and Proxycurl is shut down | — | `kind: person\|company`, `max_results` | `linkedin` | `Sundar Pichai` |
 | `littlesis_search` | Power-mapping database — entities, donations, board seats, family ties (lead, not evidence) | — | `kind: entities\|relationships` | `littlesis` | `Peter Thiel` |
 | `loc_search` | Library of Congress digital collections, including Chronicling America through the unified loc.gov API | — | `collection: chronicling-america\|prints\|manuscripts\|recordings\|maps`, `page: <int>` | `loc` | `battle of algiers` |
 | `nara_search` | US National Archives Catalog OPA v2 records, declassified federal records, military records, photos; requires NARA_API_KEY | — | `available_online`, `type_of_materials`, `result_types`, `record_group`, `page` | `nara` | `Vietnam War declassified` |
@@ -143,8 +143,8 @@ linkage, same example query. The table is generated from
 | `opencorporates_search` | Global company registry — requires `OPENCORPORATES_API_KEY` | — | `jurisdiction: us_ca\|gb\|...` | `opencorporates` | `Acme Holdings` |
 | `openlibrary_search` | Open Library book metadata, ISBN/OCLC/LCCN identifiers, and Internet Archive scan IDs through search.json | — | `max_results` | `openlibrary` | `Pullman Strike 1894` |
 | `persee_search` | Persee French academic journals in humanities and social sciences (Playwright scrape, no auth) | — | `max_results` | `persee` | `guerre d'Algerie` |
-| `sanctions_search` | OFAC SDN + UK sanctions lists (local index, no auth) | — | — | exempt: #320 paid/gated and sanctions connector skill backfill | `Wagner Group` |
-| `scholar_search` | Google Scholar via SerpAPI — requires `SERPAPI_KEY` | — | `kind: case_law\|articles` | exempt: #320 paid/gated connector skill backfill | `Section 230 appellate` |
+| `sanctions_search` | OFAC sanctions screening plus legacy EU/UK local rows; check source freshness before compliance use | — | `max_results`, `kinds: [SDN\|EU\|UK]` | `sanctions` | `Wagner Group` |
+| `scholar_search` | Google Scholar via SerpAPI — requires `SERPAPI_KEY` | — | `kind: case_law\|articles`, `max_results` | `scholar` | `Section 230 appellate` |
 | `si_search` | Smithsonian Open Access digitized collection objects, museum artifacts, images, 3D assets, and object metadata via api.data.gov | — | `max_results` | `smithsonian` | `Apollo 11` |
 | `sos_search` | State Secretary-of-State business entity filings (Playwright; CA wired, others stubs) | — | `state: CA\|DE\|NV\|...` | `sos` | `Acme Corp` |
 | `state_election_search` | Official state election candidate roster sources and portals | `state` | `office`, `cycle`, `max_results` | `state_election` | `2026 House candidates` |
@@ -281,9 +281,9 @@ that list, so there is no drift.
 | `DPLA_API_KEY` | no | Digital Public Library of America API key — used by `tools/dpla.py`. Request with `curl -X POST https://api.dp.la/v2/api_key/<your-email>`; the emailed 32-character key is sent as `?api_key=<key>`. Connector and smoke skip cleanly when unset. |
 | `EUROPEANA_API_KEY` | no | Europeana API key — used by `tools/europeana.py`. Create a free key in your Europeana account under Manage API keys (migrated there on 2025-05-28). Sent as `?wskey=<key>` to `https://api.europeana.eu/api/v2/search.json`; connector enforces 1 RPS and smoke skips cleanly when unset. |
 | `SERPAPI_KEY` | no | SERPAPI key — required by `tools/scholar.py` (Google Scholar engine, case law + academic). Plans start at $75/mo for 5k searches across all engines; per-query ≈ $0.015. Sign up at <https://serpapi.com/>. |
-| `LINKEDIN_DATA_API_KEY` | no | LinkedIn data-broker key (default broker: Proxycurl) — required by `tools/linkedin.py`. Per-lookup ≈ $0.01–$0.05; gate fetches behind explicit planner tasks. Sign up at <https://nubela.co/proxycurl/>. |
-| `LINKEDIN_BROKER` | no | Broker recipe used by `tools/linkedin.py`. `proxycurl` (default) or `lix`; switching to `lix` consults `LIX_API_KEY` instead of `LINKEDIN_DATA_API_KEY`. |
-| `LIX_API_KEY` | no | Lix data-broker key (<https://lix-it.com/>) — only consulted when `LINKEDIN_BROKER=lix`. Similar per-lookup pricing to Proxycurl. |
+| `LINKEDIN_DATA_API_KEY` | no | Legacy Proxycurl key read by `tools/linkedin.py` when `LINKEDIN_BROKER=proxycurl`. Proxycurl's official pages now say the service is shut down; use only when an operator confirms legacy access. |
+| `LINKEDIN_BROKER` | no | Broker recipe used by `tools/linkedin.py`. `proxycurl` (legacy default, currently shut down per Nubela) or `lix`; switching to `lix` consults `LIX_API_KEY` instead of `LINKEDIN_DATA_API_KEY`. |
+| `LIX_API_KEY` | no | Lix data-broker key (<https://lix-it.com/>) — only consulted when `LINKEDIN_BROKER=lix`. Paid/gated; review budget and terms before planner use. |
 | `RESEARCH_REDDIT_USER_AGENT` | no | Override the User-Agent `tools/reddit.py` sends. Reddit's anonymous JSON endpoint 403s the project's descriptive UA; the connector defaults to a Chrome UA. Set this when you have a registered OAuth app or want a different override than `RESEARCH_USER_AGENT` (consulted next in the fallback chain). |
 | `RESEARCH_MODELS_CONFIG` | no | Path to the models routing YAML the daemon loads. Defaults to `config/models.yaml` relative to cwd. Set when running out-of-tree or pointing at a packaged config. |
 | `RESEARCH_DB_PATH` | no | Override the SQLite index path the daemon uses. Unset uses `data/index.sqlite`. Useful for isolating runs under test or pointing at a writable disk. |
diff --git a/docs/API_KEYS.md b/docs/API_KEYS.md
index 9909173..1413df9 100644
--- a/docs/API_KEYS.md
+++ b/docs/API_KEYS.md
@@ -83,14 +83,16 @@ explicitly per #113).
 | Connector | Issue | Env var | Where to get it | Approx cost |
 |---|---|---|---|---|
 | Google Scholar via SERPAPI | #114 | `SERPAPI_KEY` | <https://serpapi.com/users/sign_up> | $75/mo for 5K queries (Scholar is one engine of many they offer) |
-| LinkedIn via Proxycurl (default broker) | #115 | `LINKEDIN_DATA_API_KEY` | <https://nubela.co/proxycurl/> → Sign up → API Key | $0.01–$0.05 per profile lookup |
-| LinkedIn via Lix (alternate broker) | #115 | `LIX_API_KEY` (set `LINKEDIN_BROKER=lix` to switch) | <https://lix-it.com/> → Sign up | Similar per-lookup pricing to Proxycurl |
+| LinkedIn via Proxycurl (legacy default broker) | #115/#320 | `LINKEDIN_DATA_API_KEY` | Proxycurl official pages now say the service is shut down; use only if an operator confirms legacy access | Historical broker pricing only |
+| LinkedIn via Lix (alternate broker) | #115/#320 | `LIX_API_KEY` (set `LINKEDIN_BROKER=lix` to switch) | <https://lix-it.com/> → Sign up | Paid/gated Lix credits; review current pricing and terms |
 
 For LinkedIn the connector is **broker-pluggable**: `LINKEDIN_BROKER`
-selects the recipe (`proxycurl` by default, or `lix`). Each broker
-reads its own key — see the rows above. Adding another broker is a
-recipe-layer change in `tools/linkedin.py`. Use whichever broker your
-wallet and TOS comfort allow.
+selects the recipe (`proxycurl` by default, or `lix`). Proxycurl is retained
+only as a legacy code path because Nubela now says it is shut down; NinjaPear
+is the successor platform but is not implemented by this connector. Each
+broker reads its own key — see the rows above. Adding another broker is a
+recipe-layer change in `tools/linkedin.py`. Use broker data only when your
+wallet, terms review, and source-provenance needs allow it.
 
 ---
 
diff --git a/docs/CONFIG.md b/docs/CONFIG.md
index 0a1a181..6508dea 100644
--- a/docs/CONFIG.md
+++ b/docs/CONFIG.md
@@ -111,3 +111,41 @@ the epic) reads `metadata.parent_file` to build one
 artifact. Stage 1 in this PR series is just the ingestion plumbing —
 M1.2 wires the per-page coverage ledger, and M2 fills in the
 extraction / rollup tasks.
+
+## Candidate Roster Handoff Backtest
+
+The 2026 federal candidate-roster regression runs without live network access:
+
+```bash
+UV_CACHE_DIR=.uv-cache uv run pytest tests/test_candidate_roster_backtest.py -q
+```
+
+It covers planner handoff failures before enqueue: grouped
+`state_election_search` tasks without `state`, full state names repaired to
+postal abbreviations, and empty FEC candidate searches, which are repaired to
+`kind=candidates_enumerate` when structured filters exist and rejected when
+they do not.
+
+When LM Studio and live source access are available, run a short local smoke
+from the repo root:
+
+```bash
+UV_CACHE_DIR=.uv-cache uv run research start \
+  --skip-intake \
+  --local \
+  --max-tasks 10 \
+  --goal "As of May 15, 2026, create a complete sourced state-by-state list of every U.S. House and Senate candidate in all 50 states."
+```
+
+Then inspect the job events for connector-contract repairs/rejections and
+cadence diagnostics:
+
+```bash
+jq 'select(.kind=="connector_contract_rejected" or .kind=="connector_contract_repaired" or .kind=="warning" or (.kind=="checkpoint" and (.payload.checkpoint_kind=="synthesis_done" or .payload.checkpoint_kind=="critique_done")))' \
+  jobs/<job-id>/events.jsonl
+```
+
+The smoke is healthy when malformed connector tasks are absent or logged as
+contract repairs/rejections before dispatch, local model routing is visible in
+the daemon logs, and any synthesis/critique failure appears as a `warning`
+event instead of a quiet pending-task stall.
diff --git a/src/research_agent/skills/connectors/linkedin.md b/src/research_agent/skills/connectors/linkedin.md
new file mode 100644
index 0000000..1dd09a4
--- /dev/null
+++ b/src/research_agent/skills/connectors/linkedin.md
@@ -0,0 +1,122 @@
+---
+name: linkedin
+description: "Paid LinkedIn person/company lookup via broker; Proxycurl is shut down, Lix remains code-supported but gated."
+when_to_use: "Narrow people or company discovery when an operator has explicitly approved a paid LinkedIn-data broker and the result will be treated as a lead."
+when_not_to_use: "Automatic broad sweeps, local/offline runs, compliance decisions, or cases where a public primary source can prove the same fact."
+---
+
+# LinkedIn connector
+
+Use `linkedin_search` only after confirming that paid/gated broker access is
+allowed for the job. LinkedIn profile data is third-party broker evidence, not
+an official record. Prefer official biographies, filings, employer pages, court
+records, campaign records, or `web_search` before spending broker credits.
+
+## Official documentation
+
+- Proxycurl shutdown status: https://nubela.co/proxycurl/auth/register.html
+- Proxycurl 2026 status and NinjaPear successor note: https://nubela.co/blog/what-is-proxycurl-api-now-in-2026-im-the-founder/
+- Proxycurl shutdown notice: https://nubela.co/blog/goodbye-proxycurl/
+- NinjaPear API reference and pricing (successor platform; not implemented by this connector): https://nubela.co/docs and https://nubela.co/pricing
+- Lix API reference: https://lix-it.com/docs/
+- Lix LinkedIn API pricing/status page: https://lix-it.com/pages/linkedin-api
+- Lix terms: https://lix-it.com/terms?currency=usd
+
+## Auth and cost
+
+- Required env vars:
+  - `LINKEDIN_BROKER` selects the broker recipe. Valid values in code are
+    `proxycurl` and `lix`; unset defaults to `proxycurl`.
+  - `LINKEDIN_DATA_API_KEY` is read when `LINKEDIN_BROKER=proxycurl`.
+  - `LIX_API_KEY` is read when `LINKEDIN_BROKER=lix`.
+- Proxycurl: official Nubela pages say Proxycurl is no longer in service.
+  Do not assume the default `proxycurl` recipe is healthy. Run it only for a
+  deliberate legacy account test where the operator has confirmed access.
+- NinjaPear: official successor path from the Proxycurl founder, but this
+  connector does not implement NinjaPear endpoints. Do not set
+  `LINKEDIN_BROKER=ninjapear` unless code is added.
+- Lix: paid/gated. Lix documents Standard Credits and 1 credit per LinkedIn
+  API call. Use only with operator-approved budget and terms review.
+- Anonymous/local behavior: there is no anonymous or offline fallback. Missing
+  broker keys raise a missing-credential error or cause smoke to skip/fail.
+
+## Required payload fields
+
+- `query` - required common field; use a specific person or company name.
+- `sub_question` - required common field; state what the LinkedIn lead should
+  help prove.
+- Connector-specific required fields: none.
+
+## Knobs available
+
+- `kind` - optional; valid values are `person` and `company`; default is
+  `person`.
+- `max_results` - optional client cap. Keep it small because each broker call
+  can spend credits and create terms risk.
+
+## Valid payload examples
+
+```yaml
+kind: linkedin_search
+payload:
+  query: "Sundar Pichai"
+  sub_question: "Find a LinkedIn person-profile lead for Sundar Pichai."
+  kind: person
+  max_results: 3
+```
+
+```yaml
+kind: linkedin_search
+payload:
+  query: "Anthropic"
+  sub_question: "Find the LinkedIn company-profile lead for Anthropic."
+  kind: company
+  max_results: 3
+```
+
+## Request and pagination pattern
+
+The connector resolves the configured broker, sends one person or company
+search request, and returns up to `max_results` rows. Fetch accepts
+`https://www.linkedin.com/in/...` and `https://www.linkedin.com/company/...`
+URLs and asks the same broker for profile/company enrichment.
+
+For Lix, the code-supported search recipes use LinkedIn search endpoints under
+`https://api.lix-it.com/v1/li/linkedin/search/...` with an `Authorization`
+header. Lix documents person and organization search as one Standard Credit per
+call, with response paging metadata in the broker response. The current
+connector does not expose an explicit page or pagination-token payload knob.
+
+## Failure modes
+
+- Proxycurl shutdown: official status pages say it is no longer in service.
+  Treat `proxycurl` failures as expected unless an operator confirms a working
+  legacy account.
+- Legal/terms risk: LinkedIn has no public API for these lookups. Do not run
+  automatic broad discovery, do not bypass account protections, and do not
+  use broker data as the sole basis for an adverse or compliance claim.
+- Missing credentials: do not silently fall back to web scraping.
+- Rate limits or broker errors: retry only with explicit backoff and only if
+  the job still has approval to spend credits.
+- True no-result behavior: use official or public web sources before assuming
+  the person/company lacks a profile.
+
+## Evidence shape
+
+`SearchResult` rows use `source_kind="linkedin"`, a LinkedIn URL, title,
+snippet, and `extras` containing `kind`, `broker`, and person/company fields
+such as location, current company/title, industry, headcount, or HQ location.
+
+`fetch()` returns `Source.cleaned_text` as a markdown profile or company
+summary and `metadata` with broker, kind, profile URL, and parsed facts. Cite
+it as broker-derived context only. Corroborate employment, affiliation,
+headcount, and titles against primary or public sources before synthesis.
+
+## Anti-patterns
+
+- Do not use `linkedin_search` in local/offline runs or unbudgeted loops.
+- Do not imply Proxycurl is healthy in 2026; official pages say it is shut
+  down and NinjaPear is not a drop-in replacement implemented here.
+- Do not auto-fetch every LinkedIn URL found by web search.
+- Do not treat profile data as an official identity, employment, or sanctions
+  record.
diff --git a/src/research_agent/skills/connectors/sanctions.md b/src/research_agent/skills/connectors/sanctions.md
new file mode 100644
index 0000000..18b3ed1
--- /dev/null
+++ b/src/research_agent/skills/connectors/sanctions.md
@@ -0,0 +1,129 @@
+---
+name: sanctions
+description: "OFAC sanctions screening with local SQLite cache plus legacy EU/UK rows; compliance-sensitive and freshness-critical."
+when_to_use: "Name, alias, EIN, vessel, or entity screening against the local sanctions index when source freshness can be checked and cited."
+when_not_to_use: "Final compliance determinations, stale UK/OFSI-only screening, unavailable EU bulk refreshes, or any task that needs legal advice."
+---
+
+# Sanctions connector
+
+Use `sanctions_search` for sanctions-screening leads and citations to official
+list entries. This connector is compliance-sensitive: a result is not a legal
+determination, and a non-result is not clearance. Always inspect source
+freshness and list authority before relying on output.
+
+## Official documentation
+
+- OFAC Sanctions List Service: https://ofac.treasury.gov/sanctions-list-service
+- OFAC Advanced Sanctions List Standard FAQ: https://ofac.treasury.gov/sdn-list-data-formats-data-schemas/frequently-asked-questions-on-advanced-sanctions-list-standard
+- OFAC compliance match guidance entry point: https://ofac.treasury.gov/ofac-compliance-hotline
+- EU sanctions resources and Financial Sanctions Database pointer:
+  https://finance.ec.europa.eu/eu-and-world/sanctions-restrictive-measures/overview-sanctions-and-related-resources_en
+- UK Sanctions List current source and formats:
+  https://www.gov.uk/government/publications/the-uk-sanctions-list
+- UK single-list transition guidance:
+  https://www.gov.uk/guidance/moving-to-a-single-list-for-uk-sanctions-designations-28-january-2026
+- Legacy OFSI search closure page: https://sanctionssearchapp.ofsi.hmtreasury.gov.uk/
+
+## Auth and cost
+
+- Required env vars: none for normal use.
+- Optional env vars:
+  - `SANCTIONS_DB_PATH` overrides the local SQLite sanctions index path. Unset
+    uses the module default under `data/sanctions.sqlite`.
+- Cost: official OFAC/UK/EU list access is public, but the EU bulk path used
+  by historical code may be unavailable or gated. Do not use scraped or
+  third-party mirrors as authoritative replacements without a separate issue.
+- Local/offline behavior: searches use the local SQLite index. If the cache is
+  missing and source refresh fails, the connector can return empty results.
+  Offline runs are useful only when `research doctor` or index metadata confirms
+  a recent successful refresh for the list being searched.
+
+## Required payload fields
+
+- `query` - required common field; use a person, entity, alias, vessel, or ID.
+- `sub_question` - required common field; state the list and match question.
+- Connector-specific required fields: none.
+
+## Knobs available
+
+- `max_results` - optional client cap.
+- `kinds` - optional list-kind filter. Valid values are `SDN`, `EU`, and `UK`.
+  Use this only when the source freshness caveats below are acceptable.
+
+## Valid payload examples
+
+```yaml
+kind: sanctions_search
+payload:
+  query: "Wagner Group"
+  sub_question: "Screen Wagner Group against the current OFAC sanctions index."
+  kinds:
+    - SDN
+  max_results: 5
+```
+
+```yaml
+kind: sanctions_search
+payload:
+  query: "Abramovich"
+  sub_question: "Look for legacy UK sanctions-index rows and verify freshness before use."
+  kinds:
+    - UK
+  max_results: 5
+```
+
+## Request and pagination pattern
+
+`sanctions_search` searches the local SQLite index using FTS first, then a
+normalized fuzzy fallback. The index refresh path currently fetches OFAC
+`sdn.xml` and parses the basic SDN schema. OFAC's Sanctions List Service is the
+current official entry point for SDN and non-SDN list downloads; OFAC's FAQ
+says the advanced XML files contain the same core list data and add metadata.
+
+EU: the European Commission points users to the Financial Sanctions Database
+for the consolidated list. The code keeps the historical FSD XML URL as a
+fetchable source reference, but the refresh path is disabled because that bulk
+endpoint has returned 403 in this project. Treat EU rows as stale unless a
+successful refresh is proven.
+
+UK: the UK Sanctions List is now the authoritative source for all UK sanctions
+designations. The OFSI Consolidated List and legacy OFSI search page stopped
+updating on 2026-01-28. Any local rows produced from the old OFSI feed are
+reference-only for post-transition UK screening. Current UK work must cite the
+UKSL formats at `https://sanctionslist.fcdo.gov.uk/docs/UK-Sanctions-List.*`.
+
+## Failure modes
+
+- Stale local index: check refresh metadata before using output. A fresh OFAC
+  row and a stale UK/EU row have different evidentiary value.
+- UK source transition: the OFSI Consolidated List stopped updating on 2026-01-28;
+  its rows are stale for current UK designations. Use UKSL as the current authority.
+- EU bulk access: prior unauthenticated bulk XML access can fail with 403.
+  Do not interpret missing EU rows as no EU sanctions match.
+- Fuzzy matches: fallback rows are leads and can be false positives. Confirm
+  aliases, identifiers, dates of birth, addresses, vessels, or official IDs.
+- Legal boundary: sanctions screening can support research, but final match
+  decisions require the relevant regulator's guidance and human/legal review.
+
+## Evidence shape
+
+`SearchResult` rows use `source_kind="sanctions"`, official or local-detail
+URLs, a matched name, snippet, and `extras` including `uid`, `list_kind`,
+`sanctioning_agency`, `programs`, designation date, aliases, IDs, and `fuzzy`
+when the fallback search produced the row.
+
+`fetch()` resolves OFAC detail URLs through the local index, fetches OFAC Recent
+Actions, or returns a bulk-list source summary for EU/UK list URLs. Cite the
+official list source and retrieval timestamp. For UK, cite the current UKSL
+format page, not the closed OFSI search, for current designations.
+
+## Anti-patterns
+
+- Do not treat a no-result response as sanctions clearance.
+- Do not use stale OFSI rows as current UK sanctions evidence after
+  2026-01-28.
+- Do not hide list freshness, disabled EU refresh, or fuzzy-match status from
+  synthesis.
+- Do not use this connector for legal advice or outbound compliance action
+  without human review.
diff --git a/src/research_agent/skills/connectors/scholar.md b/src/research_agent/skills/connectors/scholar.md
new file mode 100644
index 0000000..9f05580
--- /dev/null
+++ b/src/research_agent/skills/connectors/scholar.md
@@ -0,0 +1,111 @@
+---
+name: scholar
+description: "Google Scholar through SerpAPI for legal case leads and scholarly result discovery; paid API key required."
+when_to_use: "Case-law discovery through Google Scholar or a targeted scholarly search where OpenAlex/CourtListener do not cover the need."
+when_not_to_use: "Broad academic literature reviews that OpenAlex can answer for free, primary court records available from CourtListener, or local/offline runs without SERPAPI_KEY."
+---
+
+# Scholar connector
+
+Use `scholar_search` for Google Scholar result discovery through SerpAPI. It is
+a paid/keyed connector and should be treated as a lead generator. Fetch the
+result URL or a primary source before relying on any claim in synthesis.
+
+## Official documentation
+
+- SerpAPI Google Scholar API: https://serpapi.com/google-scholar-api
+- SerpAPI pricing: https://serpapi.com/pricing
+- SerpAPI terms: https://serpapi.com/legal
+- OpenAlex connector skill for the preferred free scholarly path:
+  `src/research_agent/skills/connectors/openalex.md`
+
+## Auth and cost
+
+- Required env vars:
+  - `SERPAPI_KEY` is required for every live `scholar_search` call.
+- Paid constraints: SerpAPI requires a private API key and bills searches
+  against the operator's plan. Cached SerpAPI hits may be free under SerpAPI's
+  cache policy, but the connector should still be planned as paid/gated.
+- Anonymous/local behavior: no anonymous fallback exists. In `--local` model
+  mode the connector still needs `SERPAPI_KEY`; local LLM routing does not make
+  Google Scholar local or free.
+
+## Required payload fields
+
+- `query` - required common field; use a compact Google Scholar query.
+- `sub_question` - required common field; state what the Scholar hit should
+  help prove.
+- Connector-specific required fields: none.
+
+## Knobs available
+
+- `kind` - optional; valid values are `case_law` and `articles`; default is
+  `case_law`.
+- `max_results` - optional client cap. SerpAPI documents `num` as 1-20 for
+  the Google Scholar engine; the connector clamps the request to 20.
+
+## Valid payload examples
+
+```yaml
+kind: scholar_search
+payload:
+  query: "Section 230 appellate immunity platform moderation"
+  sub_question: "Find Google Scholar case-law leads on Section 230 moderation immunity."
+  kind: case_law
+  max_results: 5
+```
+
+```yaml
+kind: scholar_search
+payload:
+  query: "unitary executive theory Project 2025"
+  sub_question: "Find scholarly article leads on unitary executive theory and Project 2025."
+  kind: articles
+  max_results: 10
+```
+
+## Request and pagination pattern
+
+The connector calls SerpAPI with `engine=google_scholar`, `q=<query>`,
+`api_key=<SERPAPI_KEY>`, and `num=min(max_results, 20)`. For `kind=case_law`,
+the connector adds `as_sdt=2006`, which is the current code path for legal
+case results. For `kind=articles`, the connector omits `as_sdt`.
+
+SerpAPI also documents `start` for pagination and `no_cache=true` to force a
+fresh fetch when an exact cached search exists. The current registry contract
+does not expose `start` or `no_cache`; do not promise pagination or minute-level
+freshness from planner payloads until those knobs are added.
+
+## Failure modes
+
+- Missing `SERPAPI_KEY`: reject the task or surface the missing credential.
+- Paid quota exhausted: stop automatic retries and prefer free/open connectors.
+- Cache freshness: SerpAPI says exact cached searches can be served for up to
+  one hour unless `no_cache=true`; this connector does not currently expose
+  `no_cache`.
+- Empty results: simplify the query, try `kind=articles`, or use
+  `openalex_search`; do not synthesize absence of scholarship from one empty
+  Google Scholar call.
+- Legal source authority: Google Scholar legal hits are discovery leads. Use
+  `courtlistener_search` or official court sources for primary case text when
+  available.
+
+## Evidence shape
+
+`SearchResult` rows use `source_kind="scholar"` with Scholar or publisher
+URLs, titles, snippets, optional publication year, and `extras` such as
+`kind`, citation links, publication info, and PDF links when SerpAPI returns
+them.
+
+`fetch()` returns a `Source` from the selected Scholar result URL. PDFs are
+extracted through the PDF tool; HTML is extracted with readability/trafilatura.
+Treat Scholar metadata as a lead until the fetched source is available.
+
+## Anti-patterns
+
+- Do not use `scholar_search` for broad scholarly sweeps when
+  `openalex_search` can provide free structured metadata and OA URLs.
+- Do not run `scholar_search` automatically in unbudgeted local jobs.
+- Do not treat cached Scholar result snippets as current legal authority.
+- Do not cite a Scholar result row when the actual opinion, article, or PDF can
+  be fetched.
diff --git a/src/research_agent/tools/linkedin.py b/src/research_agent/tools/linkedin.py
index 74cf37c..1751519 100644
--- a/src/research_agent/tools/linkedin.py
+++ b/src/research_agent/tools/linkedin.py
@@ -34,7 +34,7 @@
 import re
 import time
 from datetime import UTC, datetime
-from typing import Any
+from typing import Any, Literal
 from urllib.parse import urlparse
 
 import httpx
@@ -899,7 +899,7 @@ def reset_for_tests() -> None:
 
 
 class _PayloadSchema(_BaseSearchPayload):
-    kind: str | None = None
+    kind: Literal["person", "company"] | None = None
     max_results: int | None = None
 
 
@@ -909,13 +909,11 @@ class _PayloadSchema(_BaseSearchPayload):
     search_fn=search,
     fetch_fn=fetch,
     host_patterns=("linkedin.com", "www.linkedin.com"),
-    skill_name=None,
-    skill_exemption="#320 paid/gated connector skill backfill",
     description=(
-        "LinkedIn person/company lookup via Proxycurl or Lix — requires"
-        " broker key"
+        "LinkedIn person/company lookup via broker; paid/TOS-sensitive and"
+        " Proxycurl is shut down"
     ),
-    optional_payload_knobs="`kind: person\\|company`",
+    optional_payload_knobs="`kind: person\\|company`, `max_results`",
     example_query="Sundar Pichai",
     module_name="linkedin",
 )
diff --git a/src/research_agent/tools/sanctions.py b/src/research_agent/tools/sanctions.py
index 0409f6a..9a030d6 100644
--- a/src/research_agent/tools/sanctions.py
+++ b/src/research_agent/tools/sanctions.py
@@ -53,7 +53,7 @@
 from collections.abc import Callable
 from datetime import UTC, datetime
 from pathlib import Path
-from typing import Any
+from typing import Any, Literal
 from urllib.parse import parse_qs, urlparse
 
 import httpx
@@ -1095,6 +1095,7 @@ def reset_for_tests() -> None:
 
 class _PayloadSchema(_BaseSearchPayload):
     max_results: int | None = None
+    kinds: list[Literal["SDN", "EU", "UK"]] | None = None
 
 
 _register_kind(
@@ -1107,10 +1108,11 @@ class _PayloadSchema(_BaseSearchPayload):
         "home.treasury.gov",
         "webgate.ec.europa.eu",
     ),
-    skill_name=None,
-    skill_exemption="#320 paid/gated and sanctions connector skill backfill",
-    description="OFAC SDN + UK sanctions lists (local index, no auth)",
-    optional_payload_knobs="—",
+    description=(
+        "OFAC sanctions screening plus legacy EU/UK local rows; check source"
+        " freshness before compliance use"
+    ),
+    optional_payload_knobs="`max_results`, `kinds: [SDN|EU|UK]`",
     example_query="Wagner Group",
     module_name="sanctions",
 )
diff --git a/src/research_agent/tools/scholar.py b/src/research_agent/tools/scholar.py
index 0725856..49715eb 100644
--- a/src/research_agent/tools/scholar.py
+++ b/src/research_agent/tools/scholar.py
@@ -26,7 +26,7 @@
 import re
 import time
 from datetime import UTC, datetime
-from typing import Any
+from typing import Any, Literal
 from urllib.parse import urlparse
 
 import httpx
@@ -434,7 +434,7 @@ def reset_for_tests() -> None:
 
 
 class _PayloadSchema(_BaseSearchPayload):
-    kind: str | None = None
+    kind: Literal["case_law", "articles"] | None = None
     max_results: int | None = None
 
 
@@ -444,10 +444,8 @@ class _PayloadSchema(_BaseSearchPayload):
     search_fn=search,
     fetch_fn=fetch,
     host_patterns=("scholar.google.com",),
-    skill_name=None,
-    skill_exemption="#320 paid/gated connector skill backfill",
     description="Google Scholar via SerpAPI — requires `SERPAPI_KEY`",
-    optional_payload_knobs="`kind: case_law\\|articles`",
+    optional_payload_knobs="`kind: case_law\\|articles`, `max_results`",
     example_query="Section 230 appellate",
     module_name="scholar",
 )
diff --git a/tests/research_agent/tools/test_registry.py b/tests/research_agent/tools/test_registry.py
index 3148a60..4526b96 100644
--- a/tests/research_agent/tools/test_registry.py
+++ b/tests/research_agent/tools/test_registry.py
@@ -367,6 +367,7 @@ def test_live_registry_skill_name_assignment() -> None:
         "iwm_search": "iwm",
         "lda_search": "lda",
         "licensing_search": "licensing",
+        "linkedin_search": "linkedin",
         "littlesis_search": "littlesis",
         "loc_search": "loc",
         "nara_search": "nara",
@@ -375,6 +376,8 @@ def test_live_registry_skill_name_assignment() -> None:
         "opencorporates_search": "opencorporates",
         "openlibrary_search": "openlibrary",
         "persee_search": "persee",
+        "sanctions_search": "sanctions",
+        "scholar_search": "scholar",
         "si_search": "smithsonian",
         "sos_search": "sos",
         "state_election_search": "state_election",
diff --git a/tests/test_candidate_roster_backtest.py b/tests/test_candidate_roster_backtest.py
index 53f04f4..c055f70 100644
--- a/tests/test_candidate_roster_backtest.py
+++ b/tests/test_candidate_roster_backtest.py
@@ -13,13 +13,18 @@
 
 import pytest
 
+from research_agent.orchestrator import plan as plan_module
 from research_agent.orchestrator import synth as synth_module
-from research_agent.orchestrator.loop import default_handlers, run_loop
-from research_agent.orchestrator.plan import Plan, Subgoal, TaskSpec
+from research_agent.orchestrator.loop import (
+    HEURISTIC_CHECK_EVERY_N,
+    default_handlers,
+    run_loop,
+)
+from research_agent.orchestrator.plan import Plan, PlanParseError, Subgoal, TaskSpec
 from research_agent.storage import artifacts, coverage, db
 from research_agent.storage.jobs import Job
 from research_agent.storage.markdown import write_plan
-from research_agent.storage.tasks import enqueue
+from research_agent.storage.tasks import STATUS_DONE, STATUS_PENDING, enqueue
 from research_agent.tools import fec, state_election
 
 GOAL = (
@@ -256,6 +261,277 @@ def _make_job(tmp_path: Path) -> tuple[Job, Plan]:
     return job, plan
 
 
+def _make_handoff_job(tmp_path: Path, *, db_name: str = "handoff.sqlite") -> Job:
+    """Create a political-domain job backed by a freshly migrated SQLite file."""
+    sqlite_file = tmp_path / db_name
+    # Run migrations up front, then close the handle migrate() hands back.
+    db.migrate(path=sqlite_file).close()
+    spec = {"goal": GOAL, "domain": "political"}
+    jobs_dir = tmp_path / "jobs"
+    created_on = date(2026, 5, 15)
+    return Job.create(spec, jobs_root=jobs_dir, db_path=sqlite_file, today=created_on)
+
+
+def _candidate_roster_handoff_plan(task_template: list[TaskSpec]) -> Plan:
+    """Wrap ``task_template`` in a one-subgoal, version-1 comprehensive plan."""
+    only_subgoal = Subgoal(
+        id=1,
+        description="Verify 2026 House and Senate candidate-roster tasks.",
+    )
+    return Plan(
+        version=1,
+        objective="Backtest 2026 candidate-roster connector handoff.",
+        subgoals=[only_subgoal],
+        task_template=task_template,
+        expected_iterations=1,
+        scope_class="comprehensive",
+    )
+
+
+def _event_payloads(db_path: Path, job_id: str, kind: str) -> list[dict[str, Any]]:
+    """Return decoded payloads of a job's events of ``kind``, ordered by id."""
+    sql = "SELECT payload_json FROM events"
+    sql += " WHERE job_id = ? AND kind = ? ORDER BY id ASC"
+    connection = db.connect(db_path)
+    # Close the connection even if the query raises.
+    try:
+        fetched = connection.execute(sql, (job_id, kind)).fetchall()
+    finally:
+        connection.close()
+    return [json.loads(record["payload_json"]) for record in fetched]
+
+
+def _task_payloads(db_path: Path, job_id: str) -> list[dict[str, Any]]:
+    """Return the decoded payload of every task row for ``job_id``, by id."""
+    sql = "SELECT payload_json FROM tasks WHERE job_id = ? ORDER BY id ASC"
+    connection = db.connect(db_path)
+    try:
+        fetched = connection.execute(sql, (job_id,)).fetchall()
+    finally:
+        connection.close()
+    payloads = [json.loads(record["payload_json"]) for record in fetched]
+    return payloads
+
+
+def _candidate_roster_stall_diagnostics(
+    db_path: Path,
+    job_id: str,
+) -> list[dict[str, Any]]:
+    """Report cadence checkpoints missing while connector tasks are pending.
+
+    Returns ``[]`` unless a ``fec_search``/``state_election_search`` task is
+    pending; else one entry per met done-task threshold lacking its checkpoint.
+    """
+    connection = db.connect(db_path)
+    try:
+        task_rows = connection.execute(
+            "SELECT id, kind, status FROM tasks"
+            " WHERE job_id = ? ORDER BY id ASC",
+            (job_id,),
+        ).fetchall()
+        checkpoint_rows = connection.execute(
+            "SELECT kind, payload_json FROM checkpoints"
+            " WHERE job_id = ? ORDER BY id ASC",
+            (job_id,),
+        ).fetchall()
+    finally:
+        connection.close()
+
+    connector_kinds = {"fec_search", "state_election_search"}
+    pending_kinds = sorted(
+        {
+            str(row["kind"])
+            for row in task_rows
+            if row["status"] == STATUS_PENDING and row["kind"] in connector_kinds
+        }
+    )
+    if not pending_kinds:
+        return []
+    done_count = sum(1 for row in task_rows if row["status"] == STATUS_DONE)
+    recorded = {row["kind"] for row in checkpoint_rows}
+    # (condition, done-task threshold, required checkpoint kind, message)
+    cadences = (
+        (
+            "synthesis_cadence_pending_stall",
+            HEURISTIC_CHECK_EVERY_N,
+            "synthesis_done",
+            "candidate-roster backtest reached synthesis cadence with"
+            " pending connector tasks but no synthesis_done checkpoint",
+        ),
+        (
+            "critique_cadence_pending_stall",
+            HEURISTIC_CHECK_EVERY_N * 2,
+            "critique_done",
+            "candidate-roster backtest reached critique cadence with"
+            " pending connector tasks but no critique_done checkpoint",
+        ),
+    )
+    return [
+        {
+            "condition": condition,
+            "tasks_done": done_count,
+            "pending_task_kinds": pending_kinds,
+            "message": message,
+        }
+        for condition, threshold, checkpoint, message in cadences
+        if done_count >= threshold and checkpoint not in recorded
+    ]
+
+
+def test_candidate_roster_handoff_rejects_grouped_state_task_without_state(
+    tmp_path: Path,
+) -> None:
+    """A multi-state task with no ``state`` key is rejected before enqueue."""
+    job = _make_handoff_job(tmp_path)
+    grouped_payload = {
+        "query": "2026 House and Senate candidates in AL, AK, AZ, AR, CA",
+        "sub_question": "Find 2026 House and Senate candidates in AL, AK, AZ, AR, CA.",
+        "cycle": 2026,
+        "office": "House",
+    }
+    grouped_task = TaskSpec(
+        kind="state_election_search",
+        payload=grouped_payload,
+    )
+    plan = _candidate_roster_handoff_plan([grouped_task])
+
+    with pytest.raises(PlanParseError, match="state"):
+        plan_module._enqueue_plan_tasks(job, plan)  # noqa: SLF001
+
+    # Nothing was queued, and exactly one rejection event was recorded.
+    assert _task_payloads(job.db_path, job.id) == []
+    (event,) = _event_payloads(job.db_path, job.id, "connector_contract_rejected")
+    assert event["stage"] == "pre_enqueue"
+    assert event["kind"] == "state_election_search"
+    assert event["plan_task_index"] == 0
+    assert "state" in event["message"]
+
+
+def test_candidate_roster_handoff_normalizes_full_state_name_before_enqueue(
+    tmp_path: Path,
+) -> None:
+    """A spelled-out state name is stored as its two-letter code and logged."""
+    job = _make_handoff_job(tmp_path)
+    spelled_out = TaskSpec(
+        kind="state_election_search",
+        payload={
+            "query": "2026 House candidates",
+            "sub_question": "Find Colorado 2026 House candidate rows.",
+            "cycle": 2026,
+            "state": "Colorado",
+            "office": "House",
+        },
+    )
+    plan = _candidate_roster_handoff_plan([spelled_out])
+
+    plan_module._enqueue_plan_tasks(job, plan)  # noqa: SLF001
+
+    # Exactly one task row is stored, carrying the normalized code.
+    (queued,) = _task_payloads(job.db_path, job.id)
+    assert queued["state"] == "CO"
+    (event,) = _event_payloads(job.db_path, job.id, "connector_contract_repaired")
+    assert event["stage"] == "pre_enqueue"
+    # The repair event keeps both the planner's value and the stored one.
+    assert event["before"]["state"] == "Colorado"
+    assert event["after"]["state"] == "CO"
+
+
+def test_candidate_roster_handoff_repairs_empty_fec_candidates_to_enumeration(
+    tmp_path: Path,
+) -> None:
+    """An empty-query FEC ``candidates`` task with filters becomes an enumeration."""
+    job = _make_handoff_job(tmp_path)
+    blank_query_payload = {
+        "query": "",
+        "sub_question": "Enumerate 2026 California House candidates from FEC.",
+        "kind": "candidates",
+        "cycle": 2026,
+        "office": "House",
+        "state": "California",
+    }
+    spec = TaskSpec(
+        kind="fec_search",
+        payload=blank_query_payload,
+    )
+    plan = _candidate_roster_handoff_plan([spec])
+
+    plan_module._enqueue_plan_tasks(job, plan)  # noqa: SLF001
+
+    # The stored payload switches kind and carries normalized office/state codes.
+    (queued,) = _task_payloads(job.db_path, job.id)
+    assert queued["kind"] == "candidates_enumerate"
+    assert queued["office"] == "H"
+    assert queued["state"] == "CA"
+    (event,) = _event_payloads(job.db_path, job.id, "connector_contract_repaired")
+    assert event["after"]["kind"] == "candidates_enumerate"
+
+
+def test_candidate_roster_handoff_rejects_empty_fec_candidates_without_filters(
+    tmp_path: Path,
+) -> None:
+    """An empty-query FEC ``candidates`` task with no filters is rejected."""
+    reason = "query must be non-empty"
+    job = _make_handoff_job(tmp_path)
+    unfiltered = TaskSpec(
+        kind="fec_search",
+        payload={
+            "query": "",
+            "sub_question": "Search all FEC candidates with an empty query.",
+            "kind": "candidates",
+        },
+    )
+    plan = _candidate_roster_handoff_plan([unfiltered])
+
+    with pytest.raises(PlanParseError, match=reason):
+        plan_module._enqueue_plan_tasks(job, plan)  # noqa: SLF001
+
+    # Nothing was queued; the single rejection event repeats the reason.
+    assert _task_payloads(job.db_path, job.id) == []
+    (event,) = _event_payloads(job.db_path, job.id, "connector_contract_rejected")
+    assert event["kind"] == "fec_search"
+    assert reason in event["message"]
+
+
+def test_candidate_roster_backtest_reports_missing_cadence_checkpoints(
+    tmp_path: Path,
+) -> None:
+    """With 2N tasks done, one connector task pending, both stalls are reported."""
+    job = _make_handoff_job(tmp_path)
+    done_total = HEURISTIC_CHECK_EVERY_N * 2
+    specs = [
+        TaskSpec(kind="web_search", payload={"query": f"completed task {idx}"})
+        for idx in range(done_total)
+    ]
+    pending_payload = {
+        "query": "2026 House candidates",
+        "sub_question": "Find Colorado state-election House candidate rows.",
+        "cycle": 2026,
+        "state": "CO",
+        "office": "House",
+    }
+    specs.append(TaskSpec(kind="state_election_search", payload=pending_payload))
+    ids = enqueue(job, specs, plan_version=1)
+
+    # Flip every task except the trailing connector task to done.
+    mark_done = "UPDATE tasks SET status = ? WHERE id = ?"
+    connection = db.connect(job.db_path)
+    try:
+        with connection:
+            for done_id in ids[:-1]:
+                connection.execute(mark_done, (STATUS_DONE, done_id))
+    finally:
+        connection.close()
+
+    diagnostics = _candidate_roster_stall_diagnostics(job.db_path, job.id)
+
+    assert [item["condition"] for item in diagnostics] == [
+        "synthesis_cadence_pending_stall",
+        "critique_cadence_pending_stall",
+    ]
+    assert all(item["tasks_done"] == done_total for item in diagnostics)
+    assert all(item["pending_task_kinds"] == ["state_election_search"] for item in diagnostics)
+
+
 @pytest.mark.asyncio
 async def test_candidate_roster_fixture_backtest_completes_or_gaps_honestly(
     tmp_path: Path,
diff --git a/tests/test_skills_content.py b/tests/test_skills_content.py
index 37d749b..d2da087 100644
--- a/tests/test_skills_content.py
+++ b/tests/test_skills_content.py
@@ -29,6 +29,7 @@
     "usaspending",
 )
 ISSUE_319_CONNECTOR_SKILLS = ("bbb", "calaccess", "licensing")
+ISSUE_320_CONNECTOR_SKILLS = ("linkedin", "scholar", "sanctions")
 
 STRATEGY_SKILLS = (
     "modern-policy-era-filtering",
@@ -256,6 +257,122 @@ def test_issue_319_site_navigation_source_boundaries() -> None:
     assert "unsupported" in licensing
 
 
+@pytest.mark.parametrize("name", ISSUE_320_CONNECTOR_SKILLS)
+def test_issue_320_connector_skill_loads(name: str) -> None:
+    """The skill body is non-trivial and its listing metadata is filled in."""
+    assert len(load_skill("connectors", name)) > 500
+    by_name = {item["name"]: item for item in list_skills("connectors")}
+    listed = by_name[name]
+    for key in ("description", "when_to_use", "when_not_to_use"):
+        assert listed[key]
+
+
+@pytest.mark.parametrize("name", ISSUE_320_CONNECTOR_SKILLS)
+@pytest.mark.parametrize("section", ISSUE_318_REQUIRED_SECTIONS)
+def test_issue_320_connector_skill_has_required_sections(name: str, section: str) -> None:
+    heading = re.compile(rf"^##\s+{re.escape(section)}\s*$", re.MULTILINE)
+    assert heading.search(load_skill("connectors", name))
+
+
+@pytest.mark.parametrize("name", ISSUE_320_CONNECTOR_SKILLS)
+def test_issue_320_connector_skill_examples_validate(name: str) -> None:
+    """Every YAML payload example in the skill passes the registry contract."""
+    from research_agent.tools._registry import get_kind, validate_payload_contract
+
+    registered = get_kind(f"{name}_search")
+    assert registered is not None
+    assert registered.skill_name == name
+
+    examples = _skill_payload_examples(load_skill("connectors", name))
+    assert examples, f"{name}: expected at least one YAML payload example"
+    for sample in examples:
+        assert sample["kind"] == registered.name
+        candidate = sample["payload"]
+        assert isinstance(candidate, dict)
+        verdict = validate_payload_contract(registered.name, candidate)
+        assert verdict.valid, verdict.repair_message
+
+
+def test_issue_320_connector_skills_cite_current_official_docs() -> None:
+    expected = {  # skill name -> doc URLs that must appear verbatim in its body
+        "linkedin": (
+            "https://nubela.co/proxycurl/auth/register.html",
+            "https://nubela.co/blog/what-is-proxycurl-api-now-in-2026-im-the-founder/",
+            "https://lix-it.com/docs/",
+            "https://lix-it.com/pages/linkedin-api",
+        ),
+        "scholar": (
+            "https://serpapi.com/google-scholar-api",
+            "https://serpapi.com/pricing",
+        ),
+        "sanctions": (
+            "https://ofac.treasury.gov/sanctions-list-service",
+            "https://finance.ec.europa.eu/eu-and-world/sanctions-restrictive-measures/overview-sanctions-and-related-resources_en",
+            "https://www.gov.uk/government/publications/the-uk-sanctions-list",
+            "https://sanctionssearchapp.ofsi.hmtreasury.gov.uk/",
+        ),
+    }
+    for name, urls in expected.items():
+        body = load_skill("connectors", name)
+        absent = [url for url in urls if url not in body]  # report all at once
+        assert not absent, f"{name}: skill body does not cite {absent}"
+
+
+def test_issue_320_paid_and_sanctions_caveats_are_explicit() -> None:
+    """Each skill body carries its required caveat phrases verbatim."""
+    linkedin_body = load_skill("connectors", "linkedin")
+    scholar_body = load_skill("connectors", "scholar")
+    sanctions_body = load_skill("connectors", "sanctions")
+
+    assert "Proxycurl is no longer in service" in linkedin_body
+    assert "NinjaPear is not a drop-in replacement implemented here" in linkedin_body
+    assert "paid/gated" in linkedin_body
+    assert "SERPAPI_KEY" in scholar_body
+    assert "openalex_search" in scholar_body
+    assert "no_cache" in scholar_body
+    assert "UK Sanctions List is now the authoritative source" in sanctions_body
+    assert "OFSI Consolidated List" in sanctions_body
+    assert "2026-01-28" in sanctions_body
+    assert "EU rows as stale" in sanctions_body
+
+
+def test_issue_320_payload_contract_rejects_invalid_modes() -> None:
+    """Unsupported ``kind``/``kinds`` values are rejected and the field is named."""
+    from research_agent.tools._registry import validate_payload_contract
+
+    # connector kind -> (field expected in the repair message, rejected payload)
+    rejected = {
+        "linkedin_search": (
+            "kind",
+            {
+                "query": "Jane Doe",
+                "sub_question": "Find a LinkedIn lead",
+                "kind": "profile",
+            },
+        ),
+        "scholar_search": (
+            "kind",
+            {
+                "query": "Section 230",
+                "sub_question": "Find case law",
+                "kind": "cases",
+            },
+        ),
+        "sanctions_search": (
+            "kinds",
+            {
+                "query": "Wagner Group",
+                "sub_question": "Screen sanctions",
+                "kinds": ["OFSI"],
+            },
+        ),
+    }
+    for connector, (field, payload) in rejected.items():
+        result = validate_payload_contract(connector, payload)
+        assert result.valid is False
+        assert field in result.repair_message
+
+
 def test_congress_skill_carries_canonical_motivator() -> None:
     """The 110th-Congress / IRA relevance trap is the headline reason this
     skill exists; the body must keep that example intact."""

From 3cb28176f76fc31582641687733c20a4754ad241 Mon Sep 17 00:00:00 2001
From: Bradley Taylor <bradtaylorsf@gmail.com>
Date: Thu, 21 May 2026 17:21:43 -0700
Subject: [PATCH 3/3] fix: pass registered connector knobs through dispatch

---
 src/research_agent/orchestrator/loop.py |  3 +++
 tests/test_orchestrator_loop.py         | 32 +++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/src/research_agent/orchestrator/loop.py b/src/research_agent/orchestrator/loop.py
index fbdf590..dc4bd73 100644
--- a/src/research_agent/orchestrator/loop.py
+++ b/src/research_agent/orchestrator/loop.py
@@ -219,7 +219,10 @@ async def _not_implemented_handler(job: Job, task: dict[str, Any]) -> dict[str,
 _CONNECTOR_SEARCH_PASSTHROUGH: frozenset[str] = frozenset(
     {
         "kind",
+        "kinds",
         "max_results",
+        "provider",
+        "timeout",
         "cycle",
         "office",
         "state",
diff --git a/tests/test_orchestrator_loop.py b/tests/test_orchestrator_loop.py
index 1b33abb..cb59a27 100644
--- a/tests/test_orchestrator_loop.py
+++ b/tests/test_orchestrator_loop.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import inspect
 import json
 from pathlib import Path
 from typing import Any
@@ -11,6 +12,7 @@
 from research_agent.orchestrator import plan as plan_module
 from research_agent.orchestrator.errors import FatalError, RetriableError
 from research_agent.orchestrator.loop import (
+    _CONNECTOR_SEARCH_PASSTHROUGH,
     HEURISTIC_CHECK_EVERY_N,
     MAX_DRAIN_REPLANS,
     MAX_TASKS_PER_JOB,
@@ -697,6 +699,36 @@ def test_module_constants_match_spec() -> None:
     assert RETRY_MAX_ATTEMPTS == 5
 
 
+def test_connector_search_passthrough_covers_registered_payload_contracts() -> None:
+    """Schema knobs that ``search_fn`` accepts must be in the passthrough set."""
+    import research_agent.tools  # noqa: F401 - populate registry
+    from research_agent.tools._registry import iter_kinds
+
+    base_fields = {"query", "sub_question"}
+    # Parameter kinds a caller can supply by keyword name.
+    named_kinds = (
+        inspect.Parameter.KEYWORD_ONLY,
+        inspect.Parameter.POSITIONAL_OR_KEYWORD,
+    )
+    missing: list[tuple[str, str]] = []
+    for entry in iter_kinds():
+        knobs = set(entry.payload_schema.model_fields) - base_fields
+        params = inspect.signature(entry.search_fn).parameters
+        takes_var_kw = any(
+            p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()
+        )
+        by_name = {name for name, p in params.items() if p.kind in named_kinds}
+        # Only knobs the connector can actually receive need to pass the filter.
+        missing.extend(
+            (entry.name, knob)
+            for knob in sorted(knobs)
+            if (takes_var_kw or knob in by_name)
+            and knob not in _CONNECTOR_SEARCH_PASSTHROUGH
+        )
+
+    assert missing == []
+
+
 CONNECTOR_KIND_PREFIXES: tuple[str, ...] = (
     "congress",
     "fec",