From 74e2d406a1fda90a8b04df3bb5518debbef71c3e Mon Sep 17 00:00:00 2001
From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com>
Date: Thu, 11 Jun 2026 00:13:36 +0300
Subject: [PATCH 1/4] feat(llm): response_format=json_schema support

---
 cyberai/core/llm_client.py | 109 +++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

diff --git a/cyberai/core/llm_client.py b/cyberai/core/llm_client.py
index 6ab8f37..e33127b 100644
--- a/cyberai/core/llm_client.py
+++ b/cyberai/core/llm_client.py
@@ -220,6 +220,115 @@ def _call_tools_anthropic(
             stop_reason=getattr(response, "stop_reason", None),
         )
 
+    # ── structured output (sync) ──────────────────────────────────────
+
+    def structured_call(
+        self,
+        messages: List[Dict],
+        schema: Dict[str, Any],
+        schema_name: str = "response",
+        description: str = "",
+        system: Optional[str] = None,
+        agent_name: str = "unknown",
+        cacheable_system: bool = False,
+    ) -> Dict[str, Any]:
+        """Request schema-constrained JSON from the configured provider.
+
+        Dispatches on `self.config.provider`:
+
+        * "openai" -> `_structured_openai` (response_format=json_schema).
+        * "anthropic" -> `_structured_anthropic` (one forced tool whose
+          input_schema is `schema`); only this path receives
+          `cacheable_system`.
+
+        Returns the dict produced by the provider helper. The result is not
+        validated against `schema` here; callers are expected to do that.
+
+        Raises:
+            ValueError: for any other provider (e.g. Ollama).
+        """
+        provider = self.config.provider
+        common = (messages, schema, schema_name, description, system, agent_name)
+        if provider == "openai":
+            return self._structured_openai(*common)
+        if provider == "anthropic":
+            return self._structured_anthropic(*common, cacheable_system)
+        raise ValueError(f"Structured output unsupported for provider: {self.config.provider}")
+
+    def _structured_openai(
+        self, messages, schema, schema_name, description, system, agent_name="unknown"
+    ):
+        """Call OpenAI chat completions with response_format=json_schema.
+
+        A non-empty `description` is forwarded in the json_schema spec. Returns
+        the decoded JSON object; raises ValueError if the reply is not one.
+        """
+        import openai
+
+        client = openai.OpenAI(api_key=self.config.api_key)
+        full_messages = ([{"role": "system", "content": system}] if system else []) + list(messages)
+        spec = {"name": schema_name, "schema": schema, "strict": False}
+        if description:
+            spec["description"] = description
+        response = client.chat.completions.create(
+            model=self.config.model,
+            messages=full_messages,
+            max_tokens=self.config.max_tokens,
+            temperature=self.config.temperature,
+            response_format={"type": "json_schema", "json_schema": spec},
+        )
+        self._record_usage(
+            agent_name,
+            getattr(response, "model", self.config.model),
+            getattr(response.usage, "prompt_tokens", 0),
+            getattr(response.usage, "completion_tokens", 0),
+        )
+        data = json.loads(response.choices[0].message.content or "{}")
+        if not isinstance(data, dict):
+            raise ValueError(f"Expected a JSON object, got {type(data).__name__}")
+        return data
+
+    def _structured_anthropic(
+        self,
+        messages,
+        schema,
+        schema_name,
+        description,
+        system,
+        agent_name="unknown",
+        cacheable_system=False,
+    ):
+        """Get structured output from Anthropic through one forced tool call.
+
+        Declares a single tool (`input_schema=schema`) and pins `tool_choice`
+        to it; a truthy `system` is wrapped by `_wrap_cacheable` only when
+        `cacheable_system` is set. Returns a dict copy of the first `tool_use`
+        block's input, or `{}` if the response has none.
+        """
+        import anthropic
+
+        tool_desc = description or f"Return a structured {schema_name}."
+        request: Dict[str, Any] = {
+            "model": self.config.model,
+            "max_tokens": self.config.max_tokens,
+            "messages": messages,
+            "tools": [{"name": schema_name, "description": tool_desc, "input_schema": schema}],
+            "tool_choice": {"type": "tool", "name": schema_name},
+        }
+        if system:
+            request["system"] = _wrap_cacheable(system) if cacheable_system else system
+        response = anthropic.Anthropic(api_key=self.config.api_key).messages.create(**request)
+        self._record_usage(
+            agent_name,
+            getattr(response, "model", self.config.model),
+            getattr(response.usage, "input_tokens", 0),
+            getattr(response.usage, "output_tokens", 0),
+            cache_creation_tokens=getattr(response.usage, "cache_creation_input_tokens", 0) or 0,
+            cache_read_tokens=getattr(response.usage, "cache_read_input_tokens", 0) or 0,
+        )
+        picked = next((b for b in response.content if b.type == "tool_use"), None)
+        return {} if picked is None else dict(picked.input)
+
     # ── async API ─────────────────────────────────────────────────────
 
     async def acall(

From c9f98259493767b6f0f3715ab9520b865475cf5b Mon Sep 17 00:00:00 2001
From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com>
Date: Thu, 11 Jun 2026 00:15:02 +0300
Subject: [PATCH 2/4] feat(report): Pydantic-validated report from LLM

---
 cyberai/agents/report/agent.py | 53 ++++++++++++++++++++++++++++++++++
 cyberai/core/types.py          | 25 +++++++++++++++-
 2 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/cyberai/agents/report/agent.py b/cyberai/agents/report/agent.py
index d628f35..b654876 100644
--- a/cyberai/agents/report/agent.py
+++ b/cyberai/agents/report/agent.py
@@ -6,7 +6,10 @@
 from pathlib import Path
 from typing import Any, Dict, Optional
 
+import json
+
 from cyberai.core.base_agent import BaseAgent, Tool
+from cyberai.core.types import ReportSection
 
 from .json_exporter import export_json
 from .markdown_renderer import render_markdown
@@ -58,9 +61,59 @@ def run(self, target: str, context: Optional[Dict[str, Any]] = None) -> Dict[str
         self.kb.set("report.markdown_path", md_path, agent=self.AGENT_NAME)
         self.kb.set("report.json_path", json_path, agent=self.AGENT_NAME)
 
+        # Flag-gated: LLM-generated structured executive section.
+        if getattr(self.config, "use_llm_summary", False) and self.llm is not None:
+            section = self._structured_summary(target)
+            if section is not None:
+                self.kb.set("report.section", section.model_dump(), agent=self.AGENT_NAME)
+
         return {
             "status": "done",
             "markdown": md_path,
             "json": json_path,
             "total_findings": len(self.session.findings),
         }
+
+    def _structured_summary(self, target: str):
+        """Flag-gated: ask the LLM for a Pydantic-validated ReportSection.
+
+        Builds a prompt from `self.session.findings` (title, severity,
+        description), calls `self.llm.structured_call` with ReportSection's
+        JSON Schema and validates the returned dict.
+
+        Returns the validated ReportSection, or None when no LLM is
+        configured or anything fails. Prompt construction is inside the
+        try block too, so a finding that cannot be JSON-encoded is logged
+        rather than aborting the deterministic report.
+        """
+        if self.llm is None:
+            return None
+        system = (
+            "You are a penetration-test report writer. Summarize the findings "
+            "into one executive ReportSection: a concise title, the highest "
+            "applicable severity, key findings, concrete recommendations, and "
+            "a short business impact statement."
+        )
+        try:
+            findings = [
+                {
+                    "title": f.title,
+                    # Use `.value` when the severity object has one; else str().
+                    "severity": getattr(f.severity, "value", str(f.severity)),
+                    "description": f.description,
+                }
+                for f in self.session.findings
+            ]
+            prompt = f"Target: {target}\nFindings JSON:\n{json.dumps(findings, indent=2)}"
+            raw = self.llm.structured_call(
+                [{"role": "user", "content": prompt}],
+                schema=ReportSection.model_json_schema(),
+                schema_name="report_section",
+                description="Executive pentest report section.",
+                system=system,
+                agent_name=self.AGENT_NAME,
+            )
+            return ReportSection.model_validate(raw)
+        except Exception as exc:  # noqa: BLE001 — report must never hard-fail
+            self._log(f"LLM structured summary failed: {exc}")
+            return None
diff --git a/cyberai/core/types.py b/cyberai/core/types.py
index 2a4652d..d670471 100644
--- a/cyberai/core/types.py
+++ b/cyberai/core/types.py
@@ -6,7 +6,7 @@
 from typing import Any, Union
 from pathlib import Path
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator
 
 # Target types
 Target = str  # IP, CIDR, or domain
@@ -105,6 +105,29 @@ class ExploitResult(BaseModel):
 ReportPath = Path
 ReportFormat = str  # "markdown" | "html" | "json" | "pdf"
 
+_VALID_SEVERITIES = {"CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"}
+
+
+class ReportSection(BaseModel):
+    """LLM-generated structured report section (day 20 structured outputs).
+
+    `severity` is normalized before type validation: null, non-string or
+    unknown values become "INFO". `impact` feeds the HackerOne export.
+    """
+
+    title: str
+    severity: str = "INFO"
+    findings: list[str] = Field(default_factory=list)
+    recommendations: list[str] = Field(default_factory=list)
+    impact: str = ""
+
+    @field_validator("severity", mode="before")
+    @classmethod
+    def _norm_severity(cls, v: Any) -> str:
+        up = str(v or "INFO").strip().upper()
+        return up if up in _VALID_SEVERITIES else "INFO"
+
+
 # Pipeline
 PipelineInput = Target
 PipelineOutput = dict[str, AgentOutput]

From d2b0c810a92ea187753401d025d2fe39fd90bf76 Mon Sep 17 00:00:00 2001
From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com>
Date: Thu, 11 Jun 2026 00:16:01 +0300
Subject: [PATCH 3/4] feat(report): HackerOne-compatible export

---
 cyberai/agents/report/h1_exporter.py | 42 ++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 cyberai/agents/report/h1_exporter.py

diff --git a/cyberai/agents/report/h1_exporter.py b/cyberai/agents/report/h1_exporter.py
new file mode 100644
index 0000000..9435fcf
--- /dev/null
+++ b/cyberai/agents/report/h1_exporter.py
@@ -0,0 +1,42 @@
+"""HackerOne-compatible Markdown export for a ReportSection (day 20)."""
+
+from __future__ import annotations
+
+from cyberai.core.types import ReportSection
+
+# Maps the internal upper-case severity names to HackerOne severity labels.
+_H1_SEVERITY = {
+    "CRITICAL": "Critical",
+    "HIGH": "High",
+    "MEDIUM": "Medium",
+    "LOW": "Low",
+    "INFO": "None",  # also the fallback label used by export_hackerone
+}
+
+
+def _bullets(items: list[str]) -> str:
+    """Return one "- item" Markdown line per entry, or a placeholder for no entries."""
+    if items:
+        return "\n".join([f"- {entry}" for entry in items])
+    return "_None provided._"
+
+
+def export_hackerone(section: ReportSection) -> str:
+    """Build the HackerOne-style Markdown submission for `section`.
+
+    Order: title heading, bold severity (via _H1_SEVERITY, "None" if unknown),
+    Steps to Reproduce (`findings`), Impact, Recommendation (`recommendations`).
+    """
+    h1_level = _H1_SEVERITY.get(section.severity.upper(), "None")
+    impact_text = section.impact.strip() or "_Impact not specified._"
+    parts = [
+        f"# {section.title}",
+        f"**Severity:** {h1_level}",
+        "## Steps to Reproduce",
+        _bullets(section.findings),
+        "## Impact",
+        impact_text,
+        "## Recommendation",
+        _bullets(section.recommendations),
+    ]
+    return "\n\n".join(parts) + "\n"

From a2b06046bd4e450dded09cd7e31489404d13bff1 Mon Sep 17 00:00:00 2001
From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com>
Date: Thu, 11 Jun 2026 00:17:32 +0300
Subject: [PATCH 4/4] test(report): structured output roundtrip

---
 tests/unit/test_structured_report.py | 194 +++++++++++++++++++++++++++
 1 file changed, 194 insertions(+)
 create mode 100644 tests/unit/test_structured_report.py

diff --git a/tests/unit/test_structured_report.py b/tests/unit/test_structured_report.py
new file mode 100644
index 0000000..f8c75ac
--- /dev/null
+++ b/tests/unit/test_structured_report.py
@@ -0,0 +1,194 @@
+"""Day 20 — structured outputs: ReportSection, structured_call, H1 export."""
+
+from __future__ import annotations
+
+import json
+from unittest.mock import MagicMock
+
+from cyberai.agents.report.h1_exporter import export_hackerone
+from cyberai.core.llm_client import LLMClient
+from cyberai.core.types import ReportSection
+
+
+# ── ReportSection model ───────────────────────────────────────────────
+
+
+def test_section_severity_normalized():
+    """Lower-case severity input is upper-cased by the validator."""
+    assert ReportSection(title="t", severity="critical").severity == "CRITICAL"
+
+
+def test_section_severity_invalid_falls_back_info():
+    """A severity outside the known set is replaced by INFO."""
+    assert ReportSection(title="t", severity="bogus").severity == "INFO"
+
+
+def test_section_defaults():
+    """Only `title` is required; every other field has a default."""
+    section = ReportSection(title="t")
+    assert (section.severity, section.impact) == ("INFO", "")
+    assert section.findings == []
+    assert section.recommendations == []
+
+
+# ── HackerOne export ──────────────────────────────────────────────────
+
+
+def _sample_section() -> ReportSection:
+    return ReportSection(  # fully populated fixture shared by the H1 export tests
+        title="SQL Injection in login",
+        severity="HIGH",
+        findings=["Send ' OR 1=1-- in username", "Observe auth bypass"],
+        recommendations=["Use parameterized queries"],
+        impact="Full auth bypass, account takeover.",
+    )
+
+
+def test_h1_export_contains_sections():
+    """The rendered Markdown carries the title, severity and all three headings."""
+    rendered = export_hackerone(_sample_section())
+    expected = ("# SQL Injection in login", "**Severity:** High")
+    headings = ("## Steps to Reproduce", "## Impact", "## Recommendation")
+    for fragment in expected + headings:
+        assert fragment in rendered
+
+
+def test_h1_export_info_maps_to_none():
+    """INFO severity is exported under the HackerOne label "None"."""
+    assert "**Severity:** None" in export_hackerone(ReportSection(title="x", severity="INFO"))
+
+
+def test_h1_export_empty_lists_placeholder():
+    """Empty lists and a blank impact render as italic placeholders."""
+    rendered = export_hackerone(ReportSection(title="x", severity="LOW"))
+    assert all(p in rendered for p in ("_None provided._", "_Impact not specified._"))
+
+
+def test_h1_roundtrip_steps_present():
+    """Every finding and recommendation string survives the export verbatim."""
+    sample = _sample_section()
+    rendered = export_hackerone(sample)
+    expected = [*sample.findings, *sample.recommendations]
+    for text in expected:
+        assert text in rendered
+
+
+# ── structured_call provider branches (mocked SDK) ────────────────────
+
+
+def _client(provider: str) -> LLMClient:
+    """Build an LLMClient around a mocked config for `provider`."""
+    # MagicMock keyword arguments become attributes on the mock.
+    cfg = MagicMock(
+        provider=provider, api_key="x", model="test-model", max_tokens=1024, temperature=0.0
+    )
+    # No real configuration object is constructed for these tests.
+    return LLMClient(cfg)
+
+
+SCHEMA = ReportSection.model_json_schema()  # schema passed to structured_call in the tests below
+PAYLOAD = {"title": "t", "severity": "HIGH", "findings": ["a"]}  # canned provider reply
+
+
+def test_structured_call_openai(monkeypatch):
+    """OpenAI branch: JSON content is parsed and response_format is json_schema."""
+    import openai
+
+    reply = MagicMock(content=json.dumps(PAYLOAD))
+    response = MagicMock(
+        choices=[MagicMock(message=reply)],
+        usage=MagicMock(prompt_tokens=10, completion_tokens=5),
+        model="test-model",
+    )
+    sdk = MagicMock()
+    sdk.chat.completions.create.return_value = response
+    # Replace the SDK constructor so no network client is ever built.
+    monkeypatch.setattr(openai, "OpenAI", lambda **kw: sdk)
+
+    result = _client("openai").structured_call(
+        [{"role": "user", "content": "go"}], schema=SCHEMA, schema_name="rs"
+    )
+
+    assert result["title"] == "t"
+    sent = sdk.chat.completions.create.call_args.kwargs
+    assert sent["response_format"]["type"] == "json_schema"
+
+
+def test_structured_call_anthropic(monkeypatch):
+    """Anthropic branch: tool_use input is returned and the tool choice is forced."""
+    import anthropic
+
+    tool_block = MagicMock(type="tool_use", input=PAYLOAD)
+    usage = MagicMock(
+        input_tokens=10,
+        output_tokens=5,
+        cache_creation_input_tokens=0,
+        cache_read_input_tokens=0,
+    )
+    response = MagicMock(
+        content=[tool_block],
+        usage=usage,
+        model="test-model",
+    )
+    sdk = MagicMock()
+    sdk.messages.create.return_value = response
+    # Replace the SDK constructor so no network client is ever built.
+    monkeypatch.setattr(anthropic, "Anthropic", lambda **kw: sdk)
+
+    result = _client("anthropic").structured_call(
+        [{"role": "user", "content": "go"}], schema=SCHEMA, schema_name="rs"
+    )
+
+    assert result["severity"] == "HIGH"
+    sent = sdk.messages.create.call_args.kwargs
+    assert sent["tool_choice"] == {"type": "tool", "name": "rs"}
+
+
+def test_structured_call_ollama_unsupported():
+    client = _client("ollama")  # any provider besides openai/anthropic
+    try:
+        client.structured_call([], schema=SCHEMA)
+    except ValueError:
+        return
+    raise AssertionError("expected ValueError")  # a raise, unlike assert False, survives -O
+
+
+# ── ReportAgent._structured_summary (mocked) ──────────────────────────
+
+
+def _report_agent(provider="anthropic"):
+    """Return a ReportAgent built without __init__, with a mocked LLM and one finding."""
+    from cyberai.agents.report.agent import ReportAgent
+
+    finding = MagicMock(title="Log4Shell", severity="CRITICAL", description="rce")
+    llm = MagicMock()
+    llm.config.provider = provider
+
+    # __new__ skips ReportAgent.__init__, so the attributes used below are set by hand.
+    agent = ReportAgent.__new__(ReportAgent)
+    agent.AGENT_NAME, agent.llm = "report", llm
+    agent.session = MagicMock(findings=[finding])
+    return agent
+
+
+def test_structured_summary_validates():
+    """A well-formed LLM reply becomes a normalized, exportable ReportSection."""
+    agent = _report_agent()
+    agent.llm.structured_call.return_value = {
+        "title": "Exec summary",
+        "severity": "critical",
+        "findings": ["Log4Shell RCE"],
+        "recommendations": ["Patch log4j"],
+        "impact": "RCE on host.",
+    }
+    summary = agent._structured_summary("testhost")
+    assert isinstance(summary, ReportSection)
+    assert summary.severity == "CRITICAL"  # lower-case input was normalized
+    assert "Log4Shell RCE" in export_hackerone(summary)
+
+
+def test_structured_summary_failsafe_returns_none():
+    agent = _report_agent()
+    agent.llm.structured_call.side_effect = RuntimeError("api down")  # simulate a provider failure
+    agent._log = MagicMock()  # stub logger on the agent built via __new__
+    assert agent._structured_summary("testhost") is None  # error swallowed, no section returned