From a38cb3a3fe625793d008c2150e24b88504226d1a Mon Sep 17 00:00:00 2001 From: DankerMu Date: Sat, 21 Feb 2026 12:23:05 +0800 Subject: [PATCH] fix(qa): address review findings for consistency checker - M1: add project existence check (404) in QA endpoint - M2: batch-fetch KGNode IDs in possession check (eliminate N+1) - M3/M4: lower confidence to 0.6 when death/resolved location absent - L1: rename _loads to _safe_loads for codebase consistency - L2: move func import to module top level - Add timeline conflict tests (2 tests) - Add plot_thread conflict tests (2 tests) - Fix line length and input validation (Pydantic Field) All 16 tests passing. Co-Authored-By: Claude Opus 4.6 --- backend/app/api/qa.py | 12 ++- backend/app/services/consistency.py | 52 ++++++---- backend/tests/test_consistency.py | 141 +++++++++++++++++++++++++++- 3 files changed, 179 insertions(+), 26 deletions(-) diff --git a/backend/app/api/qa.py b/backend/app/api/qa.py index 02892d6..5cc772e 100644 --- a/backend/app/api/qa.py +++ b/backend/app/api/qa.py @@ -1,11 +1,12 @@ """Quality Assurance API endpoints.""" -from fastapi import APIRouter, Depends -from pydantic import BaseModel +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel, Field from sqlalchemy.ext.asyncio import AsyncSession from app.api.schemas import ConsistencyResult from app.core.database import get_db +from app.models.tables import Project from app.services.consistency import run_consistency_check router = APIRouter(prefix="/api", tags=["qa"]) @@ -13,8 +14,8 @@ class ConsistencyCheckRequest(BaseModel): project_id: int - ngram_n: int = 4 - ngram_threshold: int = 3 + ngram_n: int = Field(default=4, ge=2, le=10) + ngram_threshold: int = Field(default=3, ge=2, le=20) @router.post("/qa/check", response_model=list[ConsistencyResult]) @@ -23,6 +24,9 @@ async def check_consistency( db: AsyncSession = Depends(get_db), ): """Run rule-based consistency checks on a project.""" + project = await db.get(Project, body.project_id) + if not project: + raise HTTPException(404, "Project not found") return await run_consistency_check( db, body.project_id, diff --git a/backend/app/services/consistency.py b/backend/app/services/consistency.py index c43a82f..85e8422 100644 --- a/backend/app/services/consistency.py +++ b/backend/app/services/consistency.py @@ -3,13 +3,13 @@ import json from collections import defaultdict -from sqlalchemy import select +from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession from app.models.tables import Book, Chapter, KGEdge, KGNode, KGProposal, Scene, SceneTextVersion -def _loads(raw: str, default=None): +def _safe_loads(raw: str, default=None): if default is None: default = {} try: @@ -24,15 +24,12 @@ def _extract_ngrams(text: str, n: int) -> list[str]: if len(tokens) < n: # fall back to character n-grams for dense Chinese text chars = [c for c in text if c.strip()] - return ["" .join(chars[i : i + n]) for i in range(len(chars) - n + 1)] + return ["".join(chars[i : i + n]) for i in range(len(chars) - n + 1)] return [" ".join(tokens[i : i + n]) for i in range(len(tokens) - n + 1)] async def _build_scene_index(db: AsyncSession, project_id: int) -> list[dict]: """Return ordered list of {chapter_sort, chapter_id, scene_id, scene_sort, text, location}.""" - # Subquery: latest version per scene - from sqlalchemy import func - latest = ( select( SceneTextVersion.scene_id, @@ -98,7 +95,7 @@ async def _check_character_status( dead_chars: list[tuple[str, str]] = [] # (name, death_location or "") for char in characters: - props = _loads(char.properties_json, {}) + props = _safe_loads(char.properties_json, {}) status = props.get("status", "") or props.get("Status", "") if str(status).lower() == "dead": death_loc = props.get("death_location", "") @@ -126,6 +123,8 @@ async def _check_character_status( ] if reappearances: + has_loc = death_idx is not None + conf = 1.0 if has_loc else 0.6 evidence = [] if death_loc: evidence.append(f"Character '{name}' marked dead at {death_loc}") @@ -137,7 +136,7 @@ async def _check_character_status( { "type": "character_status", "severity": "high", - "confidence": 1.0, + "confidence": conf, "source": "rule", "message": f"Dead character '{name}' appears in later scene text.", "evidence": evidence, @@ -178,7 +177,7 @@ async def _check_timeline(db: AsyncSession, project_id: int) -> list[dict]: events: list[dict] = [] # {name, narrative_day, chapter_id, chapter_sort, location} for p in proposals: - data = _loads(p.data_json, {}) + data = _safe_loads(p.data_json, {}) day = data.get("narrative_day") or data.get("properties", {}).get("narrative_day") if day is not None: try: @@ -197,7 +196,7 @@ async def _check_timeline(db: AsyncSession, project_id: int) -> list[dict]: ) for e in edges: - props = _loads(e.properties_json, {}) + props = _safe_loads(e.properties_json, {}) day = props.get("narrative_day") if day is not None: try: @@ -265,19 +264,30 @@ async def _check_possession(db: AsyncSession, project_id: int) -> list[dict]: for e in edges: ownership[e.target_node_id].append(e.source_node_id) - # Fetch node names for conflicting entries + # Batch-fetch all node names to avoid N+1 queries + all_ids: set[int] = set() + for target_id, owners in ownership.items(): + if len(owners) > 1: + all_ids.add(target_id) + all_ids.update(owners) + + if not all_ids: + return [] + + node_result = await db.execute( + select(KGNode).where(KGNode.id.in_(all_ids)) + ) + node_map = {n.id: n.name for n in node_result.scalars().all()} + conflicts = [] for target_id, owners in ownership.items(): if len(owners) <= 1: continue - target_node = await db.get(KGNode, target_id) - item_name = target_node.name if target_node else str(target_id) - - owner_names = [] - for oid in owners: - node = await db.get(KGNode, oid) - owner_names.append(node.name if node else str(oid)) + item_name = node_map.get(target_id, str(target_id)) + owner_names = [ + node_map.get(oid, str(oid)) for oid in owners + ] conflicts.append( { @@ -317,7 +327,7 @@ async def _check_plot_thread(db: AsyncSession, project_id: int, scenes: list[dic conflicts = [] for node in nodes: - props = _loads(node.properties_json, {}) + props = _safe_loads(node.properties_json, {}) status = str(props.get("status", "") or props.get("Status", "")).lower() if status != "resolved": continue @@ -338,6 +348,8 @@ async def _check_plot_thread(db: AsyncSession, project_id: int, scenes: list[dic ] if reappearances: + has_loc = resolved_idx is not None + conf = 1.0 if has_loc else 0.6 evidence = [f"Plot thread '{node.name}' marked resolved"] if resolved_loc: evidence.append(f"Resolved at {resolved_loc}") @@ -347,7 +359,7 @@ async def _check_plot_thread(db: AsyncSession, project_id: int, scenes: list[dic { "type": "plot_thread", "severity": "medium", - "confidence": 1.0, + "confidence": conf, "source": "rule", "message": ( f"Resolved plot thread '{node.name}' is referenced again in later scenes." diff --git a/backend/tests/test_consistency.py b/backend/tests/test_consistency.py index 2f97fc5..0d3c389 100644 --- a/backend/tests/test_consistency.py +++ b/backend/tests/test_consistency.py @@ -95,7 +95,7 @@ async def test_character_status_conflict(client, db_session): assert len(char_conflicts) >= 1 conflict = char_conflicts[0] assert conflict["severity"] == "high" - assert conflict["confidence"] == 1.0 + assert conflict["confidence"] == 0.6 # no death_location → lower confidence assert conflict["source"] == "rule" assert "Lin Yuan" in conflict["message"] assert len(conflict["evidence"]) >= 1 @@ -289,7 +289,7 @@ async def test_api_endpoint_custom_ngram_params(client, db_session): # Very high threshold: no repetition flagged resp = await client.post( "/api/qa/check", - json={"project_id": pid, "ngram_n": 4, "ngram_threshold": 100}, + json={"project_id": pid, "ngram_n": 4, "ngram_threshold": 20}, ) assert resp.status_code == 200 assert all(r["type"] != "repetition" for r in resp.json()) @@ -316,3 +316,140 @@ async def test_api_possession_conflict_via_endpoint(client, db_session): poss = [r for r in resp.json() if r["type"] == "possession"] assert len(poss) >= 1 assert "Golden Key" in poss[0]["message"] + + +# ---------- Test: timeline ---------- + +@pytest.mark.asyncio +async def test_timeline_conflict(client, db_session): + """Events with narrative_day ordered inconsistently with chapter order are flagged.""" + from app.models.tables import KGProposal + + pid = await _setup_project(client) + bid = await _setup_book(client, pid) + + ch1 = await _setup_chapter(client, bid, sort_order=0) + ch2 = await _setup_chapter(client, bid, sort_order=1) + + # Event in ch1 has narrative_day=10, event in ch2 has narrative_day=5 + # This is a timeline conflict: later chapter has earlier narrative_day + proposal1 = KGProposal( + project_id=pid, + chapter_id=ch1, + category="entity", + data_json=json.dumps({"narrative_day": 10, "name": "Battle of Dawn"}), + status="auto_approved", + confidence=0.95, + evidence_location=f"chapter:{ch1}", + ) + proposal2 = KGProposal( + project_id=pid, + chapter_id=ch2, + category="entity", + data_json=json.dumps({"narrative_day": 5, "name": "Festival Preparation"}), + status="auto_approved", + confidence=0.95, + evidence_location=f"chapter:{ch2}", + ) + db_session.add_all([proposal1, proposal2]) + await db_session.flush() + + results = await run_consistency_check(db_session, pid) + timeline_conflicts = [r for r in results if r["type"] == "timeline"] + assert len(timeline_conflicts) >= 1 + c = timeline_conflicts[0] + assert c["severity"] == "medium" + assert c["confidence"] == 1.0 + assert c["source"] == "rule" + assert "narrative_day" in c["message"] + + +@pytest.mark.asyncio +async def test_timeline_no_conflict(client, db_session): + """Events with narrative_day in correct order produce no conflict.""" + from app.models.tables import KGProposal + + pid = await _setup_project(client) + bid = await _setup_book(client, pid) + + ch1 = await _setup_chapter(client, bid, sort_order=0) + ch2 = await _setup_chapter(client, bid, sort_order=1) + + # Correct order: ch1 has narrative_day=5, ch2 has narrative_day=10 + proposal1 = KGProposal( + project_id=pid, + chapter_id=ch1, + category="entity", + data_json=json.dumps({"narrative_day": 5, "name": "Morning Meeting"}), + status="auto_approved", + confidence=0.95, + evidence_location=f"chapter:{ch1}", + ) + proposal2 = KGProposal( + project_id=pid, + chapter_id=ch2, + category="entity", + data_json=json.dumps({"narrative_day": 10, "name": "Evening Banquet"}), + status="auto_approved", + confidence=0.95, + evidence_location=f"chapter:{ch2}", + ) + db_session.add_all([proposal1, proposal2]) + await db_session.flush() + + results = await run_consistency_check(db_session, pid) + timeline_conflicts = [r for r in results if r["type"] == "timeline"] + assert timeline_conflicts == [] + + +# ---------- Test: plot_thread ---------- + +@pytest.mark.asyncio +async def test_plot_thread_conflict(client, db_session): + """Resolved plot thread referenced in later scene text is flagged.""" + pid = await _setup_project(client) + bid = await _setup_book(client, pid) + + ch1 = await _setup_chapter(client, bid, sort_order=0) + await _setup_scene_with_text(client, ch1, "The mystery of the stolen crown was solved.") + + ch2 = await _setup_chapter(client, bid, sort_order=1) + await _setup_scene_with_text( + client, ch2, + "The mystery of the stolen crown continues to puzzle everyone.", + ) + + node = _make_node(pid, "PlotThread", "mystery of the stolen crown", {"status": "resolved"}) + db_session.add(node) + await db_session.flush() + + results = await run_consistency_check(db_session, pid) + plot_conflicts = [r for r in results if r["type"] == "plot_thread"] + assert len(plot_conflicts) >= 1 + c = plot_conflicts[0] + assert c["severity"] == "medium" + assert c["confidence"] == 0.6 # no resolved_location → lower confidence + assert c["source"] == "rule" + assert "mystery of the stolen crown" in c["message"] + + +@pytest.mark.asyncio +async def test_plot_thread_no_conflict(client, db_session): + """Resolved plot thread not mentioned in later scenes produces no conflict.""" + pid = await _setup_project(client) + bid = await _setup_book(client, pid) + + ch1 = await _setup_chapter(client, bid, sort_order=0) + await _setup_scene_with_text(client, ch1, "The ancient relic was finally recovered.") + + ch2 = await _setup_chapter(client, bid, sort_order=1) + await _setup_scene_with_text(client, ch2, "A new adventure begins in the northern lands.") + + # Use a different name that won't appear in ch2 text + node = _make_node(pid, "PlotThread", "quest for the golden crown", {"status": "resolved"}) + db_session.add(node) + await db_session.flush() + + results = await run_consistency_check(db_session, pid) + plot_conflicts = [r for r in results if r["type"] == "plot_thread"] + assert plot_conflicts == []