Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ DC := docker compose -f docker-compose.yml
PYTEST_FOCUS ?= app
PYTEST_APP_TESTS := archive/services/orchestrator_legacy_python/tests/test_orchestrator_retrieval.py archive/services/orchestrator_legacy_python/tests/test_migration_runtime.py

.PHONY: help launch all up up-core down status ps logs build rebuild pull clean prune mcp-proxy-up init qdrant-init mindsdb-seed letta-seed models-pull proxy-status doctor mem-ping monitor-open monitor-check dmg-build msi-build linux-bundle-build storage-audit qdrant-snapshot-prune qdrant-cutover cold-snapshot-pack cold-snapshot-tier cold-snapshot-restore telemetry-archive fanout-status fanout-deadletters fanout-rehydrate retention-install retention-uninstall retention-status retention-install-daily storage-ledger-capture storage-ledger-prune storage-ledger-install storage-ledger-uninstall storage-ledger-status memory-graph-quality memory-graph-quality-install memory-graph-quality-uninstall memory-graph-quality-status weekly-lineage-rollup weekly-lineage-install weekly-lineage-uninstall weekly-lineage-status docker-fs-watchdog-run docker-fs-watchdog-install docker-fs-watchdog-uninstall docker-fs-watchdog-status storage-migrate-hot-bindings disk-clean-safe mem-mode-show mem-mode-core mem-mode-balanced mem-mode-full mem-up-core mem-up-balanced mem-up-full observability-up observability-down launch-readiness-gate launch-readiness-gate-schedule launch-readiness-gate-schedule-status launch-readiness-gate-schedule-cancel paid-launch-checklist backup-restore-drill mem-up-release mem-up-lite-release release-lock-verify qdrant-cloud-check quickstart submission-preflight launch-lock launch-lock-public test-py bench-shortlist bench-qdrant-tuning bench-backend-lanes env-lock-check env-lock-apply sentrux-check sentrux-gate sentrux-gate-save agent-context-gate
.PHONY: help launch all up up-core down status ps logs build rebuild pull clean prune mcp-proxy-up init qdrant-init mindsdb-seed letta-seed models-pull proxy-status doctor mem-ping monitor-open monitor-check dmg-build msi-build linux-bundle-build storage-audit qdrant-snapshot-prune qdrant-cutover cold-snapshot-pack cold-snapshot-tier cold-snapshot-restore telemetry-archive fanout-status fanout-deadletters fanout-rehydrate retention-install retention-uninstall retention-status retention-install-daily storage-ledger-capture storage-ledger-prune storage-ledger-install storage-ledger-uninstall storage-ledger-status memory-graph-quality memory-graph-quality-install memory-graph-quality-uninstall memory-graph-quality-status recall-quality recall-quality-refresh recall-quality-tuning open-core-boundary-audit weekly-lineage-rollup weekly-lineage-install weekly-lineage-uninstall weekly-lineage-status docker-fs-watchdog-run docker-fs-watchdog-install docker-fs-watchdog-uninstall docker-fs-watchdog-status storage-migrate-hot-bindings disk-clean-safe mem-mode-show mem-mode-core mem-mode-balanced mem-mode-full mem-up-core mem-up-balanced mem-up-full observability-up observability-down launch-readiness-gate launch-readiness-gate-schedule launch-readiness-gate-schedule-status launch-readiness-gate-schedule-cancel paid-launch-checklist backup-restore-drill mem-up-release mem-up-lite-release release-lock-verify qdrant-cloud-check quickstart submission-preflight launch-lock launch-lock-public test-py bench-shortlist bench-qdrant-tuning bench-backend-lanes env-lock-check env-lock-apply sentrux-check sentrux-gate sentrux-gate-save agent-context-gate

help:
> echo "Targets:"
Expand All @@ -54,6 +54,8 @@ help:
> echo " storage-ledger-capture|storage-ledger-prune: append/prune metadata-only storage growth ledger"
> echo " storage-ledger-install|storage-ledger-status: install hourly ledger runner (launchd)"
> echo " memory-graph-quality*: score graph coverage and install bounded repair runner"
> echo " recall-quality*: run saved recall eval, terminal quality view, and tuning"
> echo " open-core-boundary-audit: verify lite/full/paid branch feature boundaries"
> echo " weekly-lineage-rollup: generate weekly per-project lineage + global synergy rollups"
> echo " weekly-lineage-install|weekly-lineage-status: install weekly lineage runner (launchd)"
> echo " qdrant-cutover: set QDRANT_COLLECTION and rehydrate vectors"
Expand Down Expand Up @@ -319,6 +321,21 @@ memory-graph-quality-uninstall:
memory-graph-quality-status:
> bash scripts/install_memory_graph_quality_runner.sh status

# Invoke scripts/agent/recall-quality-eval with the --tuning and --pretty flags.
recall-quality:
> scripts/agent/recall-quality-eval --tuning --pretty

# Same as recall-quality, but additionally passes --refresh-cases to the eval script.
recall-quality-refresh:
> scripts/agent/recall-quality-eval --refresh-cases --tuning --pretty

# Fetch /telemetry/recall/tuning?min_samples=1 from the orchestrator and pretty-print it with jq.
# The base URL defaults to http://127.0.0.1:8075; an x-api-key header is sent only when
# CONTEXTLATTICE_ORCHESTRATOR_API_KEY is non-empty.
# The whole recipe is a single backslash-continued shell command: make runs each recipe line
# in its own shell, so sourcing .env on a separate line would discard the values before the
# curl invocation could read them.
recall-quality-tuning:
> if [ -f .env ]; then source .env >/dev/null 2>&1 || true; fi; \
> base="$${CONTEXTLATTICE_ORCHESTRATOR_URL:-http://127.0.0.1:8075}"; key="$${CONTEXTLATTICE_ORCHESTRATOR_API_KEY:-}"; \
> if [ -n "$$key" ]; then curl -fsS -H "x-api-key: $$key" "$${base%/}/telemetry/recall/tuning?min_samples=1" | jq .; \
> else curl -fsS "$${base%/}/telemetry/recall/tuning?min_samples=1" | jq .; fi

# Invoke scripts/agent/audit-open-core-boundary with the --pretty flag.
open-core-boundary-audit:
> scripts/agent/audit-open-core-boundary --pretty

weekly-lineage-rollup:
> scripts/context_storage_ops.sh weekly-lineage \
> --orchestrator-url "$${CONTEXTLATTICE_ORCHESTRATOR_URL:-http://127.0.0.1:8075}" \
Expand Down
16 changes: 16 additions & 0 deletions contextlattice-dashboard/app/api/telemetry/recall/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import { NextResponse } from "next/server";
import { callOrchestrator } from "@/lib/orchestrator";

/**
 * Proxies GET requests to the orchestrator's `/telemetry/recall` endpoint.
 *
 * Only the `traffic_class` query parameter is forwarded, and only when it has a
 * non-empty value; every other incoming parameter is dropped.
 *
 * @param request - Incoming request whose URL supplies the query parameters.
 * @returns A JSON response wrapping whatever `callOrchestrator` resolves to.
 */
export async function GET(request: Request) {
  const incoming = new URL(request.url).searchParams;
  const forwarded = new URLSearchParams();

  ["traffic_class"].forEach((name) => {
    const raw = incoming.get(name);
    if (raw) {
      forwarded.set(name, raw);
    }
  });

  const query = forwarded.toString();
  const path = query ? `/telemetry/recall?${query}` : "/telemetry/recall";
  return NextResponse.json(await callOrchestrator(path));
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import { NextResponse } from "next/server";
import { callOrchestrator } from "@/lib/orchestrator";

/**
 * Proxies GET requests to the orchestrator's `/telemetry/recall/tuning` endpoint.
 *
 * Forwards `lookback_hours`, `min_samples` and `max_samples` when they carry a
 * non-empty value; every other incoming parameter is dropped.
 *
 * @param request - Incoming request whose URL supplies the query parameters.
 * @returns A JSON response wrapping whatever `callOrchestrator` resolves to.
 */
export async function GET(request: Request) {
  const incoming = new URL(request.url).searchParams;
  const forwarded = new URLSearchParams();

  ["lookback_hours", "min_samples", "max_samples"].forEach((name) => {
    const raw = incoming.get(name);
    if (raw) {
      forwarded.set(name, raw);
    }
  });

  const query = forwarded.toString();
  const path = query ? `/telemetry/recall/tuning?${query}` : "/telemetry/recall/tuning";
  return NextResponse.json(await callOrchestrator(path));
}
15 changes: 14 additions & 1 deletion contextlattice-dashboard/app/status/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import { useEffect, useState } from "react";
import { RetrievalPanel } from "@/components/RetrievalPanel";
import { MemoryGraphPanel, type MemoryGraphPayload } from "@/components/MemoryGraphPanel";
import { RecallQualityPanel, type RecallQualityPayload, type RecallTuningPayload } from "@/components/RecallQualityPanel";

type Service = {
name: string;
Expand Down Expand Up @@ -63,18 +64,22 @@ export default function StatusPage() {
const [topics, setTopics] = useState<TopicsPayload | null>(null);
const [memoryTelemetry, setMemoryTelemetry] = useState<MemoryTelemetry | null>(null);
const [memoryGraph, setMemoryGraph] = useState<MemoryGraphPayload | null>(null);
const [recallQuality, setRecallQuality] = useState<RecallQualityPayload | null>(null);
const [recallTuning, setRecallTuning] = useState<RecallTuningPayload | null>(null);
const [error, setError] = useState<string | null>(null);
const [updatedAt, setUpdatedAt] = useState<string | null>(null);

async function loadStatus() {
try {
setError(null);
const [statusRes, prefRes, topicRes, memRes, graphRes] = await Promise.all([
const [statusRes, prefRes, topicRes, memRes, graphRes, recallRes, tuningRes] = await Promise.all([
fetch("/api/memory/status", { cache: "no-store" }),
fetch("/api/memory/preferences", { cache: "no-store" }),
fetch("/api/memory/topics", { cache: "no-store" }),
fetch("/api/telemetry/memory", { cache: "no-store" }),
fetch("/api/telemetry/memory/graph", { cache: "no-store" }),
fetch("/api/telemetry/recall", { cache: "no-store" }),
fetch("/api/telemetry/recall/tuning", { cache: "no-store" }),
]);
const statusData = await statusRes.json();
if (!statusRes.ok) {
Expand All @@ -93,6 +98,12 @@ export default function StatusPage() {
if (graphRes.ok) {
setMemoryGraph(await graphRes.json());
}
if (recallRes.ok) {
setRecallQuality(await recallRes.json());
}
if (tuningRes.ok) {
setRecallTuning(await tuningRes.json());
}
setUpdatedAt(new Date().toLocaleTimeString());
} catch (err: any) {
setError(err?.message || "Status unavailable");
Expand Down Expand Up @@ -214,6 +225,8 @@ export default function StatusPage() {

<MemoryGraphPanel graph={memoryGraph} />

<RecallQualityPanel recall={recallQuality} tuning={recallTuning} />

<RetrievalPanel />
</div>
);
Expand Down
195 changes: 195 additions & 0 deletions contextlattice-dashboard/components/RecallQualityPanel.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
"use client";

type QualityTotals = {
requests?: number;
timeouts?: number;
errors?: number;
sourceErrorRate?: number;
noHitRate?: number;
lowConfidenceRate?: number;
staleHitRate?: number;
recallAtK?: number | null;
mrr?: number | null;
numericExactness?: number | null;
citationCoverage?: number | null;
sourceDiversity?: number | null;
graphLift?: number | null;
evalP95Ms?: number | null;
lastEvalAt?: string | null;
};

export type RecallQualityPayload = {
updatedAt?: string;
trafficClass?: string;
quality?: {
status?: string;
totals?: QualityTotals;
sampleCount?: number;
recommendations?: string[];
};
alerts?: {
count?: number;
};
};

export type RecallTuningPayload = {
window?: {
samples?: number;
minSamples?: number;
sufficient?: boolean;
};
recommended?: {
quality?: {
graphExpansion?: {
enabled?: boolean;
depth?: number;
neighborLimit?: number;
};
sourceOrder?: string[];
recommendations?: string[];
};
};
};

/**
 * Coerces an arbitrary value to a finite number.
 *
 * @param value - Any value; only finite `number` values pass through unchanged.
 * @returns `value` when it is a finite number, otherwise `0`.
 */
function numberValue(value: unknown): number {
  if (typeof value !== "number") {
    return 0;
  }
  return Number.isFinite(value) ? value : 0;
}

/**
 * Formats a 0..1 ratio as a whole-number percentage string.
 *
 * @param value - Any value; only finite numbers are formatted.
 * @returns The ratio multiplied by 100 and rounded, followed by `%`, or `"-"`
 *   when `value` is not a finite number.
 */
function percentText(value: unknown): string {
  if (typeof value === "number" && Number.isFinite(value)) {
    const percent = Math.round(value * 100);
    return percent + "%";
  }
  return "-";
}

/**
 * Maps a recall-quality status string to the CSS classes for its badge.
 *
 * @param status - Status label to colour.
 * @returns Amber classes for `repair_recommended` / `insufficient_cases`, cyan
 *   classes for `watch` / `unknown`, and emerald classes for anything else.
 */
function statusTone(status: string) {
  switch (status) {
    case "repair_recommended":
    case "insufficient_cases":
      return "bg-amber-500 text-amber-950";
    case "watch":
    case "unknown":
      return "bg-cyan-500 text-cyan-950";
    default:
      return "bg-emerald-500 text-emerald-950";
  }
}

/**
 * Thin horizontal bar whose filled width represents a 0..1 ratio.
 *
 * The bar is marked `aria-hidden`, so it is purely decorative.
 *
 * @param value - Ratio to display; the resulting percentage is clamped to 0..100.
 * @param tone - Fill colour: `warn` is amber, `neutral` is cyan, `good` (default) is emerald.
 */
function QualityBar({ value, tone = "good" }: { value: number; tone?: "good" | "warn" | "neutral" }) {
  let color = "bg-emerald-300";
  if (tone === "warn") {
    color = "bg-amber-300";
  } else if (tone === "neutral") {
    color = "bg-cyan-300";
  }

  // Clamp so out-of-range ratios never overflow or underflow the track.
  const pct = Math.min(100, Math.max(0, value * 100));

  return (
    <div className="h-2 w-full rounded bg-slate-800 overflow-hidden" aria-hidden="true">
      <div className={`h-full ${color}`} style={{ width: `${pct}%` }} />
    </div>
  );
}

/**
 * Small bordered tile showing one labelled metric value.
 *
 * @param label - Caption rendered in small uppercase text above the value.
 * @param value - Pre-formatted value text.
 * @param tone - Optional emphasis: `warn` is amber, `good` is emerald; when
 *   omitted the tile uses slate colours.
 */
function Metric({ label, value, tone }: { label: string; value: string; tone?: "warn" | "good" }) {
  let toneClass = "text-slate-200 border-slate-600";
  if (tone === "warn") {
    toneClass = "text-amber-200 border-amber-500/70";
  } else if (tone === "good") {
    toneClass = "text-emerald-200 border-emerald-500/70";
  }

  return (
    <div className={`rounded border px-3 py-2 ${toneClass}`}>
      <div className="text-xs uppercase tracking-wide text-slate-400">{label}</div>
      <div className="text-lg font-semibold">{value}</div>
    </div>
  );
}

/**
 * Dashboard card that summarises recall-quality telemetry next to the current
 * tuning recommendation.
 *
 * Renders a placeholder card when `recall` is null. Otherwise it shows a status
 * badge, six headline metrics, three progress bars (recall, MRR, graph lift),
 * the recommended graph expansion and source order, and up to four
 * recommendation strings merged from both payloads.
 *
 * @param recall - Recall quality payload, or null when telemetry could not be loaded.
 * @param tuning - Optional tuning payload; missing fields fall back to "off" / "-".
 */
export function RecallQualityPanel({
  recall,
  tuning,
}: {
  recall: RecallQualityPayload | null;
  tuning?: RecallTuningPayload | null;
}) {
  if (!recall) {
    return (
      <section className="card">
        <h3 className="text-lg font-semibold">Recall quality</h3>
        <p className="text-sm text-slate-400 mt-2">Recall telemetry unavailable.</p>
      </section>
    );
  }

  const totals = recall.quality?.totals ?? {};
  const status = String(recall.quality?.status || "unknown");
  const graphExpansion = tuning?.recommended?.quality?.graphExpansion;

  // Source names double as React keys below, so drop repeats to keep keys unique.
  const sourceOrder = Array.from(new Set(tuning?.recommended?.quality?.sourceOrder ?? []));

  // Numeric fallbacks (0) drive the bars and tone thresholds; the text shown to
  // the user is derived separately so a missing value reads as "-" rather than 0.
  const recallAtK = typeof totals.recallAtK === "number" ? totals.recallAtK : 0;
  const mrr = typeof totals.mrr === "number" ? totals.mrr : 0;
  const citationCoverage = typeof totals.citationCoverage === "number" ? totals.citationCoverage : 0;
  const graphLift = typeof totals.graphLift === "number" ? totals.graphLift : 0;

  // A reported MRR of exactly 0 is a real measurement and must not be shown as "-".
  const mrrText = typeof totals.mrr === "number" && Number.isFinite(totals.mrr) ? totals.mrr.toFixed(2) : "-";

  // Guard against unparseable timestamps so the header never reads "Invalid Date".
  const lastEval = totals.lastEvalAt ? new Date(totals.lastEvalAt) : null;
  const lastEvalText =
    lastEval && !Number.isNaN(lastEval.getTime())
      ? `Last eval ${lastEval.toLocaleTimeString()}`
      : "No saved eval sample yet";

  // Both payloads can carry the same recommendation text; de-duplicate before
  // truncating so list keys stay unique and no row is shown twice.
  const recommendations = Array.from(
    new Set([
      ...(recall.quality?.recommendations ?? []),
      ...(tuning?.recommended?.quality?.recommendations ?? []),
    ]),
  ).slice(0, 4);

  return (
    <section className="card space-y-5">
      <div className="flex flex-wrap items-start justify-between gap-3">
        <div>
          <h3 className="text-lg font-semibold">Recall quality</h3>
          <p className="text-xs text-slate-500 mt-1">{lastEvalText}</p>
        </div>
        <span className={`text-xs px-2 py-1 rounded ${statusTone(status)}`}>{status}</span>
      </div>

      <div className="grid md:grid-cols-6 gap-3 text-sm">
        <Metric label="Recall@K" value={percentText(totals.recallAtK)} tone={recallAtK >= 0.75 ? "good" : "warn"} />
        <Metric label="MRR" value={mrrText} tone={mrr >= 0.55 ? "good" : "warn"} />
        <Metric label="Citations" value={percentText(totals.citationCoverage)} tone={citationCoverage >= 0.9 ? "good" : "warn"} />
        <Metric label="Graph lift" value={percentText(totals.graphLift)} tone={graphLift > 0 ? "good" : undefined} />
        <Metric label="Diversity" value={numberValue(totals.sourceDiversity).toFixed(1)} />
        <Metric label="Eval p95" value={totals.evalP95Ms ? `${Math.round(totals.evalP95Ms)} ms` : "-"} />
      </div>

      <div className="grid lg:grid-cols-[minmax(0,1fr)_18rem] gap-5">
        <div className="space-y-3">
          <div className="grid grid-cols-[5rem_minmax(0,1fr)_3.5rem] items-center gap-3 text-xs">
            <span className="text-slate-400">recall</span>
            <QualityBar value={recallAtK} tone={recallAtK >= 0.75 ? "good" : "warn"} />
            <span className="text-right text-slate-300">{percentText(totals.recallAtK)}</span>
          </div>
          <div className="grid grid-cols-[5rem_minmax(0,1fr)_3.5rem] items-center gap-3 text-xs">
            <span className="text-slate-400">mrr</span>
            <QualityBar value={mrr} tone={mrr >= 0.55 ? "good" : "warn"} />
            <span className="text-right text-slate-300">{mrrText}</span>
          </div>
          <div className="grid grid-cols-[5rem_minmax(0,1fr)_3.5rem] items-center gap-3 text-xs">
            <span className="text-slate-400">graph</span>
            {/* Graph lift is scaled 4x so that a lift of 0.25 fills the bar. */}
            <QualityBar value={Math.min(1, graphLift * 4)} tone={graphLift > 0 ? "neutral" : "good"} />
            <span className="text-right text-slate-300">{percentText(totals.graphLift)}</span>
          </div>
        </div>

        <div className="rounded border border-slate-700/70 p-3 text-xs text-slate-300">
          <div className="font-semibold text-slate-100 mb-2">Tuning</div>
          <div className="flex justify-between gap-3">
            <span className="text-slate-500">Graph depth</span>
            <span>{graphExpansion?.enabled ? `${graphExpansion.depth ?? 0} / ${graphExpansion.neighborLimit ?? 0}` : "off"}</span>
          </div>
          <div className="mt-2 text-slate-500">Sources</div>
          <div className="mt-1 flex flex-wrap gap-1">
            {sourceOrder.slice(0, 5).map((source) => (
              <span key={source} className="rounded border border-slate-700 px-1.5 py-0.5 text-slate-300">
                {source}
              </span>
            ))}
            {!sourceOrder.length ? <span>-</span> : null}
          </div>
        </div>
      </div>

      {recommendations.length ? (
        <div className="rounded border border-slate-700/70 p-3 text-xs text-slate-300">
          <div className="font-semibold text-slate-100 mb-1">Recommended next action</div>
          <ul className="space-y-1">
            {recommendations.map((item) => (
              <li key={item}>{item}</li>
            ))}
          </ul>
        </div>
      ) : null}
    </section>
  );
}
2 changes: 1 addition & 1 deletion contextlattice-dashboard/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"billing:reconcile:coinbase": "TS_NODE_COMPILER_OPTIONS='{\"module\":\"commonjs\",\"jsx\":\"react-jsx\"}' node --require tsconfig-paths/register --require ts-node/register scripts/reconcile_coinbase.ts",
"audit:export": "TS_NODE_COMPILER_OPTIONS='{\"module\":\"commonjs\",\"jsx\":\"react-jsx\"}' node --require tsconfig-paths/register --require ts-node/register scripts/export_audit_logs.ts",
"audit:prune": "TS_NODE_COMPILER_OPTIONS='{\"module\":\"commonjs\",\"jsx\":\"react-jsx\"}' node --require tsconfig-paths/register --require ts-node/register scripts/prune_audit_logs.ts",
"test": "TS_NODE_COMPILER_OPTIONS='{\"module\":\"commonjs\",\"jsx\":\"react-jsx\"}' node --require tsconfig-paths/register --test --require ts-node/register tests/overrides-api.test.ts tests/overrides-panel.test.tsx tests/sidecar-health-api.test.ts tests/sidecar-health-panel.test.tsx tests/charts-panel.test.tsx tests/alerts-panel.test.tsx tests/strategy-panel.test.tsx"
"test": "TS_NODE_COMPILER_OPTIONS='{\"module\":\"commonjs\",\"jsx\":\"react-jsx\"}' node --require tsconfig-paths/register --test --require ts-node/register tests/overrides-api.test.ts tests/overrides-panel.test.tsx tests/sidecar-health-api.test.ts tests/sidecar-health-panel.test.tsx tests/charts-panel.test.tsx tests/alerts-panel.test.tsx tests/strategy-panel.test.tsx tests/recall-quality-panel.test.tsx"
},
"dependencies": {
"@next-auth/prisma-adapter": "^1.0.7",
Expand Down
41 changes: 41 additions & 0 deletions contextlattice-dashboard/tests/recall-quality-panel.test.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import assert from "node:assert/strict";
import test from "node:test";
import React from "react";
import { renderToStaticMarkup } from "react-dom/server";
import { RecallQualityPanel } from "@/components/RecallQualityPanel";

// Server-renders the panel with a representative healthy payload and verifies
// that the headline metric, a tuning source, and the graph depth reach the markup.
test("RecallQualityPanel renders quality metrics and tuning", () => {
  const recallFixture = {
    quality: {
      status: "healthy",
      totals: {
        recallAtK: 0.91,
        mrr: 0.78,
        citationCoverage: 1,
        sourceDiversity: 2.4,
        graphLift: 0.12,
        evalP95Ms: 184,
        lastEvalAt: "2026-05-29T12:00:00Z",
      },
      recommendations: ["Recall quality telemetry is inside current production thresholds."],
    },
  };

  const tuningFixture = {
    recommended: {
      quality: {
        graphExpansion: { enabled: true, depth: 1, neighborLimit: 12 },
        sourceOrder: ["qdrant", "postgres_pgvector", "topic_rollups"],
      },
    },
  };

  const markup = renderToStaticMarkup(<RecallQualityPanel recall={recallFixture} tuning={tuningFixture} />);

  // Checked in order, so the first missing fragment is the one reported.
  const expectedFragments = [/Recall quality/, /91%/, /Graph lift/, /qdrant/, /1 \/ 12/];
  for (const fragment of expectedFragments) {
    assert.match(markup, fragment);
  }
});
Loading
Loading