forked from garrytan/gbrain
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathengine.ts
More file actions
1526 lines (1426 loc) · 64.6 KB
/
engine.ts
File metadata and controls
1526 lines (1426 loc) · 64.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import type {
Page, PageInput, PageFilters, GetPageOpts,
Chunk, ChunkInput, StaleChunkRow,
SearchResult, SearchOpts,
Link, GraphNode, GraphPath,
TimelineEntry, TimelineInput, TimelineOpts,
RawData,
PageVersion,
BrainStats, BrainHealth,
IngestLogEntry, IngestLogInput,
EngineConfig,
CodeEdgeInput, CodeEdgeResult,
EvalCandidate, EvalCandidateInput,
EvalCaptureFailure, EvalCaptureFailureReason,
SalienceOpts, SalienceResult, AnomaliesOpts, AnomalyResult,
EmotionalWeightInputRow, EmotionalWeightWriteRow,
DomainBankSampleOpts, CorpusSampleOpts, DomainBankRow,
} from './types.ts';
/**
 * Metadata row describing one stored binary asset (introduced in v0.27.1).
 *
 * Has the same shape as the `files` table on both engines: Postgres has
 * carried that table since v0.18, while PGLite gains it through
 * migration v36.
 */
export interface FileRow {
  id: number;
  source_id: string;
  page_slug: string | null;
  page_id: number | null;
  filename: string;
  storage_path: string;
  mime_type: string | null;
  size_bytes: number | null;
  content_hash: string;
  metadata: Record<string, unknown>;
  created_at: Date;
}
/**
 * Argument shape for upsertFile (introduced in v0.27.1).
 *
 * A file is identified by the pair (source_id, storage_path). Upserting an
 * existing identity with a new content_hash rewrites the row in place (the
 * image was replaced); upserting with an unchanged content_hash is a no-op.
 */
export interface FileSpec {
  source_id?: string;
  page_slug?: string | null;
  page_id?: number | null;
  filename: string;
  storage_path: string;
  mime_type?: string | null;
  size_bytes?: number | null;
  content_hash: string;
  metadata?: Record<string, unknown>;
}
/**
 * One row of input to addLinksBatch. Optional fields fall back to ''
 * (matching the NOT NULL DDL).
 */
export interface LinkBatchInput {
  from_slug: string;
  to_slug: string;
  link_type?: string;
  context?: string;

  /**
   * Provenance of the edge (v0.13+): 'frontmatter' for edges derived from
   * YAML frontmatter, 'markdown' for [Name](path) refs, 'manual' for
   * user-created edges. NULL means "legacy / unknown" and only appears on
   * pre-v0.13 rows; new writes should always set this. When missing on
   * input it defaults to 'markdown'.
   */
  link_source?: string;

  /** When link_source='frontmatter': slug of the page whose frontmatter created this edge. */
  origin_slug?: string;

  /** Name of the frontmatter field (e.g. 'key_people', 'investors'). */
  origin_field?: string;

  /**
   * Source ids for the endpoints (v0.18.0). When omitted, the engine JOINs
   * against `source_id='default'`. Supply explicit values when the edge
   * lives in a non-default source OR crosses sources.
   *
   * Without these fields the batch JOIN `pages.slug = v.from_slug` fans out
   * across every source containing that slug and silently creates wrong
   * edges in a multi-source brain; the source_id filter eliminates that
   * fan-out. Origin pages (frontmatter provenance) carry their own
   * source_id so reconciliation can't delete edges from another source's
   * frontmatter.
   */
  from_source_id?: string;
  to_source_id?: string;
  origin_source_id?: string;
}
/**
 * One row of input to addTimelineEntriesBatch. Optional fields fall back
 * to '' (matching the NOT NULL DDL).
 */
export interface TimelineBatchInput {
  slug: string;
  date: string;
  source?: string;
  summary: string;
  detail?: string;

  /**
   * Source id of the owning page (v0.18.0). When omitted, the engine JOINs
   * against `source_id='default'`. Without it, two pages that share a slug
   * across sources would both receive the timeline rows.
   */
  source_id?: string;
}
/**
 * One dedicated database connection, kept separate from the engine's pool.
 *
 * Two kinds of caller need this:
 *  - migration paths that set session-level GUCs (e.g.
 *    `SET statement_timeout = '600000'` ahead of a
 *    `CREATE INDEX CONCURRENTLY`) and must not leak them into the shared
 *    pool;
 *  - write-quiesce designs that hold a session-lifetime Postgres advisory
 *    lock which has to survive across transaction boundaries.
 *
 * Postgres: backed by postgres-js `sql.reserve()`, so the same backend
 * process serves every `executeRaw` call made within the callback. The
 * connection is released automatically when the callback returns or throws.
 *
 * PGLite: a thin pass-through. PGLite has no pool, so every call already
 * runs on the single backing connection. The interface is exposed anyway so
 * cross-engine callers don't need to branch.
 *
 * Not safe to call from inside `transaction()`: the transaction holds a
 * different backend, and reserving a second one can deadlock on a row the
 * transaction itself is waiting to write.
 */
export interface ReservedConnection {
  executeRaw<T = Record<string, unknown>>(
    sql: string,
    params?: unknown[],
  ): Promise<T[]>;
}
/**
 * Takes (v0.28): typed, weighted, attributed claims that are indexed in
 * Postgres. The markdown page (a fenced table on the page) is the source
 * of truth and the database row is the derived index. Rows are scoped to a
 * page through page_id (NOT slug, because a slug is unique only within a
 * source); `(page_id, row_num)` is the natural unique key.
 */
export interface TakeKindLiteral {
  kind: 'fact' | 'take' | 'bet' | 'hunch';
}

/** Union of the allowed take kinds, derived from `TakeKindLiteral['kind']`. */
export type TakeKind = TakeKindLiteral['kind'];
/** One row of input to addTakesBatch. */
export interface TakeBatchInput {
  page_id: number;
  row_num: number;
  claim: string;
  kind: TakeKind;
  holder: string;
  /** Range 0..1, default 0.5; clamped server-side. */
  weight?: number;
  /** ISO date 'YYYY-MM-DD'. */
  since_date?: string;
  until_date?: string;
  source?: string;
  superseded_by?: number | null;
  /** Default true. */
  active?: boolean;
}
/** A take row in the shape returned by listTakes / searchTakes. */
export interface Take {
  id: number;
  page_id: number;
  /** Joined from pages. */
  page_slug: string;
  row_num: number;
  claim: string;
  kind: TakeKind;
  holder: string;
  weight: number;
  since_date: string | null;
  until_date: string | null;
  source: string | null;
  superseded_by: number | null;
  active: boolean;
  resolved_at: string | null;
  resolved_outcome: boolean | null;

  /**
   * Outcome label. v0.30.0 introduced it with three states; v0.36.1.1 added
   * 'unresolvable' as a fourth, for verdicts where the evidence was
   * insufficient to grade. It sits next to `resolved_outcome` for
   * back-compat: new writes populate both columns, and legacy
   * v0.28-resolved rows had `resolved_quality` backfilled from the boolean
   * by migration v40. Null on unresolved rows.
   *
   * A schema CHECK (widened in v74) enforces (quality, outcome) consistency:
   * `correct` ↔ `outcome=true`, `incorrect` ↔ `outcome=false`,
   * `partial` ↔ `outcome=NULL`, `unresolvable` ↔ `outcome=NULL`.
   */
  resolved_quality: 'correct' | 'incorrect' | 'partial' | 'unresolvable' | null;
  resolved_value: number | null;
  resolved_unit: string | null;
  resolved_source: string | null;
  resolved_by: string | null;
  created_at: string;
  updated_at: string;
}
/** Filter, sort and paging options for listing takes. */
export interface TakesListOpts {
  page_id?: number;
  /** Resolved via JOIN. */
  page_slug?: string;
  holder?: string;
  kind?: TakeKind;
  /** Default true (only active rows). */
  active?: boolean;
  /** true = only resolved; false = only unresolved; undefined = both. */
  resolved?: boolean;
  /** Per-token MCP allow-list. Server applies AND holder = ANY($takesHoldersAllowList) when set. */
  takesHoldersAllowList?: string[];
  sortBy?: 'weight' | 'since_date' | 'created_at';
  limit?: number;
  offset?: number;
}
/** A single search hit produced by searchTakes / searchTakesVector. */
export interface TakeHit {
  take_id: number;
  page_id: number;
  page_slug: string;
  row_num: number;
  claim: string;
  kind: TakeKind;
  holder: string;
  weight: number;
  /** Search rank score (ts_rank for keyword, 1-cos_dist for vector). */
  score: number;
}
/**
 * Stale-takes row (v0.28). Mirrors the StaleChunkRow shape; the embedding
 * column is intentionally left out.
 */
export interface StaleTakeRow {
  take_id: number;
  page_slug: string;
  row_num: number;
  claim: string;
}
/** Resolution metadata passed to resolveTake. */
export interface TakeResolution {
  /**
   * Primary input. Three-state as of v0.30.0; v0.36.1.1 widened it to four
   * states by adding 'unresolvable'. When set it takes precedence over
   * `outcome`, and the engine writes both columns: quality directly, and
   * outcome derived from it (`correct→true`, `incorrect→false`,
   * `partial→null`, `unresolvable→null`).
   *
   * `unresolvable` marks rows where the judge ran but the evidence was
   * insufficient to grade; it surfaces in
   * `TakesScorecard.unresolvable_count`.
   */
  quality?: 'correct' | 'incorrect' | 'partial' | 'unresolvable';

  /**
   * Back-compat input from v0.28. v0.28 callers should keep submitting it;
   * the engine derives quality from it (`true→correct`, `false→incorrect`).
   * If `quality` is also set, `quality` wins. If neither is set, the engine
   * throws. Mutually-exclusive with `quality === 'partial'` because partial
   * isn't binary.
   */
  outcome?: boolean;
  value?: number;
  /** 'usd' | 'pct' | 'count' | other. */
  unit?: string;
  source?: string;
  /** Slug or 'garry'. */
  resolvedBy: string;
}
/** Scorecard aggregate (v0.30.0). */
export interface TakesScorecard {
  total_bets: number;

  /**
   * Number of resolved rows whose `resolved_quality` is one of
   * ('correct','incorrect','partial'). v0.36.1.1 deliberately keeps this
   * 3-state meaning so historical comparisons stay valid; unresolvable rows
   * are counted in the sibling `unresolvable_count` field instead.
   */
  resolved: number;
  correct: number;
  incorrect: number;
  partial: number;

  /** Accuracy = correct / (correct + incorrect). NULL when n=0. */
  accuracy: number | null;

  /**
   * Brier score over the rows where `resolved_quality IN
   * ('correct','incorrect')`. Maps `correct→1` and `incorrect→0`, then
   * computes `mean((weight − outcome)²)`. Lower is better: 0 = perfect,
   * 0.25 = always-50% baseline. Partial AND unresolvable rows are excluded
   * because both hide signal; the dedicated `partial_rate` and
   * `unresolvable_rate` fields surface them separately. NULL when there are
   * no correct+incorrect rows.
   */
  brier: number | null;

  /** partial / resolved. NULL when n=0. */
  partial_rate: number | null;

  /**
   * Number of rows where `resolved_quality = 'unresolvable'` (v0.36.1.1).
   * Kept as a sibling of `resolved` so historical comparisons against
   * pre-v80 scorecards stay valid: `resolved` retains its 3-state meaning
   * and unresolvable rows count here separately. Optional for SDK
   * back-compat — downstream consumers constructing TakesScorecard fixtures
   * shouldn't have to update on a hotfix. `finalizeScorecard` always
   * populates it.
   */
  unresolvable_count?: number;

  /**
   * `unresolvable_count / (resolved + unresolvable_count)` (v0.37.2.0).
   * NULL when both are 0. Surfaces the spec's headline calibration signal,
   * "what fraction of grade-attempted takes couldn't be graded?" — high
   * values signal weak evidence retrieval rather than wrong predictions.
   * Optional for SDK back-compat, for the same reason as
   * `unresolvable_count`.
   */
  unresolvable_rate?: number | null;
}
/** Scoping options for a takes scorecard. */
export interface TakesScorecardOpts {
  holder?: string;
  /** e.g. 'companies/' to scope the scorecard. */
  domainPrefix?: string;
  /** ISO date 'YYYY-MM-DD'. */
  since?: string;
  /** ISO date 'YYYY-MM-DD'. */
  until?: string;
}
/** One bucket of the calibration curve (v0.30.0). */
export interface CalibrationBucket {
  /** Inclusive lower bound of the weight bucket. */
  bucket_lo: number;

  /** Upper bound; exclusive, except that the final bucket includes 1.0. */
  bucket_hi: number;

  /** How many resolved correct+incorrect bets fall in this weight range. */
  n: number;

  /** correct / n. NULL when n=0. */
  observed: number | null;

  /** mean(weight) within the bucket, i.e. what was predicted on average. NULL when n=0. */
  predicted: number | null;
}
/** Options for computing a calibration curve. */
export interface CalibrationCurveOpts {
  holder?: string;
  /** Default 0.1. */
  bucketSize?: number;
}
/** Input for one synthesis evidence row (provenance from think synthesis pages). */
export interface SynthesisEvidenceInput {
  synthesis_page_id: number;
  take_page_id: number;
  take_row_num: number;
  citation_index: number;
}
/** Verdict from the dream-cycle Haiku judge on whether a transcript is worth processing. */
export interface DreamVerdict {
  worth_processing: boolean;
  reasons: string[];
  judged_at: string;
}
/** Argument shape for putDreamVerdict; judged_at defaults to now() server-side. */
export interface DreamVerdictInput {
  worth_processing: boolean;
  reasons: string[];
}
// ============================================================
// v0.31 Hot Memory: facts table + recall surface
// ============================================================
/** Permitted `facts.kind` values. Each kind has its own decay halflife. */
export type FactKind =
  | 'event'
  | 'preference'
  | 'commitment'
  | 'belief'
  | 'fact';

/** Every `FactKind` value, as a runtime list. */
export const ALL_FACT_KINDS: readonly FactKind[] = [
  'event',
  'preference',
  'commitment',
  'belief',
  'fact',
] as const;

/** Visibility tier of a fact row. Mirrors takes' world-default ACL contract (D21). */
export type FactVisibility = 'private' | 'world';

/** Status value returned by insertFact. */
export type FactInsertStatus = 'inserted' | 'duplicate' | 'superseded';
/** One row as read back from the facts table. */
export interface FactRow {
  id: number;
  source_id: string;
  entity_slug: string | null;
  fact: string;
  kind: FactKind;
  visibility: FactVisibility;

  /**
   * Salience tier assigned by the LLM at extraction time (v0.31.2). It is
   * surfaced to consumers: the recall response, the daily-page writer, the
   * admin dashboard, and agents reading via MCP `_meta.brain_hot_memory`.
   * Pre-v45 brains had no notability column; migration v46 backfills it
   * with the default 'medium'.
   */
  notability: 'high' | 'medium' | 'low';
  context: string | null;
  valid_from: Date;
  valid_until: Date | null;
  expired_at: Date | null;
  superseded_by: number | null;
  consolidated_at: Date | null;
  consolidated_into: number | null;
  source: string;
  source_session: string | null;
  confidence: number;
  embedding: Float32Array | null;
  embedded_at: Date | null;
  created_at: Date;
}
/** Argument shape for insertFact. The source_id is supplied through the ctx arg. */
export interface NewFact {
  fact: string;
  /** Default 'fact'. */
  kind?: FactKind;
  entity_slug?: string | null;
  /** Default 'private'. */
  visibility?: FactVisibility;
  context?: string | null;
  /** Default now(). */
  valid_from?: Date;
  valid_until?: Date | null;
  /** 'mcp:put_page' | 'mcp:extract_facts' | 'cli:think' | etc. */
  source: string;
  source_session?: string | null;
  /** Range [0,1], default 1.0. */
  confidence?: number;
  /** Salience filter for the extraction gate. */
  notability?: 'high' | 'medium' | 'low';
  /** Pre-computed embedding; if null, insertFact computes one via the gateway. */
  embedding?: Float32Array | null;

  /**
   * Typed-claim fields (v0.35.4, D-CDX-5). All optional. When populated,
   * `gbrain eval trajectory` and the `find_trajectory` MCP op consume them
   * for chronological regression detection and drift_score. The extraction
   * layer normalizes `claim_metric` to lowercase snake_case before this
   * method sees it; the engine stores it verbatim.
   */
  claim_metric?: string | null;
  claim_value?: number | null;
  claim_unit?: string | null;
  claim_period?: string | null;
}
/** Options common to the list-facts methods. */
export interface FactListOpts {
  /** Hide rows where expired_at IS NOT NULL. Default true. */
  activeOnly?: boolean;
  limit?: number;
  offset?: number;

  /** Limit the result to these kinds. Default: all kinds. */
  kinds?: FactKind[];

  /**
   * Visibility filter. Undefined returns everything; when set, only
   * matching rows are returned. Remote (untrusted) callers must supply
   * ['world'].
   */
  visibility?: FactVisibility[];
}
/** Operational health snapshot for one source, consumed by `gbrain doctor`. */
export interface FactsHealth {
  source_id: string;
  /** Facts where expired_at IS NULL. */
  total_active: number;
  /** Created in last 24h. */
  total_today: number;
  /** Created in last 7d. */
  total_week: number;
  /** expired_at IS NOT NULL. */
  total_expired: number;
  /** consolidated_at IS NOT NULL. */
  total_consolidated: number;
  top_entities: { entity_slug: string; count: number }[];

  /** Optional counters fed by the queue / classifier — populated when those modules report. */
  drop_counter?: number;
  classifier_fail_counter?: number;
  p50_latency_ms?: number;
  p99_latency_ms?: number;
}
/**
 * Options accepted by `BrainEngine.findTrajectory` (v0.35.4, D-CDX-6).
 *
 * Source scoping follows the v0.34.1.0 search* pattern. `sourceId` is the
 * scalar fast path and `sourceIds` the federated array: when `sourceIds` is
 * set the engine applies `WHERE source_id = ANY($N::text[])`; otherwise it
 * uses a scalar predicate with `sourceId ?? 'default'`.
 *
 * `remote` (D-CDX-1) gates the visibility filter. When true the engine adds
 * `AND visibility = 'world'`, mirroring `recall`'s posture for untrusted
 * callers. The local CLI keeps `remote: false` and sees both private and
 * world facts.
 */
export interface TrajectoryOpts {
  entitySlug: string;

  /** Single-source scope; falls back to 'default' when neither this nor sourceIds is set. */
  sourceId?: string;

  /** Federated array scope (mutually exclusive with sourceId; the array wins when set). */
  sourceIds?: string[];

  /** When true, restricts results to visibility='world'. Set by the MCP layer from ctx.remote. */
  remote?: boolean;

  /** Metric filter. When set, only facts carrying this canonical metric label participate. */
  metric?: string;

  /** Inclusive lower bound on valid_from. YYYY-MM-DD or full ISO. */
  since?: string | Date;

  /** Inclusive upper bound on valid_from. YYYY-MM-DD or full ISO. */
  until?: string | Date;

  /** Cap on the number of points returned. Default 100, max 500. */
  limit?: number;
}
/**
 * One point along an entity's claim trajectory.
 *
 * Holds the typed-claim fields when they are populated (these drive
 * regression detection), the underlying fact text (for display),
 * provenance (source_session, source_markdown_slug), and the raw embedding
 * so that the caller can compute drift_score without a second SQL
 * round-trip.
 */
export interface TrajectoryPoint {
  fact_id: number;
  valid_from: Date;
  metric: string | null;
  value: number | null;
  unit: string | null;
  period: string | null;
  text: string;
  source_session: string | null;
  source_markdown_slug: string | null;

  /** Raw embedding used for drift computation; null when the fact was inserted without one. */
  embedding: Float32Array | null;
}
/** Maximum results returned by search operations. Internal bulk operations (listPages) are not clamped. */
export const MAX_SEARCH_LIMIT = 100;

/**
 * Clamp a user-provided search limit to a safe range.
 *
 * The limit is floored to a whole number first and only then validated, so
 * a fractional value below 1 (e.g. 0.5) is treated like any other
 * non-positive limit and yields `defaultLimit` rather than a limit of 0.
 *
 * @param limit - Requested limit. Missing, null, NaN and ±Infinity all fall
 *   back to `defaultLimit`.
 * @param defaultLimit - Returned whenever `limit` is unusable or floors to
 *   a value <= 0.
 * @param cap - Upper bound applied to a usable limit.
 * @returns `defaultLimit`, or the floored limit bounded above by `cap`.
 */
export function clampSearchLimit(limit: number | undefined, defaultLimit = 20, cap = MAX_SEARCH_LIMIT): number {
  // `== null` also catches a runtime null coming from untyped callers;
  // Number.isFinite rejects NaN as well as ±Infinity.
  if (limit == null || !Number.isFinite(limit)) return defaultLimit;

  // Floor BEFORE the positivity check so (0, 1) cannot slip through as 0.
  const whole = Math.floor(limit);
  if (whole <= 0) return defaultLimit;

  return Math.min(whole, cap);
}
export interface BrainEngine {
/** Discriminator: lets migrations and other consumers branch on engine kind without instanceof + dynamic imports. */
readonly kind: 'postgres' | 'pglite';
// Lifecycle
connect(config: EngineConfig): Promise<void>;
disconnect(): Promise<void>;
initSchema(): Promise<void>;
transaction<T>(fn: (engine: BrainEngine) => Promise<T>): Promise<T>;
/**
* Run `fn` with a dedicated connection (Postgres: reserved backend;
* PGLite: pass-through). See `ReservedConnection` for semantics and
* usage constraints. Release is automatic.
*/
withReservedConnection<T>(fn: (conn: ReservedConnection) => Promise<T>): Promise<T>;
// Pages CRUD
/**
* Fetch a page by slug.
* v0.26.5: by default soft-deleted rows return null (matches the search
* filter contract). Pass `opts.includeDeleted: true` to surface them with
* `deleted_at` populated — used by `gbrain pages purge-deleted` listing,
* by `restore_page` flow, and by operator diagnostics.
*/
getPage(slug: string, opts?: GetPageOpts): Promise<Page | null>;
/**
* Insert or update a page. When `opts.sourceId` is omitted, the row is
* written under the schema DEFAULT ('default'). When provided, `source_id`
* is included in the INSERT column list so ON CONFLICT (source_id, slug)
* DO UPDATE actually targets the intended row instead of fabricating a
* duplicate at (default, slug). Multi-source brains MUST pass sourceId.
*/
putPage(slug: string, page: PageInput, opts?: { sourceId?: string }): Promise<Page>;
/**
 * Hard-delete a page row. Cascades to content_chunks, page_links,
 * chunk_relations via existing FK ON DELETE CASCADE.
 *
 * v0.26.5: this is no longer the public-facing `delete_page` op handler —
 * the op now soft-deletes via `softDeletePage` instead. `deletePage` stays
 * as the underlying primitive used by `purgeDeletedPages` and by callers
 * that explicitly want hard-delete semantics (e.g. test setup teardown).
 *
 * v0.18.0+ multi-source: `opts.sourceId` scopes the DELETE so a source-A
 * delete doesn't hard-delete the same-slug pages in sources B/C/D. Without
 * it, the bare DELETE matches every row with that slug across all sources.
 */
deletePage(slug: string, opts?: { sourceId?: string }): Promise<void>;
/**
* v0.26.5 — set `deleted_at = now()` on a page. Returns the slug if a row
* was soft-deleted, null if no row matched (already soft-deleted OR not found).
* Idempotent-as-null. The page stays in the DB and cascade rows (chunks,
* links) stay intact; the autopilot purge phase hard-deletes after 72h.
*/
softDeletePage(slug: string, opts?: { sourceId?: string }): Promise<{ slug: string } | null>;
/**
* v0.26.5 — clear `deleted_at` on a soft-deleted page. Returns true iff a
* row was restored. False if the slug is unknown OR the page is not
* currently soft-deleted (idempotent-as-false).
*/
restorePage(slug: string, opts?: { sourceId?: string }): Promise<boolean>;
/**
* v0.26.5 — hard-delete pages whose `deleted_at` is older than the cutoff.
* Called by the autopilot purge phase and by the `gbrain pages purge-deleted`
* CLI escape hatch. Cascades through existing FKs.
*/
purgeDeletedPages(olderThanHours: number): Promise<{ slugs: string[]; count: number }>;
/**
* v0.26.5: by default `listPages` excludes soft-deleted rows. Set
* `filters.includeDeleted: true` to surface them.
*/
listPages(filters?: PageFilters): Promise<Page[]>;
resolveSlugs(partial: string): Promise<string[]>;
/**
* Returns the slug of every page in the brain. Used by batch commands as a
* mutation-immune iteration source (alternative to listPages OFFSET pagination,
* which is unstable when ordering by updated_at and writes are happening).
*
* v0.31.8 (D12): `opts.sourceId` scopes the result to a single source
* (used by the source-aware reconcileLinks path so wikilink resolution
* doesn't span unrelated sources). When omitted, returns the union of
* slugs across every source (pre-v0.31.8 behavior).
*/
getAllSlugs(opts?: { sourceId?: string }): Promise<Set<string>>;
/**
* v0.32.8: cross-source page enumeration. Returns one row per (slug,
* source_id) pair across the brain, ordered by (source_id, slug) for
* deterministic iteration on large brains. Used by extract-takes,
* extract, and integrity to replace the `getAllSlugs() → getPage(slug)`
* N+1 pattern, which silently defaulted to source_id='default' and
* skipped non-default-source pages.
*
* Cheap by design: only slug + source_id, not the full Page row. For
* loops that need page.compiled_truth / timeline / frontmatter, use
* `forEachPage` from src/core/engine-iter.ts instead.
*/
listAllPageRefs(): Promise<Array<{ slug: string; source_id: string }>>;
/**
* v0.37.0 — prefix-stratified page sampling for `gbrain brainstorm` / `gbrain lsd`
* domain-bank module. Takes a caller-supplied prefix list (cached at the domain-bank
* layer per D3), returns one page per prefix tiebroken by `connection_count`
* (LEFT JOIN to page_links, count of inbound links).
*
* Stale-bias (D5 / LSD): when `opts.staleBias === true`, ROW_NUMBER() ORDER BY
* prefers pages with `last_retrieved_at IS NULL` (never retrieved) > pages older
* than `staleThresholdDays` (default 90) > recently-retrieved.
*
* Source scoping (D5, codex r2 #2 fix): `sourceId` (scalar) and `sourceIds`
* (array, wins over scalar) per the [source-id-canonical-thread] pattern.
* Both threaded from day 1 even though v0.37.0 callers are CLI-local — D7
* MCP exposure ships zero-refactor.
*
* Soft-deleted pages (deleted_at IS NOT NULL) excluded automatically.
*/
listPrefixSampledPages(opts: DomainBankSampleOpts): Promise<DomainBankRow[]>;
/**
* v0.37.0 — corpus-sampling fallback for `gbrain brainstorm` when prefix-stratified
* can't fill M (small brain, single-prefix corpus). Random sample of N pages with
* the same exclusion + source-scope semantics as `listPrefixSampledPages`.
* Deterministic with `opts.seed` set; falls back to RANDOM() otherwise.
*
* Returns the same `DomainBankRow` shape so the orchestrator can union both
* sources of pages and dedup by slug+source_id.
*/
listCorpusSample(opts: CorpusSampleOpts): Promise<DomainBankRow[]>;
// Search
searchKeyword(query: string, opts?: SearchOpts): Promise<SearchResult[]>;
searchVector(embedding: Float32Array, opts?: SearchOpts): Promise<SearchResult[]>;
/**
* Hydrate embeddings for chunks already known by id. v0.36 (D9):
* optional `column` parameter selects which content_chunks column to
* fetch from (default 'embedding'). The dynamic-embedding-column
* search path hands its resolved column name here so cosineReScore
* rehydrates in the right embedding space — otherwise vector search
* against `embedding_voyage` would HNSW-rank against Voyage but
* rescore against OpenAI vectors (NaN / wrong rankings).
*
* The column name MUST be regex-validated by the caller (resolveEmbed-
* dingColumn rejects bad names). Engines identifier-quote on
* interpolation as defense in depth (D12).
*/
getEmbeddingsByChunkIds(ids: number[], column?: string): Promise<Map<number, Float32Array>>;
// Chunks
/**
* Replace the chunk set for a page. Internal page-id lookup is sourceId-
* scoped when `opts.sourceId` is given; without it, the schema DEFAULT
* matches and bare-slug lookup blows up if the same slug exists in
* multiple sources (Postgres 21000).
*/
upsertChunks(slug: string, chunks: ChunkInput[], opts?: { sourceId?: string }): Promise<void>;
/**
* Read every chunk for a page. `opts.sourceId` source-scopes the page
* lookup; without it, multi-source brains return chunks from every
* same-slug source (importCodeFile uses this for incremental embedding
* reuse, which would then attach the wrong source's embeddings).
*/
getChunks(slug: string, opts?: { sourceId?: string }): Promise<Chunk[]>;
/**
* Count chunks across the brain where embedding IS NULL.
* Pre-flight short-circuit for `embed --stale` so a 100%-embedded brain
* does no further work after a single SELECT count(*) (~50 bytes wire).
*
* `opts.sourceId` scopes the count to a single source. When omitted,
* counts across every source in the brain. Operators running
* `gbrain embed --stale --source media-corpus` expect only that
* source's NULLs touched; the caller threads `sourceId` here.
*/
countStaleChunks(opts?: { sourceId?: string }): Promise<number>;
/**
* Return every chunk where embedding IS NULL, with the metadata needed
* to call embedBatch + upsertChunks. The `embedding` column is omitted
* by design — stale rows have NULL embeddings, so shipping them wastes
* wire bytes for no gain. Caller groups by slug, embeds, and re-upserts.
*
* v0.33.3: cursor-paginated — yields up to `batchSize` rows per call
* (default 2000) to stay within Supabase's statement_timeout. Pass the
* last row's `(page_id, chunk_index)` as `afterPageId`/`afterChunkIndex`
* to fetch the next page. When fewer than `batchSize` rows come back,
* the caller has reached the end.
*
* `opts.sourceId` scopes the scan to a single source (matches the
* countStaleChunks contract). Paired with embedAllStale's --source
* support.
*/
listStaleChunks(opts?: {
batchSize?: number;
afterPageId?: number;
afterChunkIndex?: number;
sourceId?: string;
}): Promise<StaleChunkRow[]>;
/**
* Delete every chunk for a page. Internal page-id lookup is sourceId-scoped
* when `opts.sourceId` is given; otherwise the bare-slug subquery returns
* the wrong row count in multi-source brains.
*/
deleteChunks(slug: string, opts?: { sourceId?: string }): Promise<void>;
// Links
/**
* Single-row link insert.
*
* v0.18.0+ multi-source: each endpoint can live in a different source.
* Without the source ids in `opts`, the original cross-product
* `FROM pages f, pages t` fanned out across every source containing the
* slug.
*
* @param from - Slug of the page the link starts at.
* @param to - Slug of the page the link points to.
* @param linkSource - Provenance of the edge. Defaults to 'markdown' for
*   back-compat with pre-v0.13 callers. Pass 'frontmatter' (together with
*   `originSlug` + `originField`) for frontmatter-derived edges, or
*   'manual' for user-initiated edges.
* @param originSlug - Supplied with `linkSource: 'frontmatter'`.
* @param originField - Supplied with `linkSource: 'frontmatter'`.
* @param opts - `fromSourceId` / `toSourceId` / `originSourceId` each
*   default to 'default'.
*/
addLink(
from: string,
to: string,
context?: string,
linkType?: string,
linkSource?: string,
originSlug?: string,
originField?: string,
opts?: { fromSourceId?: string; toSourceId?: string; originSourceId?: string },
): Promise<void>;
/**
* Bulk insert links via a single multi-row
* INSERT...SELECT FROM (VALUES) JOIN pages statement with
* ON CONFLICT DO NOTHING. Used by extract.ts to avoid 47K sequential
* round-trips on large brains.
*
* @param links - Rows to insert in one statement.
* @returns The count of rows actually inserted. The RETURNING clause
*   excludes conflicts and JOIN-dropped rows whose slugs don't exist, so
*   the result can be smaller than `links.length`.
*/
addLinksBatch(links: LinkBatchInput[]): Promise<number>;
/**
* Remove links from `from` to `to`.
*
* @param from - Slug on the outgoing side of the link(s) to remove.
* @param to - Slug on the incoming side of the link(s) to remove.
* @param linkType - When provided, only that specific (from, to, type) row
*   is removed. When omitted, ALL link types between the pair are removed
*   (matches pre-multi-type-link behavior).
* @param linkSource - Additionally constrains the delete to a specific
*   provenance ('frontmatter', 'markdown', 'manual'). Used by runAutoLink
*   reconciliation to avoid deleting edges from other provenances when
*   pruning frontmatter-derived edges.
* @param opts - NOTE(review): `fromSourceId` / `toSourceId` presumably
*   source-scope each endpoint the way addLink's opts do; the behavior
*   when they are omitted is not stated here — confirm against the engines.
*/
removeLink(
from: string,
to: string,
linkType?: string,
linkSource?: string,
opts?: { fromSourceId?: string; toSourceId?: string },
): Promise<void>;
/**
* Read the outgoing links of the page at `slug`.
*
* @param slug - Slug of the from-page.
* @param opts.sourceId - v0.31.8 (D12 + D16): source-scopes the from-page
*   lookup. When set, the from-page filter becomes
*   `WHERE f.slug = $1 AND f.source_id = $X`. When omitted, the read
*   returns links from every same-slug page across sources (pre-v0.31.8
*   behavior; preserved via a two-branch query in both engines).
*/
getLinks(slug: string, opts?: { sourceId?: string }): Promise<Link[]>;
/**
* Read the links that point at the page at `slug`.
*
* @param slug - Slug of the to-page.
* @param opts.sourceId - v0.31.8 (D12 + D16): same semantics as
*   `getLinks`' `opts.sourceId`, applied to the to-page side of the join —
*   scoped to one source when set, every same-slug page across sources
*   when omitted.
*/
getBacklinks(slug: string, opts?: { sourceId?: string }): Promise<Link[]>;
/**
* Fuzzy-match a display name to a page slug using pg_trgm similarity.
* Zero embedding cost, zero LLM cost — designed for the v0.13 resolver used
* during migration/batch backfill where 5K+ lookups must stay sub-second.
*
* Uses the `%` trigram operator (GIN-indexed) + the standard `similarity()`
* function. Both engines support pg_trgm (PGLite 0.3+, Postgres always).
*
* @param name - Display name to resolve.
* @param dirPrefix - When given (e.g. 'people' or 'companies'), only slugs
*   starting with that prefix are considered.
* @param minSimilarity - Minimum title similarity a match must reach
*   (default 0.55).
* @returns The best match whose title similarity is at or above
*   `minSimilarity`, or null when no page meets the threshold.
*/
findByTitleFuzzy(
name: string,
dirPrefix?: string,
minSimilarity?: number,
): Promise<{ slug: string; similarity: number } | null>;
/**
* Walk the link graph starting at `slug` and return the visited nodes.
*
* v0.34.1 (#861 — P0 leak seal): pre-fix, the walk ignored source scope and
* an authenticated MCP client could enumerate cross-source topology + page
* metadata via the graph op. MCP-bound callers MUST pass the auth'd scope;
* local CLI callers omit it for the historical unscoped behavior.
*
* @param slug - Slug of the root page.
* @param depth - NOTE(review): presumably the maximum walk depth; the
*   default is not stated in this declaration — confirm in the engines.
* @param opts - `sourceId` / `sourceIds` constrain visited nodes to a
*   single source or an array of sources. NOTE(review): precedence when
*   both are supplied is not documented here — confirm.
*/
traverseGraph(
slug: string,
depth?: number,
opts?: { sourceId?: string; sourceIds?: string[] },
): Promise<GraphNode[]>;
/**
* Edge-based graph traversal with optional type and direction filters.
* Returns a list of edges (GraphPath[]) instead of nodes.
* Uses cycle prevention (visited array in recursive CTE).
*
* @param slug - Slug of the root page.
* @param opts.depth - Max depth from root (default 5).
* @param opts.linkType - Per-edge filter: only matching edges are followed
*   (per-edge semantics).
* @param opts.direction - 'in' (follow to->from), 'out' (follow from->to),
*   or 'both'.
* @param opts.sourceId - v0.34.1 source-isolation filter; see
*   traverseGraph.
* @param opts.sourceIds - v0.34.1 source-isolation filter (array form);
*   see traverseGraph.
*/
traversePaths(
slug: string,
opts?: { depth?: number; linkType?: string; direction?: 'in' | 'out' | 'both'; sourceId?: string; sourceIds?: string[] },
): Promise<GraphPath[]>;
/**
* For a list of slugs, return how many inbound links each has.
* Used by hybrid search backlink boost. Single SQL query, not N+1.
*
* @param slugs - Slugs to count inbound links for.
* @returns Map keyed by slug. Slugs with zero inbound links are present in
*   the map with value 0, so callers need no missing-key fallback.
*/
getBacklinkCounts(slugs: string[]): Promise<Map<string, number>>;
/**
* v0.27.0: for a list of slugs, return their updated_at timestamps (or
* created_at fallback). Used by hybrid search recency boost. Single SQL
* query, not N+1.
*
* @param slugs - Slugs to look up.
* @returns Map keyed by slug. Slugs with no timestamp get no entry in the
*   map (unlike getBacklinkCounts, which fills in zeroes).
*
* @deprecated v0.29.1: prefer getEffectiveDates (composite-keyed, multi-source-safe).
* Kept for back-compat with PR #618 callers.
*/
getPageTimestamps(slugs: string[]): Promise<Map<string, Date>>;
/**
* v0.29.1: for a list of (slug, source_id) refs, return
* COALESCE(effective_date, updated_at) per ref. Single SQL query.
* Drives the new applyRecencyBoost post-fusion stage.
*
* @param refs - (slug, source_id) pairs to look up.
* @returns Composite-keyed map (key format: `${source_id}::${slug}`) so
*   multi-source brains don't conflate pages with the same slug across
*   sources (codex pass-1 finding #3). Refs with no row have no date and
*   are omitted from the map rather than stored as null.
*/
getEffectiveDates(refs: Array<{slug: string; source_id: string}>): Promise<Map<string, Date>>;
/**
* v0.29.1: for a list of (slug, source_id) refs, return the salience score
* (emotional_weight × 5 + ln(1 + take_count)) per ref. Single SQL query.
* Drives the new applySalienceBoost post-fusion stage.
*
* @param refs - (slug, source_id) pairs to score.
* @returns Composite-keyed map (`${source_id}::${slug}`), same key format
*   as getEffectiveDates. Pages with no row (or zero emotional_weight +
*   zero takes) get score = 0; the boost stage skips them.
*/
getSalienceScores(refs: Array<{slug: string; source_id: string}>): Promise<Map<string, number>>;
/**
* Return every page with no inbound links (from any source).
* Used by `gbrain orphans` and `runCycle`'s orphan sweep phase.
*
* @returns One entry per orphan page. `domain` comes from the frontmatter
*   `domain` field (null if unset). No post-processing is applied: the
*   caller filters pseudo-pages and derives the display domain.
*/
findOrphanPages(): Promise<Array<{ slug: string; title: string; domain: string | null }>>;
// Tags
/**
* Attach `tag` to the page at `slug`.
*
* @param slug - Slug of the page to tag.
* @param tag - Tag to attach.
* @param opts.sourceId - v0.18.0+ multi-source: scopes the page-id lookup.
*   When omitted, the schema DEFAULT 'default' applies; in multi-source
*   brains with the same slug across sources the bare-slug lookup returns
*   >1 row and the INSERT/DELETE fails with Postgres 21000.
*/
addTag(slug: string, tag: string, opts?: { sourceId?: string }): Promise<void>;
/**
* Detach `tag` from the page at `slug`.
*
* @param opts.sourceId - Scopes the page-id lookup to one source. Per the
*   multi-source note on addTag, a bare-slug lookup that matches >1 row
*   makes the DELETE fail with Postgres 21000, so multi-source callers
*   should pass it.
*/
removeTag(slug: string, tag: string, opts?: { sourceId?: string }): Promise<void>;
/**
* Return the tags attached to the page at `slug`.
*
* @param opts.sourceId - NOTE(review): presumably scopes the page lookup to
*   a single source like addTag/removeTag; what an unscoped read returns
*   in a multi-source brain is not documented here — confirm.
*/
getTags(slug: string, opts?: { sourceId?: string }): Promise<string[]>;
// Timeline
/**
* Insert a timeline entry. By default verifies the page exists and throws
* if not. Duplicates are silently deduplicated by the
* (page_id, date, summary) UNIQUE index (ON CONFLICT DO NOTHING).
*
* @param slug - Slug of the page the entry belongs to.
* @param entry - The timeline entry to insert.
* @param opts.skipExistenceCheck - Pass true to skip the pre-check in batch
*   loops where the slug is already known to exist (e.g., from a
*   getAllSlugs() snapshot).
* @param opts.sourceId - Source-scopes both the existence check AND the
*   page-id lookup inside the INSERT — required for multi-source brains
*   where the slug exists in 2+ sources.
*/
addTimelineEntry(
slug: string,
entry: TimelineInput,
opts?: { skipExistenceCheck?: boolean; sourceId?: string },
): Promise<void>;
/**
* Bulk insert timeline entries via a single multi-row
* INSERT...SELECT FROM (VALUES) JOIN pages statement with
* ON CONFLICT DO NOTHING. Used by extract.ts to avoid sequential
* round-trips.
*
* @param entries - Rows to insert in one statement.
* @returns The count of rows actually inserted. RETURNING excludes
*   conflicts and JOIN-dropped rows whose slugs don't exist, so the result
*   can be smaller than `entries.length`.
*/
addTimelineEntriesBatch(entries: TimelineBatchInput[]): Promise<number>;
/**
* Read the timeline entries for the page at `slug`.
*
* NOTE(review): the filters/ordering carried by `TimelineOpts` and any
* source-scoping behavior are defined outside this declaration — confirm
* against the type and the engine implementations.
*/
getTimeline(slug: string, opts?: TimelineOpts): Promise<TimelineEntry[]>;
// Raw data
/**
* Write a raw-data payload for the page at `slug`.
*
* @param slug - Slug of the page the payload is attached to.
* @param source - NOTE(review): appears to label where the payload came
*   from (it is the optional filter on getRawData); this is not the
*   multi-source `sourceId` — confirm.
* @param data - Payload to store.
* @param opts.sourceId - v0.31.8 (D21): source-scopes the page-id lookup.
*   When omitted, the write targets the bare slug (pre-v0.31.8 behavior);
*   the Postgres 21000 hazard for multi-source brains exists on this path.
*   Multi-source callers MUST pass sourceId to land on the intended row.
*/
putRawData(slug: string, source: string, data: object, opts?: { sourceId?: string }): Promise<void>;
/**
* Read raw-data rows for the page at `slug`.
*
* @param slug - Slug of the page to read from.
* @param source - NOTE(review): presumably narrows the result to rows
*   written with this `source` label via putRawData — confirm.
* @param opts.sourceId - v0.31.8 (D21): source-scopes the page-id lookup.
*   Without it, multi-source brains return raw_data rows from every
*   same-slug page (preserved via two-branch query for back-compat).
*/
getRawData(slug: string, source?: string, opts?: { sourceId?: string }): Promise<RawData[]>;
// Files (v0.27.1: binary asset metadata + storage_path. Image bytes never
// enter the DB; storage_path references a path inside the brain repo or an
// external store).
/**
* Insert or update the metadata row for a binary asset.
*
* @returns The row `id` plus a `created` flag. NOTE(review): `created` is
*   presumably true for a fresh insert and false for an update of an
*   existing row — confirm against the engines.
*/
upsertFile(spec: FileSpec): Promise<{ id: number; created: boolean }>;
/**
* Look up one file row by source and storage path.
*
* @returns The matching row, or null. NOTE(review): null presumably means
*   no row exists for this (sourceId, storagePath) pair — confirm.
*/
getFile(sourceId: string, storagePath: string): Promise<FileRow | null>;
/**
* List the file rows associated with a page.
*
* @param pageId - Numeric page id (not a slug), so no source scoping
*   option is taken here.
*/
listFilesForPage(pageId: number): Promise<FileRow[]>;
// ============================================================
// v0.28: Takes (typed/weighted/attributed claims) + synthesis evidence
// ============================================================
/**
* Bulk insert/upsert takes. Uses `unnest()` (Postgres) or manual `$N`
* placeholders (PGLite).
*
* Idempotency: ON CONFLICT (page_id, row_num) DO UPDATE — re-extract on a
* changed claim/weight updates the row in place.
*
* Validation: weight outside [0, 1] is clamped server-side and surfaces a
* stderr warning per call (`TAKES_WEIGHT_CLAMPED`). Invalid `kind` values
* fail the whole batch via the CHECK constraint — caller is responsible
* for parser validation upstream.
*
* @param rows - Takes to insert or update.
* @returns The number of rows inserted OR updated.
*/
addTakesBatch(rows: TakeBatchInput[]): Promise<number>;
/**
* List takes filtered by holder/kind/active/etc.
*
* @param opts - Filter options; see `TakesListOpts` for the full set.
* @returns Matching takes, with page_slug resolved via JOIN.
*/
listTakes(opts?: TakesListOpts): Promise<Take[]>;
/**
* Keyword search across active takes. Uses pg_trgm similarity over claim
* text.
*
* @param query - Text to match against take claims.
* @param opts.takesHoldersAllowList - Honored via a WHERE filter so
*   MCP-bound calls cannot retrieve holders outside the token's
*   allow-list.
*/
searchTakes(query: string, opts?: SearchOpts & { takesHoldersAllowList?: string[] }): Promise<TakeHit[]>;
/**
* Vector search across active takes. Cosine distance against `embedding`.
*
* @param embedding - Query vector to compare against stored take
*   embeddings.
* @param opts - NOTE(review): `takesHoldersAllowList` presumably applies
*   the same holder allow-list filter as searchTakes — confirm.
* @returns Matching takes. Skipped (returns []) when no embedding column
*   has been populated yet.
*/
searchTakesVector(
embedding: Float32Array,
opts?: SearchOpts & { takesHoldersAllowList?: string[] },
): Promise<TakeHit[]>;
/**
* Look up embeddings by take id (mirrors getEmbeddingsByChunkIds).
*
* @param ids - Take ids to fetch embeddings for.
* @returns Map from take id to its embedding. NOTE(review): ids without a
*   stored embedding are presumably absent from the map — confirm.
*/
getTakeEmbeddings(ids: number[]): Promise<Map<number, Float32Array>>;
/**
* Pre-flight count for `gbrain embed --stale`.
*
* @returns Number of takes matching WHERE active AND embedding IS NULL.
*/
countStaleTakes(): Promise<number>;
/**
* List stale takes, i.e. takes that still need an embedding.
*
* @returns Stale rows with no embedding column in the payload — same
*   pattern as listStaleChunks. Takes no paging arguments, unlike
*   listStaleChunks.
*/
listStaleTakes(): Promise<StaleTakeRow[]>;
/**
* Update a take's mutable fields. May NOT change claim/kind/holder per the
* supersession invariants — those route through supersedeTake.
*
* @param pageId - Page id half of the (page_id, row_num) key.
* @param rowNum - Row number half of the (page_id, row_num) key.
* @param fields - Only the mutable columns: `weight`, `since_date`,
*   `source`.
* @throws `TAKE_ROW_NOT_FOUND` when (page_id, row_num) doesn't exist.
*/
updateTake(
pageId: number,
rowNum: number,
fields: { weight?: number; since_date?: string; source?: string },
): Promise<void>;
/**
* Supersede the take at (page_id, oldRow). Marks the old row active=false
* and sets superseded_by; appends the new row at the next row_num for the
* page. Atomic (transactional).
*
* @param pageId - Page that owns both the old and the new take.
* @param oldRow - row_num of the take being superseded.
* @param newRow - Replacement take. `page_id`, `row_num` and
*   `superseded_by` are excluded from the input type.
* @returns Both row_nums: the superseded row and the newly appended one.
* @throws `TAKES_SUPERSEDE_CYCLE` — cycle prevention: raised if newRow sets
*   superseded_by pointing to a chain that comes back to oldRow.
* @throws `TAKE_RESOLVED_IMMUTABLE` — resolved bets
*   (`resolved_at IS NOT NULL`) cannot be superseded.
*/
supersedeTake(
pageId: number,
oldRow: number,
newRow: Omit<TakeBatchInput, 'page_id' | 'row_num' | 'superseded_by'>,
): Promise<{ oldRow: number; newRow: number }>;
/**
* Resolve a bet (or take). Sets resolved_* columns. Immutable once set: use
* supersede to express a new bet.
*
* v0.30.0: accepts either `quality` (3-state, primary) or `outcome`
* (boolean, back-compat). When both are set, `quality` wins. The engine
* writes BOTH columns derived from whichever input was given:
*   - `quality='correct'`   → `outcome=true`
*   - `quality='incorrect'` → `outcome=false`
*   - `quality='partial'`   → `outcome=NULL`
* The schema `takes_resolution_consistency` CHECK constraint catches
* contradictory states at the DB layer as a defense-in-depth backstop.
*
* @param pageId - Page id half of the (page_id, row_num) key.
* @param rowNum - Row number half of the (page_id, row_num) key.
* @param resolution - Resolution payload carrying `quality` and/or
*   `outcome`.
* @throws `TAKE_ALREADY_RESOLVED` on re-resolve attempts.
*/
resolveTake(pageId: number, rowNum: number, resolution: TakeResolution): Promise<void>;
/**
* v0.30.0: aggregate calibration scorecard. Pure SQL aggregation; no LLM.