From 1e70d60743fb692ab1f3d8c5c8393dd789374026 Mon Sep 17 00:00:00 2001 From: Can Temizyurek Date: Fri, 21 Nov 2025 11:55:01 -0500 Subject: [PATCH 1/8] feat: Add graph data structure and transformer pipeline for content extraction. --- packages/evalite/src/generation/graph.ts | 69 ++++++++++ .../transformers/chunk-extractor.ts | 37 +++++ .../transformers/embed-extractor.ts | 53 ++++++++ .../transformers/entity-extractor.ts | 126 ++++++++++++++++++ .../transformers/summary-extractor.ts | 75 +++++++++++ .../transformers/topic-extractor.ts | 83 ++++++++++++ .../generation/transformers/transformer.ts | 32 +++++ 7 files changed, 475 insertions(+) create mode 100644 packages/evalite/src/generation/graph.ts create mode 100644 packages/evalite/src/generation/transformers/chunk-extractor.ts create mode 100644 packages/evalite/src/generation/transformers/embed-extractor.ts create mode 100644 packages/evalite/src/generation/transformers/entity-extractor.ts create mode 100644 packages/evalite/src/generation/transformers/summary-extractor.ts create mode 100644 packages/evalite/src/generation/transformers/topic-extractor.ts create mode 100644 packages/evalite/src/generation/transformers/transformer.ts diff --git a/packages/evalite/src/generation/graph.ts b/packages/evalite/src/generation/graph.ts new file mode 100644 index 00000000..7fcb3be6 --- /dev/null +++ b/packages/evalite/src/generation/graph.ts @@ -0,0 +1,69 @@ +export class Graph { + private nodes: Map> = new Map(); + + constructor(nodes?: Node[]) { + if (nodes) { + nodes.forEach((node) => this.addNode(node)); + } + } + + addNode(node: Node) { + this.nodes.set(node.id, node); + } + + getNode(id: string) { + return this.nodes.get(id); + } + + getNodes() { + return this.nodes; + } + + addEdge(node1: string, node2: string, type: string) { + const node1Node = this.nodes.get(node1); + const node2Node = this.nodes.get(node2); + if (!node1Node || !node2Node) { + throw new Error("One or more nodes not found"); + } + node1Node.addEdge(new Edge(node1Node, node2Node, type)); + } +} + +export class Node { + data: T; + readonly type: "document" | "chunk"; + private edges: Map> = new Map(); + + constructor( + readonly id: string, + type: "document" | "chunk", + data: T + ) { + this.type = type; + this.data = data; + } + + addEdge(edge: Edge) { + this.edges.set(edge.to.id, edge); + } + + getEdges() { + return Array.from(this.edges.values()); + } +} + +export class Edge { + constructor( + readonly from: Node, + readonly to: Node, + readonly type: string + ) {} +} + +export function graph(nodes?: Node[]) { + return new Graph(nodes); +} + +export function node(type: "document" | "chunk", data: T) { + return new Node(crypto.randomUUID(), type, data); +} diff --git a/packages/evalite/src/generation/transformers/chunk-extractor.ts b/packages/evalite/src/generation/transformers/chunk-extractor.ts new file mode 100644 index 00000000..d581a5f3 --- /dev/null +++ b/packages/evalite/src/generation/transformers/chunk-extractor.ts @@ -0,0 +1,37 @@ +import { Edge, Graph, Node } from "../graph.js"; +import type { Transformer } from "./transformer.js"; + +export type ChunkerFn = (content: string) => string[]; + +export function chunkExtractor({ + chunker, + filter, +}: { + chunker: ChunkerFn; + filter?: (node: Node) => boolean; +}): Transformer> { + return async (graph: Graph) => { + const newGraph = new Graph<{ content: string } & Partial>(); + + for (const node of graph.getNodes().values()) { + if (filter && !filter(node)) { + newGraph.addNode(new Node(node.id, node.type, node.data)); + continue; + } + + const chunks = chunker(node.data.content); + newGraph.addNode(new Node(node.id, node.type, node.data)); + + for (const chunk of chunks) { + const newNode = new Node(crypto.randomUUID(), "chunk", { + content: chunk, + } as { content: string } & Partial); + newGraph.addNode(newNode); + newGraph.addEdge(node.id, newNode.id, "chunk" as const); + newGraph.addEdge(newNode.id, node.id, "parent" as const); + } + } + + return newGraph; + }; +} diff --git a/packages/evalite/src/generation/transformers/embed-extractor.ts b/packages/evalite/src/generation/transformers/embed-extractor.ts new file mode 100644 index 00000000..af86c1e0 --- /dev/null +++ b/packages/evalite/src/generation/transformers/embed-extractor.ts @@ -0,0 +1,53 @@ +import { embed, type EmbeddingModel } from "ai"; +import { Graph, Node } from "../graph.js"; +import type { Transformer } from "./transformer.js"; + +export function embedExtractor< + TInput extends { content: string }, + Field extends keyof TInput & string, +>({ + model, + field, + filter, +}: { + model: EmbeddingModel; + field: Field; + filter?: (node: Node) => boolean; +}): Transformer { + return async (graph: Graph) => { + const nodes: Node[] = + []; + + for (const node of graph.getNodes().values()) { + if (filter && !filter(node)) { + nodes.push( + new Node(node.id, node.type, { + ...node.data, + }) + ); + continue; + } + const value = node.data[field]; + + if (typeof value !== "string") { + throw new Error( + `Field "${field}" must be a string to be embedded. Found type: ${typeof value}` + ); + } + + const { embedding } = await embed({ + model, + value, + }); + + const newData = { + ...node.data, + [`${field}Embedding`]: embedding, + } as TInput & { [K in `${Field}Embedding`]: number[] }; + + nodes.push(new Node(node.id, node.type, newData)); + } + + return new Graph(nodes); + }; +} diff --git a/packages/evalite/src/generation/transformers/entity-extractor.ts b/packages/evalite/src/generation/transformers/entity-extractor.ts new file mode 100644 index 00000000..7040d3fc --- /dev/null +++ b/packages/evalite/src/generation/transformers/entity-extractor.ts @@ -0,0 +1,126 @@ +import { Graph, Node } from "../graph.js"; +import type { Transformer } from "./transformer.js"; +import { generateObject, jsonSchema, type LanguageModel } from "ai"; +import { promptBuilder } from "../../scorers/prompt-builder.js"; + +const EntitiesSchema = jsonSchema<{ + entities: Array<{ + type: string; + value: string; + description?: string; + }>; +}>({ + type: "object", + properties: { + entities: { + type: "array", + items: { + type: "object", + properties: { + type: { + type: "string", + description: + "The type or category of the entity (e.g., PERSON, ORGANIZATION, LOCATION, DATE, etc.)", + }, + value: { + type: "string", + description: "The actual entity value extracted from the content", + }, + description: { + type: "string", + description: + "Optional additional context or description about the entity", + }, + }, + required: ["type", "value"], + }, + }, + }, + required: ["entities"], +}); + +const extractEntitiesPrompt = promptBuilder({ + prompt: + "Extract all named entities from the provided content. Identify entities such as people, organizations, locations, dates, products, or any other relevant entities. For each entity, provide its type, value, and optionally a brief description. Output JSON following the required schema.", + examples: [ + { + input: { + content: + "Apple Inc. announced that Tim Cook will speak at the conference in San Francisco on March 15, 2024.", + }, + output: { + entities: [ + { + type: "ORGANIZATION", + value: "Apple Inc.", + description: "Technology company", + }, + { + type: "PERSON", + value: "Tim Cook", + description: "CEO of Apple", + }, + { + type: "LOCATION", + value: "San Francisco", + description: "City location of the conference", + }, + { + type: "DATE", + value: "March 15, 2024", + description: "Conference date", + }, + ], + }, + }, + ], + task: ["content"], +}); + +export function entityExtractor({ + model, + filter, +}: { + model: LanguageModel; + filter?: (node: Node) => boolean; +}): Transformer< + TInput, + TInput & { + entities?: { type: string; value: string; description?: string }[]; + } +> { + return async (graph: Graph) => { + const nodes: Node< + TInput & { + entities?: { type: string; value: string; description?: string }[]; + } + >[] = []; + + for (const node of graph.getNodes().values()) { + if (filter && !filter(node)) { + nodes.push( + new Node(node.id, node.type, { + ...node.data, + }) + ); + continue; + } + const result = await generateObject({ + model, + schema: EntitiesSchema, + prompt: extractEntitiesPrompt({ + content: node.data.content, + }), + }); + + nodes.push( + new Node(node.id, node.type, { + ...node.data, + entities: result.object.entities, + }) + ); + } + + return new Graph(nodes); + }; +} diff --git a/packages/evalite/src/generation/transformers/summary-extractor.ts b/packages/evalite/src/generation/transformers/summary-extractor.ts new file mode 100644 index 00000000..fd99657d --- /dev/null +++ b/packages/evalite/src/generation/transformers/summary-extractor.ts @@ -0,0 +1,75 @@ +import { Graph, Node } from "../graph.js"; +import type { Transformer } from "./transformer.js"; +import { generateObject, jsonSchema, type LanguageModel } from "ai"; +import { promptBuilder } from "../../scorers/prompt-builder.js"; + +const SummarySchema = jsonSchema<{ + summary: string; +}>({ + type: "object", + properties: { + summary: { + type: "string", + description: + "A concise summary of the content, capturing the main points and key information", + }, + }, + required: ["summary"], +}); + +const extractSummaryPrompt = promptBuilder({ + prompt: + "Generate a concise summary of the provided content. Capture the main points, key information, and essential details in a clear and coherent way. The summary should be comprehensive yet brief. Output JSON following the required schema.", + examples: [ + { + input: { + content: + "Apple Inc. announced that Tim Cook will speak at the conference in San Francisco on March 15, 2024. The conference will focus on the future of technology and innovation. Cook is expected to discuss Apple's latest developments in artificial intelligence and their vision for integrating AI into consumer products. Industry experts anticipate major announcements regarding new product lines.", + }, + output: { + summary: + "Apple's CEO Tim Cook will speak at a technology conference in San Francisco on March 15, 2024, focusing on AI developments and Apple's vision for AI-integrated consumer products, with anticipated announcements of new product lines.", + }, + }, + ], + task: ["content"], +}); + +export function summaryExtractor({ + model, + filter, +}: { + model: LanguageModel; + filter?: (node: Node) => boolean; +}): Transformer { + return async (graph: Graph) => { + const nodes: Node[] = []; + + for (const node of graph.getNodes().values()) { + if (filter && !filter(node)) { + nodes.push( + new Node(node.id, node.type, { + ...node.data, + }) + ); + continue; + } + const result = await generateObject({ + model, + schema: SummarySchema, + prompt: extractSummaryPrompt({ + content: node.data.content, + }), + }); + + nodes.push( + new Node(node.id, node.type, { + ...node.data, + summary: result.object.summary, + }) + ); + } + + return new Graph(nodes); + }; +} diff --git a/packages/evalite/src/generation/transformers/topic-extractor.ts b/packages/evalite/src/generation/transformers/topic-extractor.ts new file mode 100644 index 00000000..40700e80 --- /dev/null +++ b/packages/evalite/src/generation/transformers/topic-extractor.ts @@ -0,0 +1,83 @@ +import { Graph, Node } from "../graph.js"; +import type { Transformer } from "./transformer.js"; +import { generateObject, jsonSchema, type LanguageModel } from "ai"; +import { promptBuilder } from "../../scorers/prompt-builder.js"; + +const TopicSchema = jsonSchema<{ + topics: string[]; +}>({ + type: "object", + properties: { + topics: { + type: "array", + items: { + type: "string", + description: "A key topic or keyword extracted from the content", + }, + }, + }, + required: ["topics"], +}); + +const extractTopicPrompt = promptBuilder({ + prompt: + "Extract a list of key topics or keywords from the provided content. These should represent the main themes or subjects discussed. Output JSON following the required schema.", + examples: [ + { + input: { + content: + "Machine learning is a field of inquiry devoted to understanding and building methods that 'learn', that is, methods that leverage data to improve performance on some set of tasks.", + }, + output: { + topics: [ + "Machine Learning", + "Artificial Intelligence", + "Data Science", + "Algorithms", + ], + }, + }, + ], + task: ["content"], +}); + +export function topicExtractor({ + model, + filter, +}: { + model: LanguageModel; + filter?: (node: Node) => boolean; +}): Transformer { + return async (graph: Graph) => { + const nodes: Node[] = []; + + for (const node of graph.getNodes().values()) { + if (filter && !filter(node)) { + nodes.push( + new Node(node.id, node.type, { + ...node.data, + }) + ); + continue; + } + const result = await generateObject({ + model, + schema: TopicSchema, + prompt: extractTopicPrompt({ + content: node.data.content, + }), + }); + + nodes.push( + new Node(node.id, node.type, { + ...node.data, + topics: result.object.topics.map((topic) => + topic.trim().toLowerCase() + ), + }) + ); + } + + return new Graph(nodes); + }; +} diff --git a/packages/evalite/src/generation/transformers/transformer.ts b/packages/evalite/src/generation/transformers/transformer.ts new file mode 100644 index 00000000..ad56c350 --- /dev/null +++ b/packages/evalite/src/generation/transformers/transformer.ts @@ -0,0 +1,32 @@ +import type { Graph } from "../graph.js"; + +export type Transformer = ( + graph: Graph +) => PromiseLike>; + +export type TransformerPipeline = { + pipe( + transformer: Transformer + ): TransformerPipeline; + build(): Promise>; +}; + +export function transform( + graph: Graph +): TransformerPipeline { + const createPipeline = ( + currentGraph: PromiseLike> + ): TransformerPipeline => ({ + pipe(transformer: Transformer) { + const nextGraph = Promise.resolve(currentGraph).then((resolvedGraph) => + transformer(resolvedGraph) + ); + return createPipeline(nextGraph); + }, + build() { + return Promise.resolve(currentGraph); + }, + }); + + return createPipeline(Promise.resolve(graph)); +} From 589048859c4cd34198bfcccb1414d901a51b2c65 Mon Sep 17 00:00:00 2001 From: Can Temizyurek Date: Fri, 21 Nov 2025 15:05:55 -0500 Subject: [PATCH 2/8] feat: Add Jaccard and embedding similarity transformers, update embed extractor. --- .../transformers/embed-extractor.ts | 5 +- .../transformers/embedding-similarity.ts | 50 ++++++++++++++++ .../transformers/jaccard-similarity.ts | 57 +++++++++++++++++++ 3 files changed, 110 insertions(+), 2 deletions(-) create mode 100644 packages/evalite/src/generation/transformers/embedding-similarity.ts create mode 100644 packages/evalite/src/generation/transformers/jaccard-similarity.ts diff --git a/packages/evalite/src/generation/transformers/embed-extractor.ts b/packages/evalite/src/generation/transformers/embed-extractor.ts index af86c1e0..2a366125 100644 --- a/packages/evalite/src/generation/transformers/embed-extractor.ts +++ b/packages/evalite/src/generation/transformers/embed-extractor.ts @@ -1,6 +1,7 @@ -import { embed, type EmbeddingModel } from "ai"; +import { embed } from "ai"; import { Graph, Node } from "../graph.js"; import type { Transformer } from "./transformer.js"; +import type { EmbeddingModelV2 } from "@ai-sdk/provider"; export function embedExtractor< TInput extends { content: string }, @@ -10,7 +11,7 @@ export function embedExtractor< field, filter, }: { - model: EmbeddingModel; + model: EmbeddingModelV2; field: Field; filter?: (node: Node) => boolean; }): Transformer { diff --git a/packages/evalite/src/generation/transformers/embedding-similarity.ts b/packages/evalite/src/generation/transformers/embedding-similarity.ts new file mode 100644 index 00000000..b8632805 --- /dev/null +++ b/packages/evalite/src/generation/transformers/embedding-similarity.ts @@ -0,0 +1,50 @@ +import { cosineSimilarity } from "ai"; +import type { Transformer } from "./transformer.js"; +import { Graph, Node } from "../graph.js"; + +export function embeddingSimilarity< + TInput, + TKey extends keyof TInput & string, +>({ + property, + filter, + threshold = 0.5, +}: { + property: TKey; + filter?: (node: Node) => boolean; + threshold?: number; +}): Transformer { + return async (graph: Graph) => { + const nodes = Array.from(graph.getNodes().values()); + + for (let i = 0; i < nodes.length; i++) { + for (let j = i + 1; j < nodes.length; j++) { + const nodeA = nodes[i]; + const nodeB = nodes[j]; + + if (!nodeA || !nodeB) continue; + + if (filter && (!filter(nodeA) || !filter(nodeB))) { + continue; + } + + const valueA = nodeA.data[property]; + const valueB = nodeB.data[property]; + + if (!valueA || !valueB) continue; + + if (!Array.isArray(valueA) || !Array.isArray(valueB)) { + continue; + } + + const similarity = cosineSimilarity(valueA, valueB); + + if (similarity > threshold) { + graph.addEdge(nodeA.id, nodeB.id, "similarity"); + } + } + } + + return graph; + }; +} diff --git a/packages/evalite/src/generation/transformers/jaccard-similarity.ts b/packages/evalite/src/generation/transformers/jaccard-similarity.ts new file mode 100644 index 00000000..1191467f --- /dev/null +++ b/packages/evalite/src/generation/transformers/jaccard-similarity.ts @@ -0,0 +1,57 @@ +import type { Transformer } from "./transformer.js"; +import { Graph, Node } from "../graph.js"; + +export function jaccardSimilarity({ + property, + filter, + threshold = 0.5, +}: { + property: TKey; + filter?: (node: Node) => boolean; + threshold?: number; +}): Transformer { + return async (graph: Graph) => { + const nodes = Array.from(graph.getNodes().values()); + + for (let i = 0; i < nodes.length; i++) { + for (let j = i + 1; j < nodes.length; j++) { + const nodeA = nodes[i]; + const nodeB = nodes[j]; + + if (!nodeA || !nodeB) continue; + + if (filter && (!filter(nodeA) || !filter(nodeB))) { + continue; + } + + const valueA = nodeA.data[property]; + const valueB = nodeB.data[property]; + + if (!valueA || !valueB) continue; + + const setA = new Set( + Array.isArray(valueA) + ? valueA + : String(valueA).toLowerCase().split(/\s+/) + ); + const setB = new Set( + Array.isArray(valueB) + ? valueB + : String(valueB).toLowerCase().split(/\s+/) + ); + + const intersection = new Set([...setA].filter((x) => setB.has(x))); + const union = new Set([...setA, ...setB]); + + const similarity = + union.size === 0 ? 0 : intersection.size / union.size; + + if (similarity > threshold) { + graph.addEdge(nodeA.id, nodeB.id, "similarity"); + } + } + } + + return graph; + }; +} From 4ca8d48d08ba28e404fb559783353108fa000ab0 Mon Sep 17 00:00:00 2001 From: Can Temizyurek Date: Mon, 24 Nov 2025 13:38:26 -0500 Subject: [PATCH 3/8] feat: Introduce generic edge type data maps to transformers and graph, enabling explicit edge data and type definitions. --- packages/evalite/src/generation/graph.ts | 73 +++++++++++++------ .../transformers/chunk-extractor.ts | 39 +++++++--- .../transformers/embed-extractor.ts | 57 +++++++-------- .../transformers/embedding-similarity.ts | 30 ++++++-- .../transformers/jaccard-similarity.ts | 35 +++++++-- .../transformers/summary-extractor.ts | 40 +++++----- .../transformers/topic-extractor.ts | 42 ++++++----- .../generation/transformers/transformer.ts | 58 +++++++++++---- 8 files changed, 245 insertions(+), 129 deletions(-) diff --git a/packages/evalite/src/generation/graph.ts b/packages/evalite/src/generation/graph.ts index 7fcb3be6..7e3b0177 100644 --- a/packages/evalite/src/generation/graph.ts +++ b/packages/evalite/src/generation/graph.ts @@ -1,13 +1,28 @@ -export class Graph { - private nodes: Map> = new Map(); +export type Edge< + TNodeData, + TEdgeTypeDataMap extends Record = {}, +> = { + [K in keyof TEdgeTypeDataMap]: { + type: K; + data: TEdgeTypeDataMap[K]; + from: Node; + to: Node; + }; +}[keyof TEdgeTypeDataMap]; - constructor(nodes?: Node[]) { +export class Graph< + TNodeData, + TEdgeTypeDataMap extends Record = {}, +> { + private nodes: Map> = new Map(); + + constructor(nodes?: Node[]) { if (nodes) { nodes.forEach((node) => this.addNode(node)); } } - addNode(node: Node) { + addNode(node: Node) { this.nodes.set(node.id, node); } @@ -19,31 +34,45 @@ export class Graph { return this.nodes; } - addEdge(node1: string, node2: string, type: string) { + addEdge( + node1: string, + node2: string, + type: K, + data: TEdgeTypeDataMap[K] + ) { const node1Node = this.nodes.get(node1); const node2Node = this.nodes.get(node2); if (!node1Node || !node2Node) { throw new Error("One or more nodes not found"); } - node1Node.addEdge(new Edge(node1Node, node2Node, type)); + const edge = { + from: node1Node, + to: node2Node, + type, + data, + } as Edge; + node1Node.addEdge(edge); } } -export class Node { - data: T; +export class Node< + TNodeData, + TEdgeTypeDataMap extends Record = {}, +> { + data: TNodeData; readonly type: "document" | "chunk"; - private edges: Map> = new Map(); + private edges: Map> = new Map(); constructor( readonly id: string, type: "document" | "chunk", - data: T + data: TNodeData ) { this.type = type; this.data = data; } - addEdge(edge: Edge) { + addEdge(edge: Edge) { this.edges.set(edge.to.id, edge); } @@ -52,18 +81,16 @@ export class Node { } } -export class Edge { - constructor( - readonly from: Node, - readonly to: Node, - readonly type: string - ) {} -} - -export function graph(nodes?: Node[]) { - return new Graph(nodes); +export function graph< + TNodeData, + TEdgeTypeDataMap extends Record = {}, +>(nodes?: Node[]) { + return new Graph(nodes); } -export function node(type: "document" | "chunk", data: T) { - return new Node(crypto.randomUUID(), type, data); +export function node< + TNodeData, + TEdgeTypeDataMap extends Record = {}, +>(type: "document" | "chunk", data: TNodeData) { + return new Node(crypto.randomUUID(), type, data); } diff --git a/packages/evalite/src/generation/transformers/chunk-extractor.ts b/packages/evalite/src/generation/transformers/chunk-extractor.ts index d581a5f3..ae84aae6 100644 --- a/packages/evalite/src/generation/transformers/chunk-extractor.ts +++ b/packages/evalite/src/generation/transformers/chunk-extractor.ts @@ -1,17 +1,28 @@ -import { Edge, Graph, Node } from "../graph.js"; +import { Graph, Node, type Edge } from "../graph.js"; import type { Transformer } from "./transformer.js"; export type ChunkerFn = (content: string) => string[]; -export function chunkExtractor({ +export function chunkExtractor< + TInput extends { content: string }, + TInputEdgeTypeDataMap extends Record = {}, +>({ chunker, filter, }: { chunker: ChunkerFn; - filter?: (node: Node) => boolean; -}): Transformer> { - return async (graph: Graph) => { - const newGraph = new Graph<{ content: string } & Partial>(); + filter?: (node: Node) => boolean; +}): Transformer< + TInput, + { content: string } & Partial, + TInputEdgeTypeDataMap, + TInputEdgeTypeDataMap & { chunk: undefined; parent: undefined } +> { + return async (graph: Graph) => { + const newGraph = new Graph< + { content: string } & Partial, + TInputEdgeTypeDataMap & { chunk: undefined; parent: undefined } + >(); for (const node of graph.getNodes().values()) { if (filter && !filter(node)) { @@ -20,15 +31,23 @@ export function chunkExtractor({ } const chunks = chunker(node.data.content); - newGraph.addNode(new Node(node.id, node.type, node.data)); + newGraph.addNode( + new Node< + { content: string } & Partial, + TInputEdgeTypeDataMap & { chunk: undefined; parent: undefined } + >(node.id, node.type, node.data) + ); for (const chunk of chunks) { - const newNode = new Node(crypto.randomUUID(), "chunk", { + const newNode = new Node< + { content: string } & Partial, + TInputEdgeTypeDataMap & { chunk: undefined; parent: undefined } + >(crypto.randomUUID(), "chunk", { content: chunk, } as { content: string } & Partial); newGraph.addNode(newNode); - newGraph.addEdge(node.id, newNode.id, "chunk" as const); - newGraph.addEdge(newNode.id, node.id, "parent" as const); + newGraph.addEdge(node.id, newNode.id, "chunk", undefined as any); + newGraph.addEdge(newNode.id, node.id, "parent", undefined as any); } } diff --git a/packages/evalite/src/generation/transformers/embed-extractor.ts b/packages/evalite/src/generation/transformers/embed-extractor.ts index 2a366125..864bf2c5 100644 --- a/packages/evalite/src/generation/transformers/embed-extractor.ts +++ b/packages/evalite/src/generation/transformers/embed-extractor.ts @@ -1,54 +1,49 @@ import { embed } from "ai"; import { Graph, Node } from "../graph.js"; import type { Transformer } from "./transformer.js"; -import type { EmbeddingModelV2 } from "@ai-sdk/provider"; +import type { EmbeddingModel } from "ai"; export function embedExtractor< - TInput extends { content: string }, - Field extends keyof TInput & string, + TInput, + TKey extends keyof TInput, + TEdgeTypeDataMap extends Record = {}, >({ model, field, filter, }: { - model: EmbeddingModelV2; - field: Field; - filter?: (node: Node) => boolean; -}): Transformer { - return async (graph: Graph) => { - const nodes: Node[] = - []; + model: EmbeddingModel; + field: TKey; + filter?: (node: Node) => boolean; +}): Transformer< + TInput, + TInput & { embedding: number[] }, + TEdgeTypeDataMap, + TEdgeTypeDataMap +> { + return async (graph: Graph) => { + const nodes = Array.from(graph.getNodes().values()); - for (const node of graph.getNodes().values()) { - if (filter && !filter(node)) { - nodes.push( - new Node(node.id, node.type, { - ...node.data, - }) - ); - continue; - } - const value = node.data[field]; + for (const node of nodes) { + if (filter && !filter(node)) continue; + if (node.data[field] == null) continue; - if (typeof value !== "string") { - throw new Error( - `Field "${field}" must be a string to be embedded. Found type: ${typeof value}` - ); - } + const value = String(node.data[field]); const { embedding } = await embed({ model, value, }); - const newData = { + node.data = { ...node.data, - [`${field}Embedding`]: embedding, - } as TInput & { [K in `${Field}Embedding`]: number[] }; - - nodes.push(new Node(node.id, node.type, newData)); + embedding, + }; } - return new Graph(nodes); + return graph as unknown as Graph< + TInput & { embedding: number[] }, + TEdgeTypeDataMap + >; }; } diff --git a/packages/evalite/src/generation/transformers/embedding-similarity.ts b/packages/evalite/src/generation/transformers/embedding-similarity.ts index b8632805..0e599abe 100644 --- a/packages/evalite/src/generation/transformers/embedding-similarity.ts +++ b/packages/evalite/src/generation/transformers/embedding-similarity.ts @@ -5,16 +5,24 @@ import { Graph, Node } from "../graph.js"; export function embeddingSimilarity< TInput, TKey extends keyof TInput & string, + TInputEdgeTypeDataMap extends Record = {}, >({ property, filter, threshold = 0.5, }: { property: TKey; - filter?: (node: Node) => boolean; + filter?: (node: Node) => boolean; threshold?: number; -}): Transformer { - return async (graph: Graph) => { +}): Transformer< + TInput, + TInput, + TInputEdgeTypeDataMap, + TInputEdgeTypeDataMap & { + [K in `${Uppercase}_EMBEDDING_SIMILARITY`]: { score: number }; + } +> { + return async (graph: Graph) => { const nodes = Array.from(graph.getNodes().values()); for (let i = 0; i < nodes.length; i++) { @@ -40,11 +48,23 @@ export function embeddingSimilarity< const similarity = cosineSimilarity(valueA, valueB); if (similarity > threshold) { - graph.addEdge(nodeA.id, nodeB.id, "similarity"); + (graph as any).addEdge( + nodeA.id, + nodeB.id, + `${property.toUpperCase()}_EMBEDDING_SIMILARITY`, + { + score: similarity, + } + ); } } } - return graph; + return graph as unknown as Graph< + TInput, + TInputEdgeTypeDataMap & { + [K in `${Uppercase}_EMBEDDING_SIMILARITY`]: { score: number }; + } + >; }; } diff --git a/packages/evalite/src/generation/transformers/jaccard-similarity.ts b/packages/evalite/src/generation/transformers/jaccard-similarity.ts index 1191467f..118fec6a 100644 --- a/packages/evalite/src/generation/transformers/jaccard-similarity.ts +++ b/packages/evalite/src/generation/transformers/jaccard-similarity.ts @@ -1,16 +1,27 @@ import type { Transformer } from "./transformer.js"; import { Graph, Node } from "../graph.js"; -export function jaccardSimilarity({ +export function jaccardSimilarity< + TInput, + TKey extends keyof TInput & string, + TInputEdgeTypeDataMap extends Record = {}, +>({ property, filter, threshold = 0.5, }: { property: TKey; - filter?: (node: Node) => boolean; + filter?: (node: Node) => boolean; threshold?: number; -}): Transformer { - return async (graph: Graph) => { +}): Transformer< + TInput, + TInput, + TInputEdgeTypeDataMap, + TInputEdgeTypeDataMap & { + [K in `${Uppercase}_JACCARD_SIMILARITY`]: { score: number }; + } +> { + return async (graph: Graph) => { const nodes = Array.from(graph.getNodes().values()); for (let i = 0; i < nodes.length; i++) { @@ -47,11 +58,23 @@ export function jaccardSimilarity({ union.size === 0 ? 0 : intersection.size / union.size; if (similarity > threshold) { - graph.addEdge(nodeA.id, nodeB.id, "similarity"); + (graph as any).addEdge( + nodeA.id, + nodeB.id, + `${property.toUpperCase()}_JACCARD_SIMILARITY`, + { + score: similarity, + } + ); } } } - return graph; + return graph as unknown as Graph< + TInput, + TInputEdgeTypeDataMap & { + [K in `${Uppercase}_JACCARD_SIMILARITY`]: { score: number }; + } + >; }; } diff --git a/packages/evalite/src/generation/transformers/summary-extractor.ts b/packages/evalite/src/generation/transformers/summary-extractor.ts index fd99657d..bf41da57 100644 --- a/packages/evalite/src/generation/transformers/summary-extractor.ts +++ b/packages/evalite/src/generation/transformers/summary-extractor.ts @@ -35,23 +35,26 @@ const extractSummaryPrompt = promptBuilder({ task: ["content"], }); -export function summaryExtractor({ +export function summaryExtractor< + TInput extends { content: string }, + TEdgeTypeDataMap extends Record = {}, +>({ model, filter, }: { model: LanguageModel; - filter?: (node: Node) => boolean; -}): Transformer { - return async (graph: Graph) => { - const nodes: Node[] = []; + filter?: (node: Node) => boolean; +}): Transformer< + TInput, + TInput & { summary?: string }, + TEdgeTypeDataMap, + TEdgeTypeDataMap +> { + return async (graph: Graph) => { + const nodes = Array.from(graph.getNodes().values()); - for (const node of graph.getNodes().values()) { + for (const node of nodes) { if (filter && !filter(node)) { - nodes.push( - new Node(node.id, node.type, { - ...node.data, - }) - ); continue; } const result = await generateObject({ @@ -62,14 +65,15 @@ export function summaryExtractor({ }), }); - nodes.push( - new Node(node.id, node.type, { - ...node.data, - summary: result.object.summary, - }) - ); + node.data = { + ...node.data, + summary: result.object.summary, + }; } - return new Graph(nodes); + return graph as unknown as Graph< + TInput & { summary?: string }, + TEdgeTypeDataMap + >; }; } diff --git a/packages/evalite/src/generation/transformers/topic-extractor.ts b/packages/evalite/src/generation/transformers/topic-extractor.ts index 40700e80..3ed1e1a1 100644 --- a/packages/evalite/src/generation/transformers/topic-extractor.ts +++ b/packages/evalite/src/generation/transformers/topic-extractor.ts @@ -41,23 +41,26 @@ const extractTopicPrompt = promptBuilder({ task: ["content"], }); -export function topicExtractor({ +export function topicExtractor< + TInput extends { content: string }, + TEdgeTypeDataMap extends Record = {}, +>({ model, filter, }: { model: LanguageModel; - filter?: (node: Node) => boolean; -}): Transformer { - return async (graph: Graph) => { - const nodes: Node[] = []; + filter?: (node: Node) => boolean; +}): Transformer< + TInput, + TInput & { topics?: string[] }, + TEdgeTypeDataMap, + TEdgeTypeDataMap +> { + return async (graph: Graph) => { + const nodes = Array.from(graph.getNodes().values()); - for (const node of graph.getNodes().values()) { + for (const node of nodes) { if (filter && !filter(node)) { - nodes.push( - new Node(node.id, node.type, { - ...node.data, - }) - ); continue; } const result = await generateObject({ @@ -68,16 +71,15 @@ export function topicExtractor({ }), }); - nodes.push( - new Node(node.id, node.type, { - ...node.data, - topics: result.object.topics.map((topic) => - topic.trim().toLowerCase() - ), - }) - ); + node.data = { + ...node.data, + topics: result.object.topics.map((topic) => topic.trim().toLowerCase()), + }; } - return new Graph(nodes); + return graph as unknown as Graph< + TInput & { topics?: string[] }, + TEdgeTypeDataMap + >; }; } diff --git a/packages/evalite/src/generation/transformers/transformer.ts b/packages/evalite/src/generation/transformers/transformer.ts index ad56c350..d2150a62 100644 --- a/packages/evalite/src/generation/transformers/transformer.ts +++ b/packages/evalite/src/generation/transformers/transformer.ts @@ -1,27 +1,53 @@ import type { Graph } from "../graph.js"; -export type Transformer = ( - graph: Graph -) => PromiseLike>; +export type Transformer< + TInputNodeData = {}, + TOutputNodeData = {}, + TInputEdgeTypeDataMap extends Record = {}, + TOutputEdgeTypeDataMap extends Record = {}, +> = ( + graph: Graph +) => PromiseLike>; -export type TransformerPipeline = { - pipe( - transformer: Transformer - ): TransformerPipeline; - build(): Promise>; +export type TransformerPipeline< + TCurrentNodeData, + TCurrentEdgeTypeDataMap extends Record, +> = { + pipe>( + transformer: Transformer< + TCurrentNodeData, + TNextNodeData, + TCurrentEdgeTypeDataMap, + TNextEdgeTypeDataMap + > + ): TransformerPipeline; + build(): Promise>; }; -export function transform( - graph: Graph -): TransformerPipeline { - const createPipeline = ( - currentGraph: PromiseLike> - ): TransformerPipeline => ({ - pipe(transformer: Transformer) { +export function transform< + TInputNodeData, + TInputEdgeTypeDataMap extends Record = {}, +>( + graph: Graph +): TransformerPipeline { + const createPipeline = < + TCurrentNodeData, + TCurrentEdgeTypeDataMap extends Record, + >( + currentGraph: PromiseLike> + ): TransformerPipeline => ({ + pipe>( + transformer: Transformer< + TCurrentNodeData, + TNextNodeData, + TCurrentEdgeTypeDataMap, + TNextEdgeTypeDataMap + > + ) { const nextGraph = Promise.resolve(currentGraph).then((resolvedGraph) => transformer(resolvedGraph) ); - return createPipeline(nextGraph); + return createPipeline(nextGraph); }, build() { return Promise.resolve(currentGraph); From f53ef671cd2b095521667b1aac48f273f64c8f6a Mon Sep 17 00:00:00 2001 From: Can Temizyurek Date: Mon, 24 Nov 2025 13:49:28 -0500 Subject: [PATCH 4/8] feat: standardize transformer output naming conventions. --- .../transformers/embed-extractor.ts | 8 ++++---- .../transformers/embedding-similarity.ts | 20 +++++-------------- .../transformers/jaccard-similarity.ts | 6 +++--- 3 files changed, 12 insertions(+), 22 deletions(-) diff --git a/packages/evalite/src/generation/transformers/embed-extractor.ts b/packages/evalite/src/generation/transformers/embed-extractor.ts index 864bf2c5..dc52ef00 100644 --- a/packages/evalite/src/generation/transformers/embed-extractor.ts +++ b/packages/evalite/src/generation/transformers/embed-extractor.ts @@ -17,7 +17,7 @@ export function embedExtractor< filter?: (node: Node) => boolean; }): Transformer< TInput, - TInput & { embedding: number[] }, + TInput & { [K in TKey as `${string & K}Embedding`]: number[] }, TEdgeTypeDataMap, TEdgeTypeDataMap > { @@ -37,12 +37,12 @@ export function embedExtractor< node.data = { ...node.data, - embedding, - }; + [`${String(field)}Embedding`]: embedding, + } as any; } return graph as unknown as Graph< - TInput & { embedding: number[] }, + TInput & { [K in TKey as `${string & K}Embedding`]: number[] }, TEdgeTypeDataMap >; }; diff --git a/packages/evalite/src/generation/transformers/embedding-similarity.ts b/packages/evalite/src/generation/transformers/embedding-similarity.ts index 0e599abe..ae6999dd 100644 --- a/packages/evalite/src/generation/transformers/embedding-similarity.ts +++ b/packages/evalite/src/generation/transformers/embedding-similarity.ts @@ -19,7 +19,7 @@ export function embeddingSimilarity< TInput, TInputEdgeTypeDataMap, TInputEdgeTypeDataMap & { - [K in `${Uppercase}_EMBEDDING_SIMILARITY`]: { score: number }; + [K in `${TKey}Similarity`]: { score: number }; } > { return async (graph: Graph) => { @@ -48,23 +48,13 @@ export function embeddingSimilarity< const similarity = cosineSimilarity(valueA, valueB); if (similarity > threshold) { - (graph as any).addEdge( - nodeA.id, - nodeB.id, - `${property.toUpperCase()}_EMBEDDING_SIMILARITY`, - { - score: similarity, - } - ); + (graph as any).addEdge(nodeA.id, nodeB.id, `${property}Similarity`, { + score: similarity, + }); } } } - return graph as unknown as Graph< - TInput, - TInputEdgeTypeDataMap & { - [K in `${Uppercase}_EMBEDDING_SIMILARITY`]: { score: number }; - } - >; + return graph; }; } diff --git a/packages/evalite/src/generation/transformers/jaccard-similarity.ts b/packages/evalite/src/generation/transformers/jaccard-similarity.ts index 118fec6a..aac7bd45 100644 --- a/packages/evalite/src/generation/transformers/jaccard-similarity.ts +++ b/packages/evalite/src/generation/transformers/jaccard-similarity.ts @@ -18,7 +18,7 @@ export function jaccardSimilarity< TInput, TInputEdgeTypeDataMap, TInputEdgeTypeDataMap & { - [K in `${Uppercase}_JACCARD_SIMILARITY`]: { score: number }; + [K in `${TKey}JaccardSimilarity`]: { score: number }; } > { return async (graph: Graph) => { @@ -61,7 +61,7 @@ export function jaccardSimilarity< (graph as any).addEdge( nodeA.id, nodeB.id, - `${property.toUpperCase()}_JACCARD_SIMILARITY`, + `${property}JaccardSimilarity`, { score: similarity, } @@ -73,7 +73,7 @@ export function jaccardSimilarity< return graph as unknown as Graph< TInput, TInputEdgeTypeDataMap & { - [K in `${Uppercase}_JACCARD_SIMILARITY`]: { score: number }; + [K in `${TKey}JaccardSimilarity`]: { score: number }; } >; }; From b3235cb73deb4d3dc3e5b9869760b571779470fa Mon Sep 17 00:00:00 2001 From: Can Temizyurek Date: Mon, 24 Nov 2025 14:09:02 -0500 Subject: [PATCH 5/8] refactor: Implement graph cloning to enable immutable transformer operations. --- packages/evalite/src/generation/graph.ts | 42 ++++++++++++++++++- .../transformers/chunk-extractor.ts | 4 +- .../transformers/embed-extractor.ts | 13 +++--- .../transformers/embedding-similarity.ts | 12 ++++-- .../transformers/jaccard-similarity.ts | 17 ++++---- .../transformers/summary-extractor.ts | 11 ++--- .../transformers/topic-extractor.ts | 11 ++--- 7 files changed, 80 insertions(+), 30 deletions(-) diff --git a/packages/evalite/src/generation/graph.ts b/packages/evalite/src/generation/graph.ts index 7e3b0177..bdbd2078 100644 --- a/packages/evalite/src/generation/graph.ts +++ b/packages/evalite/src/generation/graph.ts @@ -39,7 +39,14 @@ export class Graph< node2: string, type: K, data: TEdgeTypeDataMap[K] - ) { + ): void; + addEdge( + node1: string, + node2: string, + type: K, + data: D + ): void; + addEdge(node1: string, node2: string, type: string, data: unknown) { const node1Node = this.nodes.get(node1); const node2Node = this.nodes.get(node2); if (!node1Node || !node2Node) { @@ -53,6 +60,39 @@ export class Graph< } as Edge; node1Node.addEdge(edge); } + + clone< + TNewNodeData = TNodeData, + TNewEdgeTypeDataMap extends Record = TEdgeTypeDataMap, + >(): Graph { + const newNodes = new Map>(); + + for (const [id, node] of this.nodes) { + const clonedNode = new Node( + node.id, + node.type, + structuredClone(node.data) as unknown as TNewNodeData + ); + newNodes.set(id, clonedNode); + } + + for (const [id, node] of this.nodes) { + const clonedNode = newNodes.get(id)!; + for (const edge of node.getEdges()) { + const clonedFromNode = newNodes.get(edge.from.id)!; + const clonedToNode = newNodes.get(edge.to.id)!; + const clonedEdge = { + from: clonedFromNode, + to: clonedToNode, + type: edge.type, + data: structuredClone(edge.data), + } as unknown as Edge; + clonedNode.addEdge(clonedEdge); + } + } + + return new Graph(Array.from(newNodes.values())); + } } export class Node< diff --git a/packages/evalite/src/generation/transformers/chunk-extractor.ts b/packages/evalite/src/generation/transformers/chunk-extractor.ts index ae84aae6..bc0fdc22 100644 --- a/packages/evalite/src/generation/transformers/chunk-extractor.ts +++ b/packages/evalite/src/generation/transformers/chunk-extractor.ts @@ -46,8 +46,8 @@ export function chunkExtractor< content: chunk, } as { content: string } & Partial); newGraph.addNode(newNode); - newGraph.addEdge(node.id, newNode.id, "chunk", undefined as any); - newGraph.addEdge(newNode.id, node.id, "parent", undefined as any); + newGraph.addEdge(node.id, newNode.id, "chunk", undefined); + newGraph.addEdge(newNode.id, node.id, "parent", undefined); } } diff --git a/packages/evalite/src/generation/transformers/embed-extractor.ts b/packages/evalite/src/generation/transformers/embed-extractor.ts index dc52ef00..d79572f5 100644 --- a/packages/evalite/src/generation/transformers/embed-extractor.ts +++ b/packages/evalite/src/generation/transformers/embed-extractor.ts @@ -22,7 +22,11 @@ export function embedExtractor< TEdgeTypeDataMap > { return async (graph: Graph) => { - const nodes = Array.from(graph.getNodes().values()); + const clonedGraph = graph.clone< + TInput & { [K in TKey as `${string & K}Embedding`]: number[] }, + TEdgeTypeDataMap + >(); + const nodes = Array.from(clonedGraph.getNodes().values()); for (const node of nodes) { if (filter && !filter(node)) continue; @@ -38,12 +42,9 @@ export function embedExtractor< node.data = { ...node.data, [`${String(field)}Embedding`]: embedding, - } as any; + }; } - return graph as unknown as Graph< - TInput & { [K in TKey as `${string & K}Embedding`]: number[] }, - TEdgeTypeDataMap - >; + return clonedGraph; }; } diff --git a/packages/evalite/src/generation/transformers/embedding-similarity.ts b/packages/evalite/src/generation/transformers/embedding-similarity.ts index ae6999dd..253c56da 100644 --- a/packages/evalite/src/generation/transformers/embedding-similarity.ts +++ b/packages/evalite/src/generation/transformers/embedding-similarity.ts @@ -23,7 +23,13 @@ export function embeddingSimilarity< } > { return async (graph: Graph) => { - const nodes = Array.from(graph.getNodes().values()); + const clonedGraph = graph.clone< + TInput, + TInputEdgeTypeDataMap & { + [K in `${TKey}Similarity`]: { score: number }; + } + >(); + const nodes = Array.from(clonedGraph.getNodes().values()); for (let i = 0; i < nodes.length; i++) { for (let j = i + 1; j < nodes.length; j++) { @@ -48,13 +54,13 @@ export function embeddingSimilarity< const similarity = cosineSimilarity(valueA, valueB); if (similarity > threshold) { - (graph as any).addEdge(nodeA.id, nodeB.id, `${property}Similarity`, { + clonedGraph.addEdge(nodeA.id, nodeB.id, `${property}Similarity`, { score: similarity, }); } } } - return graph; + return clonedGraph; }; } diff --git a/packages/evalite/src/generation/transformers/jaccard-similarity.ts b/packages/evalite/src/generation/transformers/jaccard-similarity.ts index aac7bd45..e462fab5 100644 --- a/packages/evalite/src/generation/transformers/jaccard-similarity.ts +++ b/packages/evalite/src/generation/transformers/jaccard-similarity.ts @@ -22,7 +22,13 @@ export function jaccardSimilarity< } > { return async (graph: Graph) => { - const nodes = Array.from(graph.getNodes().values()); + const clonedGraph = graph.clone< + TInput, + TInputEdgeTypeDataMap & { + [K in `${TKey}JaccardSimilarity`]: { score: number }; + } + >(); + const nodes = Array.from(clonedGraph.getNodes().values()); for (let i = 0; i < nodes.length; i++) { for (let j = i + 1; j < nodes.length; j++) { @@ -58,7 +64,7 @@ export function jaccardSimilarity< union.size === 0 ? 0 : intersection.size / union.size; if (similarity > threshold) { - (graph as any).addEdge( + clonedGraph.addEdge( nodeA.id, nodeB.id, `${property}JaccardSimilarity`, @@ -70,11 +76,6 @@ export function jaccardSimilarity< } } - return graph as unknown as Graph< - TInput, - TInputEdgeTypeDataMap & { - [K in `${TKey}JaccardSimilarity`]: { score: number }; - } - >; + return clonedGraph; }; } diff --git a/packages/evalite/src/generation/transformers/summary-extractor.ts b/packages/evalite/src/generation/transformers/summary-extractor.ts index bf41da57..40861044 100644 --- a/packages/evalite/src/generation/transformers/summary-extractor.ts +++ b/packages/evalite/src/generation/transformers/summary-extractor.ts @@ -51,7 +51,11 @@ export function summaryExtractor< TEdgeTypeDataMap > { return async (graph: Graph) => { - const nodes = Array.from(graph.getNodes().values()); + const clonedGraph = graph.clone< + TInput & { summary?: string }, + TEdgeTypeDataMap + >(); + const nodes = Array.from(clonedGraph.getNodes().values()); for (const node of nodes) { if (filter && !filter(node)) { @@ -71,9 +75,6 @@ export function summaryExtractor< }; } - return graph as unknown as Graph< - TInput & { summary?: string }, - TEdgeTypeDataMap - >; + return clonedGraph; }; } diff --git a/packages/evalite/src/generation/transformers/topic-extractor.ts b/packages/evalite/src/generation/transformers/topic-extractor.ts index 3ed1e1a1..e1e96ffc 100644 --- a/packages/evalite/src/generation/transformers/topic-extractor.ts +++ b/packages/evalite/src/generation/transformers/topic-extractor.ts @@ -57,7 +57,11 @@ export function topicExtractor< TEdgeTypeDataMap > { return async (graph: Graph) => { - const nodes = Array.from(graph.getNodes().values()); + const clonedGraph = graph.clone< + TInput & { topics?: string[] }, + TEdgeTypeDataMap + >(); + const nodes = Array.from(clonedGraph.getNodes().values()); for (const node of nodes) { if (filter && !filter(node)) { @@ -77,9 +81,6 @@ export function topicExtractor< }; } - return graph as unknown as Graph< - TInput & { topics?: string[] }, - TEdgeTypeDataMap - >; + return clonedGraph; }; } From 0577b439eadfc04e93cd1aeb064a7eedc29ddde8 Mon Sep 17 00:00:00 2001 From: Can Temizyurek Date: Mon, 24 Nov 2025 14:56:46 -0500 Subject: [PATCH 6/8] feat: add persona generation module and graph transformation example --- packages/evalite/src/generation/persona.ts | 155 +++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 packages/evalite/src/generation/persona.ts diff --git a/packages/evalite/src/generation/persona.ts b/packages/evalite/src/generation/persona.ts new file mode 100644 index 00000000..904c909a --- /dev/null +++ b/packages/evalite/src/generation/persona.ts @@ -0,0 +1,155 @@ +import { generateObject, jsonSchema, type LanguageModel } from "ai"; +import type { Graph, Node } from "./graph.js"; +import { promptBuilder } from "../scorers/prompt-builder.js"; + +export interface Persona { + description: string; + knowledgeLevel: "novice" | "intermediate" | "expert"; +} + +const PersonaSchema = jsonSchema<{ + description: string; +}>({ + type: "object", + properties: { + description: { + type: "string", + description: + "A detailed description of the fictional persona who would consume this content, including their background, motivations, and how they would interact with the material", + }, + }, + required: ["description"], +}); + +const generatePersonaPrompt = promptBuilder({ + prompt: + "Generate a fictional persona who would be interested in consuming the following content. The persona should represent a realistic reader/user with the specified knowledge level ({knowledgeLevel}). Provide a detailed description of who they are, their motivations for engaging with this content, and their background. Output JSON following the required schema.", + examples: [ + { + input: { + summary: + "A comprehensive guide to machine learning algorithms, covering supervised and unsupervised learning techniques with practical Python examples.", + knowledgeLevel: "intermediate", + }, + output: { + description: + "Sarah is a 32-year-old software developer at a mid-sized tech company. She has 5 years of experience in backend development and recently became interested in adding ML capabilities to her team's products. She's comfortable with Python but has limited exposure to data science concepts beyond basic statistics. She wants to understand the fundamentals well enough to have meaningful conversations with data scientists and potentially prototype simple ML features.", + }, + }, + { + input: { + summary: + "Introduction to gardening for beginners, covering basic soil preparation, plant selection, and watering techniques.", + knowledgeLevel: "novice", + }, + output: { + description: + "Emily is a 45-year-old office manager who just bought her first home with a backyard. She grew up in apartments and has never had outdoor space before. She's excited to start a vegetable garden but feels overwhelmed by all the options and doesn't know where to begin. She has no prior gardening experience and needs step-by-step guidance.", + }, + }, + { + input: { + summary: + "Advanced distributed systems architecture patterns for high-availability microservices deployments.", + knowledgeLevel: "expert", + }, + output: { + description: + "David is a 40-year-old principal engineer at a large fintech company. He has 15+ years of experience building distributed systems and has led several large-scale migrations. He's looking to stay current with the latest patterns and validate his architectural decisions against industry best practices. He often mentors junior engineers and needs authoritative references to share with his team.", + }, + }, + ], + task: ["summary", "knowledgeLevel"], +}); + +export async function generatePersona< + TNodeData extends { content: string; summary?: string }, + TEdgeTypeDataMap extends Record = Record, +>( + graph: Graph, + { + model, + amount, + filter = (node) => node.type === "document", + }: { + model: LanguageModel; + amount?: number; + filter?: (node: Node) => boolean; + } +): Promise { + const allNodes = Array.from(graph.getNodes().values()); + const filteredNodes = allNodes.filter(filter); + + if (filteredNodes.length === 0) { + return []; + } + + const nodesWithSummaries = filteredNodes.filter( + (node) => node.data.summary !== undefined && node.data.summary.trim() !== "" + ); + + if (nodesWithSummaries.length === 0) { + return []; + } + + const totalPersonas = amount ?? nodesWithSummaries.length; + + if (totalPersonas === 0) { + return []; + } + + const distribution = calculatePersonasPerNode( + totalPersonas, + nodesWithSummaries.length + ); + + const generationPromises: Promise[] = []; + + for (let i = 0; i < nodesWithSummaries.length; i++) { + const node = nodesWithSummaries[i]!; + const personaCount = distribution[i] ?? 0; + + for (let j = 0; j < personaCount; j++) { + const knowledgeLevel = getRandomKnowledgeLevel(); + + const promise = generateObject({ + model, + schema: PersonaSchema, + prompt: generatePersonaPrompt({ + summary: node.data.summary!, + knowledgeLevel, + }), + }).then((result) => ({ + description: result.object.description, + knowledgeLevel, + })); + + generationPromises.push(promise); + } + } + + return Promise.all(generationPromises); +} + +function getRandomKnowledgeLevel(): "novice" | "intermediate" | "expert" { + const levels: readonly ["novice", "intermediate", "expert"] = [ + "novice", + "intermediate", + "expert", + ]; + const index = Math.floor(Math.random() * levels.length); + return levels[index] ?? "intermediate"; +} + +function calculatePersonasPerNode( + totalAmount: number, + nodeCount: number +): number[] { + if (nodeCount === 0) return []; + const baseCount = Math.floor(totalAmount / nodeCount); + const remainder = totalAmount % nodeCount; + return Array.from( + { length: nodeCount }, + (_, i) => baseCount + (i < remainder ? 1 : 0) + ); +} From 15ef0b442b1258af3b2bb128434463c95960ef02 Mon Sep 17 00:00:00 2001 From: Can Temizyurek Date: Mon, 24 Nov 2025 17:44:28 -0500 Subject: [PATCH 7/8] refactor: introduce a new `transformer` HOF for a more concise and consistent transformer API, and migrate existing extractors to use it. --- packages/evalite/src/generation/graph.ts | 13 +- packages/evalite/src/generation/persona.ts | 6 +- .../transformers/chunk-extractor.ts | 67 +++------- .../transformers/embed-extractor.ts | 64 +++------- .../transformers/embedding-similarity.ts | 90 ++++--------- .../transformers/entity-extractor.ts | 75 +++-------- .../transformers/jaccard-similarity.ts | 119 ++++++------------ .../transformers/summary-extractor.ts | 59 +++------ .../transformers/topic-extractor.ts | 62 +++------ .../generation/transformers/transformer.ts | 117 +++++++++++------ 10 files changed, 241 insertions(+), 431 deletions(-) diff --git a/packages/evalite/src/generation/graph.ts b/packages/evalite/src/generation/graph.ts index bdbd2078..7ba9d374 100644 --- a/packages/evalite/src/generation/graph.ts +++ b/packages/evalite/src/generation/graph.ts @@ -1,3 +1,7 @@ +export type NoData = {}; +export type GraphNodeData = G extends Graph ? N : never; +export type GraphEdgeMap = G extends Graph ? E : never; + export type Edge< TNodeData, TEdgeTypeDataMap extends Record = {}, @@ -24,6 +28,7 @@ export class Graph< addNode(node: Node) { this.nodes.set(node.id, node); + return node; } getNode(id: string) { @@ -131,6 +136,10 @@ export function graph< export function node< TNodeData, TEdgeTypeDataMap extends Record = {}, ->(type: "document" | "chunk", data: TNodeData) { - return new Node(crypto.randomUUID(), type, data); +>(type: "document" | "chunk", data: TNodeData, id?: string) { + return new Node( + id ?? crypto.randomUUID(), + type, + data + ); } diff --git a/packages/evalite/src/generation/persona.ts b/packages/evalite/src/generation/persona.ts index 904c909a..0f7b15af 100644 --- a/packages/evalite/src/generation/persona.ts +++ b/packages/evalite/src/generation/persona.ts @@ -64,9 +64,9 @@ const generatePersonaPrompt = promptBuilder({ export async function generatePersona< TNodeData extends { content: string; summary?: string }, - TEdgeTypeDataMap extends Record = Record, + TEdgeMap extends Record = Record, >( - graph: Graph, + graph: Graph, { model, amount, @@ -74,7 +74,7 @@ export async function generatePersona< }: { model: LanguageModel; amount?: number; - filter?: (node: Node) => boolean; + filter?: (node: Node) => boolean; } ): Promise { const allNodes = Array.from(graph.getNodes().values()); diff --git a/packages/evalite/src/generation/transformers/chunk-extractor.ts b/packages/evalite/src/generation/transformers/chunk-extractor.ts index bc0fdc22..3b04aee6 100644 --- a/packages/evalite/src/generation/transformers/chunk-extractor.ts +++ b/packages/evalite/src/generation/transformers/chunk-extractor.ts @@ -1,56 +1,21 @@ -import { Graph, Node, type Edge } from "../graph.js"; -import type { Transformer } from "./transformer.js"; +import { node, type NoData } from "../graph.js"; +import { transformer } from "./transformer.js"; export type ChunkerFn = (content: string) => string[]; -export function chunkExtractor< - TInput extends { content: string }, - TInputEdgeTypeDataMap extends Record = {}, ->({ - chunker, - filter, -}: { - chunker: ChunkerFn; - filter?: (node: Node) => boolean; -}): Transformer< - TInput, - { content: string } & Partial, - TInputEdgeTypeDataMap, - TInputEdgeTypeDataMap & { chunk: undefined; parent: undefined } -> { - return async (graph: Graph) => { - const newGraph = new Graph< - { content: string } & Partial, - TInputEdgeTypeDataMap & { chunk: undefined; parent: undefined } - >(); +export const chunkExtractor = transformer< + { chunker: ChunkerFn }, + { content: string }, + {}, + { chunk: NoData; parent: NoData } +>(async ({ chunker }, { graph, nodes }) => { + for (const n of nodes) { + const chunks = chunker(n.data.content); - for (const node of graph.getNodes().values()) { - if (filter && !filter(node)) { - newGraph.addNode(new Node(node.id, node.type, node.data)); - continue; - } - - const chunks = chunker(node.data.content); - newGraph.addNode( - new Node< - { content: string } & Partial, - TInputEdgeTypeDataMap & { chunk: undefined; parent: undefined } - >(node.id, node.type, node.data) - ); - - for (const chunk of chunks) { - const newNode = new Node< - { content: string } & Partial, - TInputEdgeTypeDataMap & { chunk: undefined; parent: undefined } - >(crypto.randomUUID(), "chunk", { - content: chunk, - } as { content: string } & Partial); - newGraph.addNode(newNode); - newGraph.addEdge(node.id, newNode.id, "chunk", undefined); - newGraph.addEdge(newNode.id, node.id, "parent", undefined); - } + for (const chunk of chunks) { + const newNode = graph.addNode(node("chunk", { content: chunk })); + graph.addEdge(n.id, newNode.id, "chunk", {}); + graph.addEdge(newNode.id, n.id, "parent", {}); } - - return newGraph; - }; -} + } +}); diff --git a/packages/evalite/src/generation/transformers/embed-extractor.ts b/packages/evalite/src/generation/transformers/embed-extractor.ts index d79572f5..c9f982b6 100644 --- a/packages/evalite/src/generation/transformers/embed-extractor.ts +++ b/packages/evalite/src/generation/transformers/embed-extractor.ts @@ -1,50 +1,18 @@ -import { embed } from "ai"; -import { Graph, Node } from "../graph.js"; -import type { Transformer } from "./transformer.js"; -import type { EmbeddingModel } from "ai"; +import { embed, type EmbeddingModel } from "ai"; +import { transformer } from "./transformer.js"; -export function embedExtractor< - TInput, - TKey extends keyof TInput, - TEdgeTypeDataMap extends Record = {}, ->({ - model, - field, - filter, -}: { - model: EmbeddingModel; - field: TKey; - filter?: (node: Node) => boolean; -}): Transformer< - TInput, - TInput & { [K in TKey as `${string & K}Embedding`]: number[] }, - TEdgeTypeDataMap, - TEdgeTypeDataMap -> { - return async (graph: Graph) => { - const clonedGraph = graph.clone< - TInput & { [K in TKey as `${string & K}Embedding`]: number[] }, - TEdgeTypeDataMap - >(); - const nodes = Array.from(clonedGraph.getNodes().values()); +export const embedExtractor = transformer< + { model: EmbeddingModel; field: string }, + Record, + { embedding: number[]; embeddingField: string } +>(async ({ model, field }, { nodes }) => { + for (const node of nodes) { + if (node.data[field] == null) continue; - for (const node of nodes) { - if (filter && !filter(node)) continue; - if (node.data[field] == null) continue; - - const value = String(node.data[field]); - - const { embedding } = await embed({ - model, - value, - }); - - node.data = { - ...node.data, - [`${String(field)}Embedding`]: embedding, - }; - } - - return clonedGraph; - }; -} + const { embedding } = await embed({ + model, + value: String(node.data[field]), + }); + node.data = { ...node.data, embedding, embeddingField: field }; + } +}); diff --git a/packages/evalite/src/generation/transformers/embedding-similarity.ts b/packages/evalite/src/generation/transformers/embedding-similarity.ts index 253c56da..0fe4bc07 100644 --- a/packages/evalite/src/generation/transformers/embedding-similarity.ts +++ b/packages/evalite/src/generation/transformers/embedding-similarity.ts @@ -1,66 +1,30 @@ import { cosineSimilarity } from "ai"; -import type { Transformer } from "./transformer.js"; -import { Graph, Node } from "../graph.js"; - -export function embeddingSimilarity< - TInput, - TKey extends keyof TInput & string, - TInputEdgeTypeDataMap extends Record = {}, ->({ - property, - filter, - threshold = 0.5, -}: { - property: TKey; - filter?: (node: Node) => boolean; - threshold?: number; -}): Transformer< - TInput, - TInput, - TInputEdgeTypeDataMap, - TInputEdgeTypeDataMap & { - [K in `${TKey}Similarity`]: { score: number }; - } -> { - return async (graph: Graph) => { - const clonedGraph = graph.clone< - TInput, - TInputEdgeTypeDataMap & { - [K in `${TKey}Similarity`]: { score: number }; - } - >(); - const nodes = Array.from(clonedGraph.getNodes().values()); - - for (let i = 0; i < nodes.length; i++) { - for (let j = i + 1; j < nodes.length; j++) { - const nodeA = nodes[i]; - const nodeB = nodes[j]; - - if (!nodeA || !nodeB) continue; - - if (filter && (!filter(nodeA) || !filter(nodeB))) { - continue; - } - - const valueA = nodeA.data[property]; - const valueB = nodeB.data[property]; - - if (!valueA || !valueB) continue; - - if (!Array.isArray(valueA) || !Array.isArray(valueB)) { - continue; - } - - const similarity = cosineSimilarity(valueA, valueB); - - if (similarity > threshold) { - clonedGraph.addEdge(nodeA.id, nodeB.id, `${property}Similarity`, { - score: similarity, - }); - } +import { transformer } from "./transformer.js"; + +export const embeddingSimilarity = transformer< + { property: string; threshold?: number }, + Record, + {}, + { embeddingSimilarity: { score: number; property: string } } +>(async ({ property, threshold = 0.5 }, { graph, nodes }) => { + for (let i = 0; i < nodes.length; i++) { + for (let j = i + 1; j < nodes.length; j++) { + const nodeA = nodes[i]; + const nodeB = nodes[j]; + if (!nodeA || !nodeB) continue; + + const valueA = nodeA.data[property]; + const valueB = nodeB.data[property]; + if (!valueA || !valueB) continue; + if (!Array.isArray(valueA) || !Array.isArray(valueB)) continue; + + const similarity = cosineSimilarity(valueA, valueB); + if (similarity > threshold) { + graph.addEdge(nodeA.id, nodeB.id, "embeddingSimilarity", { + score: similarity, + property, + }); } } - - return clonedGraph; - }; -} + } +}); diff --git a/packages/evalite/src/generation/transformers/entity-extractor.ts b/packages/evalite/src/generation/transformers/entity-extractor.ts index 7040d3fc..f47787ce 100644 --- a/packages/evalite/src/generation/transformers/entity-extractor.ts +++ b/packages/evalite/src/generation/transformers/entity-extractor.ts @@ -1,14 +1,9 @@ -import { Graph, Node } from "../graph.js"; -import type { Transformer } from "./transformer.js"; +import { transformer } from "./transformer.js"; import { generateObject, jsonSchema, type LanguageModel } from "ai"; import { promptBuilder } from "../../scorers/prompt-builder.js"; const EntitiesSchema = jsonSchema<{ - entities: Array<{ - type: string; - value: string; - description?: string; - }>; + entities: Array<{ type: string; value: string; description?: string }>; }>({ type: "object", properties: { @@ -55,11 +50,7 @@ const extractEntitiesPrompt = promptBuilder({ value: "Apple Inc.", description: "Technology company", }, - { - type: "PERSON", - value: "Tim Cook", - description: "CEO of Apple", - }, + { type: "PERSON", value: "Tim Cook", description: "CEO of Apple" }, { type: "LOCATION", value: "San Francisco", @@ -77,50 +68,20 @@ const extractEntitiesPrompt = promptBuilder({ task: ["content"], }); -export function entityExtractor({ - model, - filter, -}: { - model: LanguageModel; - filter?: (node: Node) => boolean; -}): Transformer< - TInput, - TInput & { - entities?: { type: string; value: string; description?: string }[]; - } -> { - return async (graph: Graph) => { - const nodes: Node< - TInput & { - entities?: { type: string; value: string; description?: string }[]; - } - >[] = []; - - for (const node of graph.getNodes().values()) { - if (filter && !filter(node)) { - nodes.push( - new Node(node.id, node.type, { - ...node.data, - }) - ); - continue; - } - const result = await generateObject({ - model, - schema: EntitiesSchema, - prompt: extractEntitiesPrompt({ - content: node.data.content, - }), - }); +type Entity = { type: string; value: string; description?: string }; - nodes.push( - new Node(node.id, node.type, { - ...node.data, - entities: result.object.entities, - }) - ); - } +export const entityExtractor = transformer< + { model: LanguageModel }, + { content: string }, + { entities?: Entity[] } +>(async ({ model }, { nodes }) => { + for (const node of nodes) { + const result = await generateObject({ + model, + schema: EntitiesSchema, + prompt: extractEntitiesPrompt({ content: node.data.content }), + }); - return new Graph(nodes); - }; -} + node.data = { ...node.data, entities: result.object.entities }; + } +}); diff --git a/packages/evalite/src/generation/transformers/jaccard-similarity.ts b/packages/evalite/src/generation/transformers/jaccard-similarity.ts index e462fab5..88ebe0d5 100644 --- a/packages/evalite/src/generation/transformers/jaccard-similarity.ts +++ b/packages/evalite/src/generation/transformers/jaccard-similarity.ts @@ -1,81 +1,42 @@ -import type { Transformer } from "./transformer.js"; -import { Graph, Node } from "../graph.js"; - -export function jaccardSimilarity< - TInput, - TKey extends keyof TInput & string, - TInputEdgeTypeDataMap extends Record = {}, ->({ - property, - filter, - threshold = 0.5, -}: { - property: TKey; - filter?: (node: Node) => boolean; - threshold?: number; -}): Transformer< - TInput, - TInput, - TInputEdgeTypeDataMap, - TInputEdgeTypeDataMap & { - [K in `${TKey}JaccardSimilarity`]: { score: number }; - } -> { - return async (graph: Graph) => { - const clonedGraph = graph.clone< - TInput, - TInputEdgeTypeDataMap & { - [K in `${TKey}JaccardSimilarity`]: { score: number }; - } - >(); - const nodes = Array.from(clonedGraph.getNodes().values()); - - for (let i = 0; i < nodes.length; i++) { - for (let j = i + 1; j < nodes.length; j++) { - const nodeA = nodes[i]; - const nodeB = nodes[j]; - - if (!nodeA || !nodeB) continue; - - if (filter && (!filter(nodeA) || !filter(nodeB))) { - continue; - } - - const valueA = nodeA.data[property]; - const valueB = nodeB.data[property]; - - if (!valueA || !valueB) continue; - - const setA = new Set( - Array.isArray(valueA) - ? valueA - : String(valueA).toLowerCase().split(/\s+/) - ); - const setB = new Set( - Array.isArray(valueB) - ? valueB - : String(valueB).toLowerCase().split(/\s+/) - ); - - const intersection = new Set([...setA].filter((x) => setB.has(x))); - const union = new Set([...setA, ...setB]); - - const similarity = - union.size === 0 ? 0 : intersection.size / union.size; - - if (similarity > threshold) { - clonedGraph.addEdge( - nodeA.id, - nodeB.id, - `${property}JaccardSimilarity`, - { - score: similarity, - } - ); - } +import { transformer } from "./transformer.js"; + +export const jaccardSimilarity = transformer< + { property: string; threshold?: number }, + Record, + {}, + { jaccardSimilarity: { score: number; property: string } } +>(async ({ property, threshold = 0.5 }, { graph, nodes }) => { + for (let i = 0; i < nodes.length; i++) { + for (let j = i + 1; j < nodes.length; j++) { + const nodeA = nodes[i]; + const nodeB = nodes[j]; + if (!nodeA || !nodeB) continue; + + const valueA = nodeA.data[property]; + const valueB = nodeB.data[property]; + if (!valueA || !valueB) continue; + + const setA = new Set( + Array.isArray(valueA) + ? valueA + : String(valueA).toLowerCase().split(/\s+/) + ); + const setB = new Set( + Array.isArray(valueB) + ? valueB + : String(valueB).toLowerCase().split(/\s+/) + ); + + const intersection = new Set([...setA].filter((x) => setB.has(x))); + const union = new Set([...setA, ...setB]); + const similarity = union.size === 0 ? 0 : intersection.size / union.size; + + if (similarity > threshold) { + graph.addEdge(nodeA.id, nodeB.id, "jaccardSimilarity", { + score: similarity, + property, + }); } } - - return clonedGraph; - }; -} + } +}); diff --git a/packages/evalite/src/generation/transformers/summary-extractor.ts b/packages/evalite/src/generation/transformers/summary-extractor.ts index 40861044..1e7c80b9 100644 --- a/packages/evalite/src/generation/transformers/summary-extractor.ts +++ b/packages/evalite/src/generation/transformers/summary-extractor.ts @@ -1,5 +1,4 @@ -import { Graph, Node } from "../graph.js"; -import type { Transformer } from "./transformer.js"; +import { transformer } from "./transformer.js"; import { generateObject, jsonSchema, type LanguageModel } from "ai"; import { promptBuilder } from "../../scorers/prompt-builder.js"; @@ -35,46 +34,18 @@ const extractSummaryPrompt = promptBuilder({ task: ["content"], }); -export function summaryExtractor< - TInput extends { content: string }, - TEdgeTypeDataMap extends Record = {}, ->({ - model, - filter, -}: { - model: LanguageModel; - filter?: (node: Node) => boolean; -}): Transformer< - TInput, - TInput & { summary?: string }, - TEdgeTypeDataMap, - TEdgeTypeDataMap -> { - return async (graph: Graph) => { - const clonedGraph = graph.clone< - TInput & { summary?: string }, - TEdgeTypeDataMap - >(); - const nodes = Array.from(clonedGraph.getNodes().values()); +export const summaryExtractor = transformer< + { model: LanguageModel }, + { content: string }, + { summary?: string } +>(async ({ model }, { nodes }) => { + for (const node of nodes) { + const result = await generateObject({ + model, + schema: SummarySchema, + prompt: extractSummaryPrompt({ content: node.data.content }), + }); - for (const node of nodes) { - if (filter && !filter(node)) { - continue; - } - const result = await generateObject({ - model, - schema: SummarySchema, - prompt: extractSummaryPrompt({ - content: node.data.content, - }), - }); - - node.data = { - ...node.data, - summary: result.object.summary, - }; - } - - return clonedGraph; - }; -} + node.data = { ...node.data, summary: result.object.summary }; + } +}); diff --git a/packages/evalite/src/generation/transformers/topic-extractor.ts b/packages/evalite/src/generation/transformers/topic-extractor.ts index e1e96ffc..9b316768 100644 --- a/packages/evalite/src/generation/transformers/topic-extractor.ts +++ b/packages/evalite/src/generation/transformers/topic-extractor.ts @@ -1,5 +1,4 @@ -import { Graph, Node } from "../graph.js"; -import type { Transformer } from "./transformer.js"; +import { transformer } from "./transformer.js"; import { generateObject, jsonSchema, type LanguageModel } from "ai"; import { promptBuilder } from "../../scorers/prompt-builder.js"; @@ -41,46 +40,21 @@ const extractTopicPrompt = promptBuilder({ task: ["content"], }); -export function topicExtractor< - TInput extends { content: string }, - TEdgeTypeDataMap extends Record = {}, ->({ - model, - filter, -}: { - model: LanguageModel; - filter?: (node: Node) => boolean; -}): Transformer< - TInput, - TInput & { topics?: string[] }, - TEdgeTypeDataMap, - TEdgeTypeDataMap -> { - return async (graph: Graph) => { - const clonedGraph = graph.clone< - TInput & { topics?: string[] }, - TEdgeTypeDataMap - >(); - const nodes = Array.from(clonedGraph.getNodes().values()); +export const topicExtractor = transformer< + { model: LanguageModel }, + { content: string }, + { topics?: string[] } +>(async ({ model }, { nodes }) => { + for (const node of nodes) { + const result = await generateObject({ + model, + schema: TopicSchema, + prompt: extractTopicPrompt({ content: node.data.content }), + }); - for (const node of nodes) { - if (filter && !filter(node)) { - continue; - } - const result = await generateObject({ - model, - schema: TopicSchema, - prompt: extractTopicPrompt({ - content: node.data.content, - }), - }); - - node.data = { - ...node.data, - topics: result.object.topics.map((topic) => topic.trim().toLowerCase()), - }; - } - - return clonedGraph; - }; -} + node.data = { + ...node.data, + topics: result.object.topics.map((t) => t.trim().toLowerCase()), + }; + } +}); diff --git a/packages/evalite/src/generation/transformers/transformer.ts b/packages/evalite/src/generation/transformers/transformer.ts index d2150a62..38d4dafb 100644 --- a/packages/evalite/src/generation/transformers/transformer.ts +++ b/packages/evalite/src/generation/transformers/transformer.ts @@ -1,53 +1,90 @@ -import type { Graph } from "../graph.js"; +import type { Graph, Node } from "../graph.js"; export type Transformer< - TInputNodeData = {}, - TOutputNodeData = {}, - TInputEdgeTypeDataMap extends Record = {}, - TOutputEdgeTypeDataMap extends Record = {}, -> = ( - graph: Graph -) => PromiseLike>; - -export type TransformerPipeline< - TCurrentNodeData, - TCurrentEdgeTypeDataMap extends Record, -> = { - pipe>( - transformer: Transformer< - TCurrentNodeData, - TNextNodeData, - TCurrentEdgeTypeDataMap, - TNextEdgeTypeDataMap - > - ): TransformerPipeline; - build(): Promise>; + TInput extends Graph = Graph<{}, {}>, + TOutput extends Graph = Graph<{}, {}>, +> = (graph: TInput) => PromiseLike; + +export type TransformerPipeline> = { + pipe>( + transformer: Transformer + ): TransformerPipeline; + build(): Promise; }; -export function transform< - TInputNodeData, - TInputEdgeTypeDataMap extends Record = {}, +type FilterFn = (node: Node>) => boolean; + +export function transformer< + TOptions, + TInputConstraint, + TDataAdditions = {}, + TEdgeAdditions extends Record = {}, >( - graph: Graph -): TransformerPipeline { - const createPipeline = < - TCurrentNodeData, - TCurrentEdgeTypeDataMap extends Record, + handler: ( + options: TOptions, + data: { + graph: Graph; + nodes: Node[]; + } + ) => PromiseLike +): = {}>( + options: TOptions & { filter?: FilterFn } +) => Transformer< + Graph, + Graph +> { + return < + TInput extends TInputConstraint, + TEdgeMap extends Record, >( - currentGraph: PromiseLike> - ): TransformerPipeline => ({ - pipe>( - transformer: Transformer< - TCurrentNodeData, - TNextNodeData, - TCurrentEdgeTypeDataMap, - TNextEdgeTypeDataMap - > + options: TOptions & { filter?: FilterFn } + ): Transformer< + Graph, + Graph + > => { + return async ( + graph: Graph + ): Promise> => { + const clonedGraph = graph.clone() as unknown as Graph< + TInputConstraint & TDataAdditions, + TEdgeAdditions + >; + + const { filter, ...restOptions } = options; + const allNodes = Array.from(clonedGraph.getNodes().values()); + const filteredNodes = filter + ? allNodes.filter((n) => + filter( + n as unknown as Node> + ) + ) + : allNodes; + + await handler(restOptions as TOptions, { + graph: clonedGraph, + nodes: filteredNodes, + }); + return clonedGraph as Graph< + TInput & TDataAdditions, + TEdgeMap & TEdgeAdditions + >; + }; + }; +} + +export function transform>( + graph: TGraph +): TransformerPipeline { + const createPipeline = >( + currentGraph: PromiseLike + ): TransformerPipeline => ({ + pipe>( + transformer: Transformer ) { const nextGraph = Promise.resolve(currentGraph).then((resolvedGraph) => transformer(resolvedGraph) ); - return createPipeline(nextGraph); + return createPipeline(nextGraph); }, build() { return Promise.resolve(currentGraph); From a396c5e142f3fff6966a53ae34c00789d948fa6e Mon Sep 17 00:00:00 2001 From: Can Temizyurek Date: Wed, 26 Nov 2025 09:29:51 -0500 Subject: [PATCH 8/8] refactor: transformer functions --- packages/evalite/src/generation/graph.ts | 22 ++-- packages/evalite/src/generation/test.ts | 45 +++++++ .../transformers/chunk-extractor.ts | 62 ++++++--- .../transformers/embed-extractor.ts | 55 +++++--- .../transformers/embedding-similarity.ts | 91 +++++++++---- .../transformers/entity-extractor.ts | 44 ++++--- .../transformers/jaccard-similarity.ts | 121 ++++++++++++------ .../transformers/summary-extractor.ts | 44 ++++--- .../transformers/topic-extractor.ts | 50 +++++--- .../generation/transformers/transformer.ts | 62 +-------- 10 files changed, 383 insertions(+), 213 deletions(-) create mode 100644 packages/evalite/src/generation/test.ts diff --git a/packages/evalite/src/generation/graph.ts b/packages/evalite/src/generation/graph.ts index 7ba9d374..1373f99c 100644 --- a/packages/evalite/src/generation/graph.ts +++ b/packages/evalite/src/generation/graph.ts @@ -2,6 +2,17 @@ export type NoData = {}; export type GraphNodeData = G extends Graph ? N : never; export type GraphEdgeMap = G extends Graph ? E : never; +export type AddEdgeTypes< + TBase extends Record, + TNew extends Record, +> = { + [K in keyof TBase | keyof TNew]: K extends keyof TNew + ? TNew[K] + : K extends keyof TBase + ? TBase[K] + : never; +}; + export type Edge< TNodeData, TEdgeTypeDataMap extends Record = {}, @@ -44,14 +55,7 @@ export class Graph< node2: string, type: K, data: TEdgeTypeDataMap[K] - ): void; - addEdge( - node1: string, - node2: string, - type: K, - data: D - ): void; - addEdge(node1: string, node2: string, type: string, data: unknown) { + ): void { const node1Node = this.nodes.get(node1); const node2Node = this.nodes.get(node2); if (!node1Node || !node2Node) { @@ -67,7 +71,7 @@ export class Graph< } clone< - TNewNodeData = TNodeData, + TNewNodeData extends TNodeData = TNodeData, TNewEdgeTypeDataMap extends Record = TEdgeTypeDataMap, >(): Graph { const newNodes = new Map>(); diff --git a/packages/evalite/src/generation/test.ts b/packages/evalite/src/generation/test.ts new file mode 100644 index 00000000..47b6fb5e --- /dev/null +++ b/packages/evalite/src/generation/test.ts @@ -0,0 +1,45 @@ +import { jaccardSimilarity } from "./transformers/jaccard-similarity.js"; +import { topicExtractor } from "./transformers/topic-extractor.js"; +import { summaryExtractor } from "./transformers/summary-extractor.js"; +import { graph, node } from "./graph.js"; +import { transform } from "./transformers/transformer.js"; +import { openai } from "@ai-sdk/openai"; +import { embedExtractor } from "./transformers/embed-extractor.js"; +import { embeddingSimilarity } from "./transformers/embedding-similarity.js"; +import { chunkExtractor } from "./transformers/chunk-extractor.js"; +import { generatePersona } from "./persona.js"; + +const g = await transform(graph([node("document", { content: "Hello world" })])) + .pipe(chunkExtractor({ chunker: (content) => content.split(" ") })) + .pipe(summaryExtractor({ model: openai("gpt-4.1") })) + .pipe(topicExtractor({ model: openai("gpt-4.1") })) + .pipe(jaccardSimilarity({ property: "topics" })) + .pipe( + embedExtractor({ + model: openai.embedding("text-embedding-3-small"), + property: "summary", + }) + ) + .pipe(embeddingSimilarity({ property: "summaryEmbedding" })) + .pipe(embeddingSimilarity({ property: "content" })) + .build(); + +g.getNodes().forEach((node) => { + node.getEdges().forEach((edge) => { + if (edge.type === "jaccardSimilarity") { + console.log( + ` Jaccard score: ${edge.data.score} (property: ${edge.data.property})` + ); + } else if (edge.type === "embeddingSimilarity") { + console.log( + ` Embedding score: ${edge.data.score} (property: ${edge.data.property})` + ); + } else if (edge.type === "chunk" || edge.type === "parent") { + console.log(` Chunk relationship (no score data)`); + } + }); +}); + +generatePersona(g, { model: openai("gpt-4.1") }).then((personas) => { + console.log(personas); +}); diff --git a/packages/evalite/src/generation/transformers/chunk-extractor.ts b/packages/evalite/src/generation/transformers/chunk-extractor.ts index 3b04aee6..0d5e1aa5 100644 --- a/packages/evalite/src/generation/transformers/chunk-extractor.ts +++ b/packages/evalite/src/generation/transformers/chunk-extractor.ts @@ -1,21 +1,51 @@ -import { node, type NoData } from "../graph.js"; -import { transformer } from "./transformer.js"; +import { + node, + type AddEdgeTypes, + type Graph, + type Node, + type NoData, +} from "../graph.js"; +import type { Transformer } from "./transformer.js"; export type ChunkerFn = (content: string) => string[]; -export const chunkExtractor = transformer< - { chunker: ChunkerFn }, - { content: string }, - {}, - { chunk: NoData; parent: NoData } ->(async ({ chunker }, { graph, nodes }) => { - for (const n of nodes) { - const chunks = chunker(n.data.content); +export function chunkExtractor< + TInput extends { content: string }, + TEdges extends Record = {}, +>(options: { + chunker: ChunkerFn; + filter?: (node: Node) => boolean; +}): Transformer< + Graph, + Graph> +> { + return async (graph) => { + const originalNodes = Array.from(graph.getNodes().values()); + const filteredIds = new Set( + (options.filter + ? originalNodes.filter(options.filter) + : originalNodes + ).map((n) => n.id) + ); - for (const chunk of chunks) { - const newNode = graph.addNode(node("chunk", { content: chunk })); - graph.addEdge(n.id, newNode.id, "chunk", {}); - graph.addEdge(newNode.id, n.id, "parent", {}); + const cloned = graph.clone< + TInput, + AddEdgeTypes + >(); + + for (const n of cloned.getNodes().values()) { + if (!filteredIds.has(n.id)) continue; + const chunks = options.chunker(n.data.content); + + for (const chunk of chunks) { + const newNode = cloned.addNode( + node("chunk", { content: chunk } as TInput) + ); + cloned.addEdge(n.id, newNode.id, "chunk", {}); + cloned.addEdge(newNode.id, n.id, "parent", {}); + } } - } -}); + + return cloned; + }; +} diff --git a/packages/evalite/src/generation/transformers/embed-extractor.ts b/packages/evalite/src/generation/transformers/embed-extractor.ts index c9f982b6..117d6741 100644 --- a/packages/evalite/src/generation/transformers/embed-extractor.ts +++ b/packages/evalite/src/generation/transformers/embed-extractor.ts @@ -1,18 +1,43 @@ import { embed, type EmbeddingModel } from "ai"; -import { transformer } from "./transformer.js"; +import { type Graph, type Node } from "../graph.js"; +import type { Transformer } from "./transformer.js"; -export const embedExtractor = transformer< - { model: EmbeddingModel; field: string }, - Record, - { embedding: number[]; embeddingField: string } ->(async ({ model, field }, { nodes }) => { - for (const node of nodes) { - if (node.data[field] == null) continue; +export function embedExtractor< + TInput extends Record, + TEdges extends Record = {}, + TProperty extends keyof TInput & string = keyof TInput & string, +>(options: { + model: EmbeddingModel; + property: TProperty; + filter?: (node: Node) => boolean; +}): Transformer< + Graph, + Graph +> { + return async (graph) => { + const cloned = graph.clone< + TInput & { [K in `${TProperty}Embedding`]: number[] }, + TEdges + >(); + const nodes = Array.from(cloned.getNodes().values()); + const filtered = options.filter ? nodes.filter(options.filter) : nodes; - const { embedding } = await embed({ - model, - value: String(node.data[field]), - }); - node.data = { ...node.data, embedding, embeddingField: field }; - } -}); + const embeddingKey = + `${options.property}Embedding` as `${TProperty}Embedding`; + + for (const node of filtered) { + if (node.data[options.property] == null) continue; + + const { embedding } = await embed({ + model: options.model, + value: String(node.data[options.property]), + }); + node.data = { + ...node.data, + [embeddingKey]: embedding, + }; + } + + return cloned; + }; +} diff --git a/packages/evalite/src/generation/transformers/embedding-similarity.ts b/packages/evalite/src/generation/transformers/embedding-similarity.ts index 0fe4bc07..f0c7221b 100644 --- a/packages/evalite/src/generation/transformers/embedding-similarity.ts +++ b/packages/evalite/src/generation/transformers/embedding-similarity.ts @@ -1,30 +1,73 @@ import { cosineSimilarity } from "ai"; -import { transformer } from "./transformer.js"; +import { type AddEdgeTypes, type Graph, type Node } from "../graph.js"; +import type { Transformer } from "./transformer.js"; -export const embeddingSimilarity = transformer< - { property: string; threshold?: number }, - Record, - {}, - { embeddingSimilarity: { score: number; property: string } } ->(async ({ property, threshold = 0.5 }, { graph, nodes }) => { - for (let i = 0; i < nodes.length; i++) { - for (let j = i + 1; j < nodes.length; j++) { - const nodeA = nodes[i]; - const nodeB = nodes[j]; - if (!nodeA || !nodeB) continue; +export function embeddingSimilarity< + TInput extends Record, + TEdges extends Record = {}, +>(options: { + property: keyof TInput & string; + threshold?: number; + filter?: (node: Node) => boolean; +}): Transformer< + Graph, + Graph< + TInput, + AddEdgeTypes< + TEdges, + { + embeddingSimilarity: { score: number; property: keyof TInput & string }; + } + > + > +> { + return async (graph) => { + const originalNodes = Array.from(graph.getNodes().values()); + const filteredIds = new Set( + (options.filter + ? originalNodes.filter(options.filter) + : originalNodes + ).map((n) => n.id) + ); + + const cloned = graph.clone< + TInput, + AddEdgeTypes< + TEdges, + { + embeddingSimilarity: { + score: number; + property: keyof TInput & string; + }; + } + > + >(); + const filtered = Array.from(cloned.getNodes().values()).filter((n) => + filteredIds.has(n.id) + ); + const threshold = options.threshold ?? 0.5; - const valueA = nodeA.data[property]; - const valueB = nodeB.data[property]; - if (!valueA || !valueB) continue; - if (!Array.isArray(valueA) || !Array.isArray(valueB)) continue; + for (let i = 0; i < filtered.length; i++) { + for (let j = i + 1; j < filtered.length; j++) { + const nodeA = filtered[i]; + const nodeB = filtered[j]; + if (!nodeA || !nodeB) continue; - const similarity = cosineSimilarity(valueA, valueB); - if (similarity > threshold) { - graph.addEdge(nodeA.id, nodeB.id, "embeddingSimilarity", { - score: similarity, - property, - }); + const valueA = nodeA.data[options.property]; + const valueB = nodeB.data[options.property]; + if (!valueA || !valueB) continue; + if (!Array.isArray(valueA) || !Array.isArray(valueB)) continue; + + const similarity = cosineSimilarity(valueA, valueB); + if (similarity > threshold) { + cloned.addEdge(nodeA.id, nodeB.id, "embeddingSimilarity", { + score: similarity, + property: options.property, + }); + } } } - } -}); + + return cloned; + }; +} diff --git a/packages/evalite/src/generation/transformers/entity-extractor.ts b/packages/evalite/src/generation/transformers/entity-extractor.ts index f47787ce..30f4ae39 100644 --- a/packages/evalite/src/generation/transformers/entity-extractor.ts +++ b/packages/evalite/src/generation/transformers/entity-extractor.ts @@ -1,5 +1,6 @@ -import { transformer } from "./transformer.js"; import { generateObject, jsonSchema, type LanguageModel } from "ai"; +import { type Graph, type Node } from "../graph.js"; +import type { Transformer } from "./transformer.js"; import { promptBuilder } from "../../scorers/prompt-builder.js"; const EntitiesSchema = jsonSchema<{ @@ -70,18 +71,31 @@ const extractEntitiesPrompt = promptBuilder({ type Entity = { type: string; value: string; description?: string }; -export const entityExtractor = transformer< - { model: LanguageModel }, - { content: string }, - { entities?: Entity[] } ->(async ({ model }, { nodes }) => { - for (const node of nodes) { - const result = await generateObject({ - model, - schema: EntitiesSchema, - prompt: extractEntitiesPrompt({ content: node.data.content }), - }); +export function entityExtractor< + TInput extends { content: string }, + TEdges extends Record = {}, +>(options: { + model: LanguageModel; + filter?: (node: Node) => boolean; +}): Transformer< + Graph, + Graph +> { + return async (graph) => { + const cloned = graph.clone(); + const nodes = Array.from(cloned.getNodes().values()); + const filtered = options.filter ? nodes.filter(options.filter) : nodes; - node.data = { ...node.data, entities: result.object.entities }; - } -}); + for (const node of filtered) { + const result = await generateObject({ + model: options.model, + schema: EntitiesSchema, + prompt: extractEntitiesPrompt({ content: node.data.content }), + }); + + node.data = { ...node.data, entities: result.object.entities }; + } + + return cloned; + }; +} diff --git a/packages/evalite/src/generation/transformers/jaccard-similarity.ts b/packages/evalite/src/generation/transformers/jaccard-similarity.ts index 88ebe0d5..d6ab7454 100644 --- a/packages/evalite/src/generation/transformers/jaccard-similarity.ts +++ b/packages/evalite/src/generation/transformers/jaccard-similarity.ts @@ -1,42 +1,83 @@ -import { transformer } from "./transformer.js"; - -export const jaccardSimilarity = transformer< - { property: string; threshold?: number }, - Record, - {}, - { jaccardSimilarity: { score: number; property: string } } ->(async ({ property, threshold = 0.5 }, { graph, nodes }) => { - for (let i = 0; i < nodes.length; i++) { - for (let j = i + 1; j < nodes.length; j++) { - const nodeA = nodes[i]; - const nodeB = nodes[j]; - if (!nodeA || !nodeB) continue; - - const valueA = nodeA.data[property]; - const valueB = nodeB.data[property]; - if (!valueA || !valueB) continue; - - const setA = new Set( - Array.isArray(valueA) - ? valueA - : String(valueA).toLowerCase().split(/\s+/) - ); - const setB = new Set( - Array.isArray(valueB) - ? valueB - : String(valueB).toLowerCase().split(/\s+/) - ); - - const intersection = new Set([...setA].filter((x) => setB.has(x))); - const union = new Set([...setA, ...setB]); - const similarity = union.size === 0 ? 0 : intersection.size / union.size; - - if (similarity > threshold) { - graph.addEdge(nodeA.id, nodeB.id, "jaccardSimilarity", { - score: similarity, - property, - }); +import { type AddEdgeTypes, type Graph, type Node } from "../graph.js"; +import type { Transformer } from "./transformer.js"; + +export function jaccardSimilarity< + TInput extends Record, + TEdges extends Record = {}, +>(options: { + property: keyof TInput & string; + threshold?: number; + filter?: (node: Node) => boolean; +}): Transformer< + Graph, + Graph< + TInput, + AddEdgeTypes< + TEdges, + { + jaccardSimilarity: { score: number; property: keyof TInput & string }; + } + > + > +> { + return async (graph) => { + const originalNodes = Array.from(graph.getNodes().values()); + const filteredIds = new Set( + (options.filter + ? originalNodes.filter(options.filter) + : originalNodes + ).map((n) => n.id) + ); + + const cloned = graph.clone< + TInput, + AddEdgeTypes< + TEdges, + { + jaccardSimilarity: { score: number; property: keyof TInput & string }; + } + > + >(); + const filtered = Array.from(cloned.getNodes().values()).filter((n) => + filteredIds.has(n.id) + ); + const threshold = options.threshold ?? 0.5; + + for (let i = 0; i < filtered.length; i++) { + for (let j = i + 1; j < filtered.length; j++) { + const nodeA = filtered[i]; + const nodeB = filtered[j]; + if (!nodeA || !nodeB) continue; + + const valueA = nodeA.data[options.property]; + const valueB = nodeB.data[options.property]; + if (!valueA || !valueB) continue; + + const setA = new Set( + Array.isArray(valueA) + ? valueA + : String(valueA).toLowerCase().split(/\s+/) + ); + const setB = new Set( + Array.isArray(valueB) + ? valueB + : String(valueB).toLowerCase().split(/\s+/) + ); + + const intersection = new Set([...setA].filter((x) => setB.has(x))); + const union = new Set([...setA, ...setB]); + const similarity = + union.size === 0 ? 0 : intersection.size / union.size; + + if (similarity > threshold) { + cloned.addEdge(nodeA.id, nodeB.id, "jaccardSimilarity", { + score: similarity, + property: options.property, + }); + } } } - } -}); + + return cloned; + }; +} diff --git a/packages/evalite/src/generation/transformers/summary-extractor.ts b/packages/evalite/src/generation/transformers/summary-extractor.ts index 1e7c80b9..61a8c59a 100644 --- a/packages/evalite/src/generation/transformers/summary-extractor.ts +++ b/packages/evalite/src/generation/transformers/summary-extractor.ts @@ -1,5 +1,6 @@ -import { transformer } from "./transformer.js"; import { generateObject, jsonSchema, type LanguageModel } from "ai"; +import { type Graph, type Node } from "../graph.js"; +import type { Transformer } from "./transformer.js"; import { promptBuilder } from "../../scorers/prompt-builder.js"; const SummarySchema = jsonSchema<{ @@ -34,18 +35,31 @@ const extractSummaryPrompt = promptBuilder({ task: ["content"], }); -export const summaryExtractor = transformer< - { model: LanguageModel }, - { content: string }, - { summary?: string } ->(async ({ model }, { nodes }) => { - for (const node of nodes) { - const result = await generateObject({ - model, - schema: SummarySchema, - prompt: extractSummaryPrompt({ content: node.data.content }), - }); +export function summaryExtractor< + TInput extends { content: string }, + TEdges extends Record = {}, +>(options: { + model: LanguageModel; + filter?: (node: Node) => boolean; +}): Transformer< + Graph, + Graph +> { + return async (graph) => { + const cloned = graph.clone(); + const nodes = Array.from(cloned.getNodes().values()); + const filtered = options.filter ? nodes.filter(options.filter) : nodes; - node.data = { ...node.data, summary: result.object.summary }; - } -}); + for (const node of filtered) { + const result = await generateObject({ + model: options.model, + schema: SummarySchema, + prompt: extractSummaryPrompt({ content: node.data.content }), + }); + + node.data = { ...node.data, summary: result.object.summary }; + } + + return cloned; + }; +} diff --git a/packages/evalite/src/generation/transformers/topic-extractor.ts b/packages/evalite/src/generation/transformers/topic-extractor.ts index 9b316768..2db7d326 100644 --- a/packages/evalite/src/generation/transformers/topic-extractor.ts +++ b/packages/evalite/src/generation/transformers/topic-extractor.ts @@ -1,5 +1,6 @@ -import { transformer } from "./transformer.js"; import { generateObject, jsonSchema, type LanguageModel } from "ai"; +import { type Graph, type Node } from "../graph.js"; +import type { Transformer } from "./transformer.js"; import { promptBuilder } from "../../scorers/prompt-builder.js"; const TopicSchema = jsonSchema<{ @@ -40,21 +41,34 @@ const extractTopicPrompt = promptBuilder({ task: ["content"], }); -export const topicExtractor = transformer< - { model: LanguageModel }, - { content: string }, - { topics?: string[] } ->(async ({ model }, { nodes }) => { - for (const node of nodes) { - const result = await generateObject({ - model, - schema: TopicSchema, - prompt: extractTopicPrompt({ content: node.data.content }), - }); +export function topicExtractor< + TInput extends { content: string }, + TEdges extends Record = {}, +>(options: { + model: LanguageModel; + filter?: (node: Node) => boolean; +}): Transformer< + Graph, + Graph +> { + return async (graph) => { + const cloned = graph.clone(); + const nodes = Array.from(cloned.getNodes().values()); + const filtered = options.filter ? nodes.filter(options.filter) : nodes; - node.data = { - ...node.data, - topics: result.object.topics.map((t) => t.trim().toLowerCase()), - }; - } -}); + for (const node of filtered) { + const result = await generateObject({ + model: options.model, + schema: TopicSchema, + prompt: extractTopicPrompt({ content: node.data.content }), + }); + + node.data = { + ...node.data, + topics: result.object.topics.map((t) => t.trim().toLowerCase()), + }; + } + + return cloned; + }; +} diff --git a/packages/evalite/src/generation/transformers/transformer.ts b/packages/evalite/src/generation/transformers/transformer.ts index 38d4dafb..d33a0643 100644 --- a/packages/evalite/src/generation/transformers/transformer.ts +++ b/packages/evalite/src/generation/transformers/transformer.ts @@ -1,4 +1,4 @@ -import type { Graph, Node } from "../graph.js"; +import type { Graph } from "../graph.js"; export type Transformer< TInput extends Graph = Graph<{}, {}>, @@ -12,66 +12,6 @@ export type TransformerPipeline> = { build(): Promise; }; -type FilterFn = (node: Node>) => boolean; - -export function transformer< - TOptions, - TInputConstraint, - TDataAdditions = {}, - TEdgeAdditions extends Record = {}, ->( - handler: ( - options: TOptions, - data: { - graph: Graph; - nodes: Node[]; - } - ) => PromiseLike -): = {}>( - options: TOptions & { filter?: FilterFn } -) => Transformer< - Graph, - Graph -> { - return < - TInput extends TInputConstraint, - TEdgeMap extends Record, - >( - options: TOptions & { filter?: FilterFn } - ): Transformer< - Graph, - Graph - > => { - return async ( - graph: Graph - ): Promise> => { - const clonedGraph = graph.clone() as unknown as Graph< - TInputConstraint & TDataAdditions, - TEdgeAdditions - >; - - const { filter, ...restOptions } = options; - const allNodes = Array.from(clonedGraph.getNodes().values()); - const filteredNodes = filter - ? allNodes.filter((n) => - filter( - n as unknown as Node> - ) - ) - : allNodes; - - await handler(restOptions as TOptions, { - graph: clonedGraph, - nodes: filteredNodes, - }); - return clonedGraph as Graph< - TInput & TDataAdditions, - TEdgeMap & TEdgeAdditions - >; - }; - }; -} - export function transform>( graph: TGraph ): TransformerPipeline {