Skip to content
149 changes: 149 additions & 0 deletions packages/evalite/src/generation/graph.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/**
 * Empty object type.
 * NOTE(review): presumably used where a node or edge carries no payload — confirm
 * against callers. Be aware that `{}` accepts any non-nullish value in TypeScript.
 */
export type NoData = {};
/**
 * Extracts the node data type `N` from a `Graph<N, E>`.
 * Resolves to `never` when `G` is not a `Graph`.
 */
export type GraphNodeData<G> = G extends Graph<infer N, any> ? N : never;
/**
 * Extracts the edge-type-to-data map `E` from a `Graph<N, E>`.
 * Resolves to `never` when `G` is not a `Graph`.
 */
export type GraphEdgeMap<G> = G extends Graph<any, infer E> ? E : never;

/**
 * Merges two edge-type maps into a single flat object type.
 *
 * The result has every key that appears in `TBase` or `TNew`. When a key is
 * present in both, the type from `TNew` wins; otherwise the type from `TBase`
 * is kept.
 *
 * @typeParam TBase - The existing edge-type map.
 * @typeParam TNew - The edge types being added (or overriding existing ones).
 */
export type AddEdgeTypes<
TBase extends Record<string, any>,
TNew extends Record<string, any>,
> = {
[K in keyof TBase | keyof TNew]: K extends keyof TNew
? TNew[K]
: K extends keyof TBase
? TBase[K]
: never;
};

/**
 * Discriminated union of edge shapes, one member per key of `TEdgeTypeDataMap`.
 *
 * For each key `K`, the member has `type: K` (the discriminant), `data` typed as
 * `TEdgeTypeDataMap[K]`, and `from` / `to` references to the connected nodes.
 * Indexing the mapped type by `keyof TEdgeTypeDataMap` collapses it into the
 * union, so with the default empty map this resolves to `never`.
 *
 * @typeParam TNodeData - Payload type of the nodes the edge connects.
 * @typeParam TEdgeTypeDataMap - Maps each edge type name to its data type.
 */
export type Edge<
TNodeData,
TEdgeTypeDataMap extends Record<string, any> = {},
> = {
[K in keyof TEdgeTypeDataMap]: {
type: K;
data: TEdgeTypeDataMap[K];
from: Node<TNodeData, TEdgeTypeDataMap>;
to: Node<TNodeData, TEdgeTypeDataMap>;
};
}[keyof TEdgeTypeDataMap];

/**
 * Directed graph that stores its nodes by id; edges are stored on their
 * source node.
 *
 * @typeParam TNodeData - Payload type carried by every node.
 * @typeParam TEdgeTypeDataMap - Maps each edge type name to the data type
 *   carried by edges of that type.
 */
export class Graph<
  TNodeData,
  TEdgeTypeDataMap extends Record<string, any> = {},
> {
  private nodes: Map<string, Node<TNodeData, TEdgeTypeDataMap>> = new Map();

  /**
   * @param nodes - Optional initial nodes, added in order via {@link addNode}.
   */
  constructor(nodes?: Node<TNodeData, TEdgeTypeDataMap>[]) {
    if (nodes) {
      nodes.forEach((node) => this.addNode(node));
    }
  }

  /**
   * Registers a node under its id. A node that was previously registered
   * under the same id is replaced.
   *
   * @returns The node that was passed in.
   */
  addNode(node: Node<TNodeData, TEdgeTypeDataMap>) {
    this.nodes.set(node.id, node);
    return node;
  }

  /** Returns the node registered under `id`, or `undefined` if there is none. */
  getNode(id: string) {
    return this.nodes.get(id);
  }

  /** Returns the internal id-to-node map itself (not a copy). */
  getNodes() {
    return this.nodes;
  }

  /**
   * Adds a directed edge from the node with id `node1` to the node with id
   * `node2`. The edge is stored on the source node.
   *
   * @param node1 - Id of the source node.
   * @param node2 - Id of the target node.
   * @param type - Edge type; must be a key of the edge-type map.
   * @param data - Data for this edge type.
   * @throws Error if either id is not registered in this graph; the message
   *   lists the missing id(s).
   */
  addEdge<K extends keyof TEdgeTypeDataMap>(
    node1: string,
    node2: string,
    type: K,
    data: TEdgeTypeDataMap[K]
  ): void {
    const node1Node = this.nodes.get(node1);
    const node2Node = this.nodes.get(node2);
    if (!node1Node || !node2Node) {
      // Name the offending id(s) so the caller can tell which lookup failed.
      const missing = Array.from(new Set([node1, node2])).filter(
        (id) => !this.nodes.has(id)
      );
      throw new Error(`One or more nodes not found: ${missing.join(", ")}`);
    }
    const edge = {
      from: node1Node,
      to: node2Node,
      type,
      data,
    } as Edge<TNodeData, TEdgeTypeDataMap>;
    node1Node.addEdge(edge);
  }

  /**
   * Creates a deep copy of the graph: node data and edge data are copied with
   * `structuredClone`, and edges are re-pointed at the cloned nodes.
   *
   * The type parameters allow the caller to re-type the copy. These casts are
   * unchecked: the runtime values are plain clones of the current data.
   *
   * @throws Error if an edge references a node id that is not registered in
   *   this graph. Such an edge cannot be re-pointed at a cloned node.
   */
  clone<
    TNewNodeData extends TNodeData = TNodeData,
    TNewEdgeTypeDataMap extends Record<string, any> = TEdgeTypeDataMap,
  >(): Graph<TNewNodeData, TNewEdgeTypeDataMap> {
    const newNodes = new Map<string, Node<TNewNodeData, TNewEdgeTypeDataMap>>();

    // Pass 1: clone every node (without edges) so edge endpoints can be resolved.
    for (const [id, node] of this.nodes) {
      const clonedNode = new Node<TNewNodeData, TNewEdgeTypeDataMap>(
        node.id,
        node.type,
        structuredClone(node.data) as unknown as TNewNodeData
      );
      newNodes.set(id, clonedNode);
    }

    // Resolve an original endpoint to its clone, failing loudly instead of
    // producing an edge whose `from`/`to` is undefined.
    const resolveClone = (
      endpointId: string,
      ownerId: string
    ): Node<TNewNodeData, TNewEdgeTypeDataMap> => {
      const cloned = newNodes.get(endpointId);
      if (!cloned) {
        throw new Error(
          `Cannot clone graph: an edge on node "${ownerId}" references node "${endpointId}", which is not part of the graph`
        );
      }
      return cloned;
    };

    // Pass 2: recreate every edge between the cloned nodes.
    for (const [id, node] of this.nodes) {
      const clonedNode = resolveClone(id, id);
      for (const edge of node.getEdges()) {
        const clonedEdge = {
          from: resolveClone(edge.from.id, id),
          to: resolveClone(edge.to.id, id),
          type: edge.type,
          data: structuredClone(edge.data),
        } as unknown as Edge<TNewNodeData, TNewEdgeTypeDataMap>;
        clonedNode.addEdge(clonedEdge);
      }
    }

    return new Graph<TNewNodeData, TNewEdgeTypeDataMap>(
      Array.from(newNodes.values())
    );
  }
}

/**
 * A graph node holding a payload and its outgoing edges.
 *
 * @typeParam TNodeData - Payload type stored in `data`.
 * @typeParam TEdgeTypeDataMap - Maps each edge type name to the data type
 *   carried by edges of that type.
 */
export class Node<
  TNodeData,
  TEdgeTypeDataMap extends Record<string, any> = {},
> {
  data: TNodeData;
  readonly type: "document" | "chunk";
  // Keyed by (edge type, target id) so that edges of different types to the
  // same target can coexist.
  private edges: Map<string, Edge<TNodeData, TEdgeTypeDataMap>> = new Map();

  /**
   * @param id - Identifier of the node.
   * @param type - Node kind, either "document" or "chunk".
   * @param data - Payload stored on the node.
   */
  constructor(
    readonly id: string,
    type: "document" | "chunk",
    data: TNodeData
  ) {
    this.type = type;
    this.data = data;
  }

  /**
   * Stores an edge on this node.
   *
   * At most one edge is kept per (edge type, target node id) pair: adding an
   * edge with the same type to the same target replaces the earlier one, while
   * edges of different types to the same target are all kept.
   */
  addEdge(edge: Edge<TNodeData, TEdgeTypeDataMap>) {
    // JSON-encode the pair so no combination of type and id can collide.
    const key = JSON.stringify([String(edge.type), edge.to.id]);
    this.edges.set(key, edge);
  }

  /** Returns a new array with the stored edges in insertion order. */
  getEdges() {
    return Array.from(this.edges.values());
  }
}

/**
 * Factory shorthand for constructing a {@link Graph}.
 *
 * @param nodes - Optional initial nodes handed to the `Graph` constructor.
 * @returns The newly constructed graph.
 */
export function graph<
  TNodeData,
  TEdgeTypeDataMap extends Record<string, any> = {},
>(
  nodes?: Node<TNodeData, TEdgeTypeDataMap>[]
): Graph<TNodeData, TEdgeTypeDataMap> {
  const created = new Graph<TNodeData, TEdgeTypeDataMap>(nodes);
  return created;
}

/**
 * Factory shorthand for constructing a {@link Node}.
 *
 * @param type - Node kind, either "document" or "chunk".
 * @param data - Payload stored on the node.
 * @param id - Optional identifier; when `undefined` (or `null`), a fresh
 *   `crypto.randomUUID()` value is used instead.
 * @returns The newly constructed node.
 */
export function node<
  TNodeData,
  TEdgeTypeDataMap extends Record<string, any> = {},
>(
  type: "document" | "chunk",
  data: TNodeData,
  id?: string
): Node<TNodeData, TEdgeTypeDataMap> {
  const resolvedId = id ?? crypto.randomUUID();
  return new Node<TNodeData, TEdgeTypeDataMap>(resolvedId, type, data);
}
155 changes: 155 additions & 0 deletions packages/evalite/src/generation/persona.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import { generateObject, jsonSchema, type LanguageModel } from "ai";
import type { Graph, Node } from "./graph.js";
import { promptBuilder } from "../scorers/prompt-builder.js";

/**
 * A generated fictional reader of some content.
 */
export interface Persona {
/** Free-text description of the persona, as returned by the model. */
description: string;
/** How familiar the persona is with the subject matter. */
knowledgeLevel: "novice" | "intermediate" | "expert";
}

/**
 * JSON schema for the object the model is asked to produce: an object with a
 * single required string field, `description`.
 *
 * The schema deliberately has no `knowledgeLevel` field; `generatePersona`
 * attaches the knowledge level it chose itself to each result.
 */
const PersonaSchema = jsonSchema<{
description: string;
}>({
type: "object",
properties: {
description: {
type: "string",
description:
"A detailed description of the fictional persona who would consume this content, including their background, motivations, and how they would interact with the material",
},
},
required: ["description"],
});

/**
 * Prompt template for persona generation, built with `promptBuilder`.
 *
 * It combines an instruction, three worked examples (one each for the
 * "intermediate", "novice" and "expert" knowledge levels) and the task fields
 * `summary` and `knowledgeLevel`. It is invoked with an object carrying those
 * two fields.
 *
 * NOTE(review): the instruction text contains a literal `{knowledgeLevel}`
 * placeholder; presumably `promptBuilder` substitutes it — confirm against the
 * prompt-builder implementation.
 */
const generatePersonaPrompt = promptBuilder({
prompt:
"Generate a fictional persona who would be interested in consuming the following content. The persona should represent a realistic reader/user with the specified knowledge level ({knowledgeLevel}). Provide a detailed description of who they are, their motivations for engaging with this content, and their background. Output JSON following the required schema.",
examples: [
// Example 1: intermediate-level reader.
{
input: {
summary:
"A comprehensive guide to machine learning algorithms, covering supervised and unsupervised learning techniques with practical Python examples.",
knowledgeLevel: "intermediate",
},
output: {
description:
"Sarah is a 32-year-old software developer at a mid-sized tech company. She has 5 years of experience in backend development and recently became interested in adding ML capabilities to her team's products. She's comfortable with Python but has limited exposure to data science concepts beyond basic statistics. She wants to understand the fundamentals well enough to have meaningful conversations with data scientists and potentially prototype simple ML features.",
},
},
// Example 2: novice-level reader.
{
input: {
summary:
"Introduction to gardening for beginners, covering basic soil preparation, plant selection, and watering techniques.",
knowledgeLevel: "novice",
},
output: {
description:
"Emily is a 45-year-old office manager who just bought her first home with a backyard. She grew up in apartments and has never had outdoor space before. She's excited to start a vegetable garden but feels overwhelmed by all the options and doesn't know where to begin. She has no prior gardening experience and needs step-by-step guidance.",
},
},
// Example 3: expert-level reader.
{
input: {
summary:
"Advanced distributed systems architecture patterns for high-availability microservices deployments.",
knowledgeLevel: "expert",
},
output: {
description:
"David is a 40-year-old principal engineer at a large fintech company. He has 15+ years of experience building distributed systems and has led several large-scale migrations. He's looking to stay current with the latest patterns and validate his architectural decisions against industry best practices. He often mentors junior engineers and needs authoritative references to share with his team.",
},
},
],
task: ["summary", "knowledgeLevel"],
});

/**
 * Generates fictional reader personas from the summaries stored on graph nodes.
 *
 * Nodes are narrowed with `filter` (nodes of type "document" by default) and
 * then to those whose `data.summary` is defined and not blank. The requested
 * `amount` (defaulting to one persona per remaining node) is spread across
 * those nodes by `calculatePersonasPerNode`. One `generateObject` call is
 * issued per persona, each with a randomly chosen knowledge level. All calls
 * are started before any result is awaited.
 *
 * @param graph - Graph whose nodes carry `content` and optionally `summary`.
 * @param model - Language model passed to `generateObject`.
 * @param amount - Total number of personas to generate.
 * @param filter - Predicate selecting which nodes are considered.
 * @returns The personas in node order, or an empty array when no node
 *   qualifies or the requested total is zero.
 */
export async function generatePersona<
  TNodeData extends { content: string; summary?: string },
  TEdgeMap extends Record<string, unknown> = Record<string, unknown>,
>(
  graph: Graph<TNodeData, TEdgeMap>,
  {
    model,
    amount,
    filter = (node) => node.type === "document",
  }: {
    model: LanguageModel;
    amount?: number;
    filter?: (node: Node<TNodeData, TEdgeMap>) => boolean;
  }
): Promise<Persona[]> {
  // Apply the caller's filter first, then keep only nodes with a usable summary.
  const summarized = [...graph.getNodes().values()]
    .filter(filter)
    .filter((candidate) => {
      const summary = candidate.data.summary;
      return !(summary === undefined || summary.trim() === "");
    });
  if (summarized.length === 0) {
    return [];
  }

  const requested = amount ?? summarized.length;
  if (requested === 0) {
    return [];
  }

  const quotas = calculatePersonasPerNode(requested, summarized.length);

  // Starts a single model call and pairs its description with the level used.
  const requestPersona = (summary: string): Promise<Persona> => {
    const knowledgeLevel = getRandomKnowledgeLevel();
    return generateObject({
      model,
      schema: PersonaSchema,
      prompt: generatePersonaPrompt({ summary, knowledgeLevel }),
    }).then(({ object }) => ({
      description: object.description,
      knowledgeLevel,
    }));
  };

  const pending: Promise<Persona>[] = [];
  summarized.forEach((source, position) => {
    const quota = quotas[position] ?? 0;
    for (let issued = 0; issued < quota; issued += 1) {
      // The summary filter above guarantees the summary is defined here.
      pending.push(requestPersona(source.data.summary!));
    }
  });

  return Promise.all(pending);
}

/**
 * Picks one of the three knowledge levels using a single `Math.random()` draw,
 * with each level covering an equal third of the range.
 *
 * @returns "novice", "intermediate" or "expert".
 */
function getRandomKnowledgeLevel(): "novice" | "intermediate" | "expert" {
  const roll = Math.floor(Math.random() * 3);
  switch (roll) {
    case 0:
      return "novice";
    case 2:
      return "expert";
    default:
      // Covers roll === 1 and doubles as the fallback for any other value.
      return "intermediate";
  }
}

/**
 * Splits `totalAmount` personas as evenly as possible across `nodeCount` nodes.
 *
 * Every node receives `floor(total / nodeCount)` personas and the first
 * `total % nodeCount` nodes receive one extra, so the counts always sum to the
 * normalized total.
 *
 * The total is normalized first: fractional values are rounded down, and
 * negative or NaN values are treated as 0, so the result never contains
 * negative or fractional counts.
 *
 * @param totalAmount - Total number of personas requested.
 * @param nodeCount - Number of nodes to distribute across.
 * @returns One count per node, or an empty array when `nodeCount` is not positive.
 * @throws RangeError if `totalAmount` is positive infinity, which would
 *   otherwise make the caller loop forever.
 */
function calculatePersonasPerNode(
  totalAmount: number,
  nodeCount: number
): number[] {
  if (!(nodeCount > 0)) return [];
  if (totalAmount === Number.POSITIVE_INFINITY) {
    throw new RangeError("totalAmount must be a finite number");
  }

  // NaN and negative totals fail the comparison and fall back to 0.
  const total = totalAmount > 0 ? Math.floor(totalAmount) : 0;
  const baseCount = Math.floor(total / nodeCount);
  const remainder = total % nodeCount;

  return Array.from(
    { length: nodeCount },
    (_, i) => baseCount + (i < remainder ? 1 : 0)
  );
}
45 changes: 45 additions & 0 deletions packages/evalite/src/generation/test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { jaccardSimilarity } from "./transformers/jaccard-similarity.js";
import { topicExtractor } from "./transformers/topic-extractor.js";
import { summaryExtractor } from "./transformers/summary-extractor.js";
import { graph, node } from "./graph.js";
import { transform } from "./transformers/transformer.js";
import { openai } from "@ai-sdk/openai";
import { embedExtractor } from "./transformers/embed-extractor.js";
import { embeddingSimilarity } from "./transformers/embedding-similarity.js";
import { chunkExtractor } from "./transformers/chunk-extractor.js";
import { generatePersona } from "./persona.js";

// Demo pipeline: starts from a graph with a single "document" node and runs it
// through a chain of transformers, awaiting the built graph.
// NOTE(review): what each transformer adds (chunk nodes, summaries, topics,
// embeddings, similarity edges) is inferred from its name and from the edge
// types inspected further down — confirm against the transformer modules.
const g = await transform(graph([node("document", { content: "Hello world" })]))
// The chunker here splits the content on single spaces.
.pipe(chunkExtractor({ chunker: (content) => content.split(" ") }))
.pipe(summaryExtractor({ model: openai("gpt-4.1") }))
.pipe(topicExtractor({ model: openai("gpt-4.1") }))
.pipe(jaccardSimilarity({ property: "topics" }))
.pipe(
embedExtractor({
model: openai.embedding("text-embedding-3-small"),
property: "summary",
})
)
.pipe(embeddingSimilarity({ property: "summaryEmbedding" }))
// NOTE(review): "content" is the raw text property, not an embedding — verify
// that embeddingSimilarity is meant to accept it.
.pipe(embeddingSimilarity({ property: "content" }))
.build();

// Walk every node's edges and print one line per edge, formatted by edge type.
for (const current of g.getNodes().values()) {
  for (const link of current.getEdges()) {
    switch (link.type) {
      case "jaccardSimilarity":
        console.log(
          ` Jaccard score: ${link.data.score} (property: ${link.data.property})`
        );
        break;
      case "embeddingSimilarity":
        console.log(
          ` Embedding score: ${link.data.score} (property: ${link.data.property})`
        );
        break;
      case "chunk":
      case "parent":
        console.log(` Chunk relationship (no score data)`);
        break;
      default:
        // Any other edge type is intentionally not printed.
        break;
    }
  }
}

// Await the result with top-level await, as the pipeline above already does,
// so a failed generation rejects the module instead of leaving a floating
// promise with no rejection handler.
const personas = await generatePersona(g, { model: openai("gpt-4.1") });
console.log(personas);
Loading
Loading