Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 27 additions & 8 deletions sdks/python/openrag_sdk/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,27 +125,46 @@ async def wait_for_task(
await asyncio.sleep(poll_interval)
elapsed += poll_interval

raise TimeoutError(f"Ingestion task {task_id} did not complete within {timeout}s")
raise TimeoutError(
f"Ingestion task {task_id} did not complete within {timeout}s"
)

async def delete(self, filename: str) -> DeleteDocumentResponse:
async def delete(
self,
filename: str | None = None,
*,
filter_id: str | None = None,
) -> DeleteDocumentResponse:
"""
Delete a document from the knowledge base.
Delete document(s) from the knowledge base.

Args:
filename: Name of the file to delete.
Provide exactly one of:
filename: delete all chunks for that filename.
filter_id: delete chunks for each filename in the filter's data_sources.

Returns:
DeleteDocumentResponse with deleted chunk count.
"""
if bool(filename) == bool(filter_id):
raise ValueError("Provide exactly one of `filename` or `filter_id`")

body: dict[str, str] = {}
if filename is not None:
body["filename"] = filename
if filter_id is not None:
body["filter_id"] = filter_id

try:
response = await self._client._request(
"DELETE",
"/api/v1/documents",
json={"filename": filename},
json=body,
)
except NotFoundError as e:
# Keep delete idempotent for SDK callers: a missing document is not an exception.
if getattr(e, "status_code", None) == 404:
# Keep delete idempotent for SDK callers: a missing document is not
# an exception.
# Filter-not-found 404s still raise because the filter_id is caller input.
if filename is not None and getattr(e, "status_code", None) == 404:
return DeleteDocumentResponse(
success=False,
deleted_chunks=0,
Expand Down
7 changes: 5 additions & 2 deletions sdks/python/openrag_sdk/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""OpenRAG SDK data models."""

from datetime import datetime
from typing import Any, Literal
from typing import Literal

from pydantic import BaseModel, Field

Expand Down Expand Up @@ -98,6 +97,10 @@ class DeleteDocumentResponse(BaseModel):
filename: str | None = None
message: str | None = None
error: str | None = None
# Populated when deleting by filter_id — one entry per resolved data_source.
filenames: list[str] | None = None
filter_id: str | None = None
per_file: list[dict] | None = None


# Chat history models
Expand Down
43 changes: 34 additions & 9 deletions sdks/typescript/src/documents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import type { OpenRAGClient } from "./client";
import type {
DeleteDocumentOptions,
DeleteDocumentResponse,
IngestResponse,
IngestTaskStatus,
Expand Down Expand Up @@ -144,33 +145,57 @@ export class DocumentsClient {
}

/**
* Delete a document from the knowledge base.
* Delete document(s) from the knowledge base.
*
* Provide exactly one of:
* - filename: a single filename, or
* - { filename } / { filterId }: an options object.
*
* @param filename - Name of the file to delete.
* @returns DeleteDocumentResponse with deleted chunk count.
*/
async delete(filename: string): Promise<DeleteDocumentResponse> {
async delete(
arg: string | DeleteDocumentOptions
): Promise<DeleteDocumentResponse> {
const opts: DeleteDocumentOptions =
typeof arg === "string" ? { filename: arg } : arg;

if (!opts.filename === !opts.filterId) {
throw new Error(
"Provide exactly one of `filename` or `filterId`"
);
}

const body: Record<string, string> = {};
if (opts.filename) body.filename = opts.filename;
if (opts.filterId) body.filter_id = opts.filterId;

try {
const response = await this.client._request("DELETE", "/api/v1/documents", {
body: JSON.stringify({ filename }),
body: JSON.stringify(body),
});

const data = await response.json();
return {
success: data.success ?? false,
deleted_chunks: data.deleted_chunks ?? 0,
filename: data.filename ?? filename,
filename: data.filename ?? opts.filename ?? null,
message: data.message ?? null,
error: data.error ?? null,
filenames: data.filenames ?? null,
filter_id: data.filter_id ?? null,
per_file: data.per_file ?? null,
};
} catch (error) {
// Delete is idempotent: if no chunks match, backend may return 404.
// Surface this as a non-throwing "nothing deleted" response.
if ((error as NotFoundError)?.statusCode === 404) {
// Filename delete stays idempotent (404 -> success:false). Filter-id 404s
// are caller errors (bad filter id) and should propagate.
if (
opts.filename &&
(error as NotFoundError)?.statusCode === 404
) {
return {
success: false,
deleted_chunks: 0,
filename,
filename: opts.filename,
message: null,
error: (error as Error)?.message ?? "Resource not found",
};
Expand Down
9 changes: 9 additions & 0 deletions sdks/typescript/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,15 @@ export interface DeleteDocumentResponse {
filename?: string | null;
message?: string | null;
error?: string | null;
// Populated when deleting by filter_id — one entry per resolved data_source.
filenames?: string[] | null;
filter_id?: string | null;
per_file?: Array<Record<string, unknown>> | null;
}

export interface DeleteDocumentOptions {
filename?: string;
filterId?: string;
}

// Chat history types
Expand Down
134 changes: 113 additions & 21 deletions sdks/typescript/tests/integration.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -190,55 +190,147 @@ describe.skipIf(SKIP_TESTS)("OpenRAG TypeScript SDK Integration", () => {
expect(filter).toBeNull();
});

it("should use filterId in chat", async () => {
// Create a filter first
it("filterId in chat actually scopes retrieval to data_sources", async () => {
// Ingest two distinguishable docs.
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "sdk-filter-"));
const alphaName = `alpha_${Date.now()}.md`;
const betaName = `beta_${Date.now()}.md`;
const alphaPath = path.join(tmpDir, alphaName);
const betaPath = path.join(tmpDir, betaName);
fs.writeFileSync(alphaPath, "# Alpha\n\nPurple elephants live here.\n");
fs.writeFileSync(betaPath, "# Beta\n\nYellow tigers live here.\n");
await client.documents.ingest({ filePath: alphaPath });
await client.documents.ingest({ filePath: betaPath });

const createResult = await client.knowledgeFilters.create({
name: "Chat Test Filter",
description: "Filter for testing chat with filterId",
name: `TS chat filter scope ${Date.now()}`,
description: "Filter scoped to alpha only",
queryData: {
query: "test",
limit: 5,
query: "",
filters: {
data_sources: [alphaName],
document_types: ["*"],
owners: ["*"],
connector_types: ["*"],
},
limit: 10,
scoreThreshold: 0,
},
});
expect(createResult.success).toBe(true);
const filterId = createResult.id!;

try {
// Use filter in chat
const response = await client.chat.create({
message: "Hello with filter",
message: "What animals appear in these documents?",
filterId,
});
expect(response.sources).toBeDefined();
const names = (response.sources ?? []).map((s) => s.filename);
// Beta must NOT leak through the filter.
expect(names).not.toContain(betaName);
} finally {
await client.knowledgeFilters.delete(filterId);
await client.documents.delete(alphaName);
await client.documents.delete(betaName);
}
}, 60_000);

it("filterId in search actually scopes results to data_sources", async () => {
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "sdk-filter-"));
const alphaName = `alpha_${Date.now()}.md`;
const betaName = `beta_${Date.now()}.md`;
const alphaPath = path.join(tmpDir, alphaName);
const betaPath = path.join(tmpDir, betaName);
fs.writeFileSync(alphaPath, "# Alpha\n\nPurple elephants live here.\n");
fs.writeFileSync(betaPath, "# Beta\n\nYellow tigers live here.\n");
await client.documents.ingest({ filePath: alphaPath });
await client.documents.ingest({ filePath: betaPath });

expect(response.response).toBeDefined();
const createResult = await client.knowledgeFilters.create({
name: `TS search filter scope ${Date.now()}`,
description: "Filter scoped to alpha only",
queryData: {
query: "",
filters: {
data_sources: [alphaName],
document_types: ["*"],
owners: ["*"],
connector_types: ["*"],
},
limit: 10,
scoreThreshold: 0,
},
});
expect(createResult.success).toBe(true);
const filterId = createResult.id!;

try {
const results = await client.search.query("animals", { filterId });
for (const r of results.results) {
expect(r.filename).not.toBe(betaName);
}
} finally {
// Cleanup
await client.knowledgeFilters.delete(filterId);
await client.documents.delete(alphaName);
await client.documents.delete(betaName);
}
});
}, 60_000);

it("documents.delete(filterId) only removes filenames in the filter", async () => {
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "sdk-filter-"));
const alphaName = `alpha_${Date.now()}.md`;
const betaName = `beta_${Date.now()}.md`;
const alphaPath = path.join(tmpDir, alphaName);
const betaPath = path.join(tmpDir, betaName);
fs.writeFileSync(alphaPath, "# Alpha\n\nPurple elephants.\n");
fs.writeFileSync(betaPath, "# Beta\n\nYellow tigers.\n");
await client.documents.ingest({ filePath: alphaPath });
await client.documents.ingest({ filePath: betaPath });

it("should use filterId in search", async () => {
// Create a filter first
const createResult = await client.knowledgeFilters.create({
name: "Search Test Filter",
description: "Filter for testing search with filterId",
name: `TS delete-by-filter ${Date.now()}`,
description: "Filter scoped to alpha only",
queryData: {
query: "test",
limit: 5,
query: "",
filters: {
data_sources: [alphaName],
document_types: ["*"],
owners: ["*"],
connector_types: ["*"],
},
limit: 10,
scoreThreshold: 0,
},
});
expect(createResult.success).toBe(true);
const filterId = createResult.id!;

try {
// Use filter in search
const results = await client.search.query("test query", { filterId });
const result = await client.documents.delete({ filterId });
expect(result.success).toBe(true);
expect(result.filenames).toContain(alphaName);
expect(result.filenames ?? []).not.toContain(betaName);

expect(results.results).toBeDefined();
// Beta still searchable
const remaining = await client.search.query("tigers");
const remainingNames = remaining.results.map((r) => r.filename);
expect(remainingNames).toContain(betaName);
} finally {
// Cleanup
await client.knowledgeFilters.delete(filterId);
await client.documents.delete(alphaName);
await client.documents.delete(betaName);
}
}, 60_000);

it("documents.delete rejects both filename and filterId together", async () => {
await expect(
client.documents.delete({ filename: "x.pdf", filterId: "y" })
).rejects.toThrow();
});

it("documents.delete rejects when neither arg is set", async () => {
await expect(client.documents.delete({})).rejects.toThrow();
});
});

Expand Down
Loading
Loading