From c9b9f7ffc43d2d70e9fb33cd330c76e849e05dd2 Mon Sep 17 00:00:00 2001 From: Ben Davis Date: Thu, 5 Feb 2026 13:44:45 -0800 Subject: [PATCH 1/5] first pass on web resource --- apps/cli/src/client/index.ts | 12 +- apps/cli/src/commands/add.ts | 386 +++++---- apps/cli/src/commands/remove.ts | 14 +- apps/cli/src/commands/resources.ts | 7 + apps/cli/src/tui/components/input-section.tsx | 4 +- apps/cli/src/tui/components/main-input.tsx | 2 +- .../tui/components/repo-mention-palette.tsx | 2 +- apps/cli/src/tui/components/status-bar.tsx | 4 +- apps/cli/src/tui/services.ts | 10 +- .../api-reference/local/config-resources.mdx | 2 +- apps/docs/api-reference/local/resources.mdx | 2 +- apps/docs/btca.spec.md | 45 +- apps/docs/guides/cli-reference.mdx | 9 +- apps/docs/guides/configuration.mdx | 10 + apps/server/package.json | 1 + apps/server/src/collections/service.ts | 22 +- .../src/collections/virtual-metadata.ts | 3 +- apps/server/src/index.ts | 66 +- .../src/resources/impls/website.test.ts | 214 +++++ apps/server/src/resources/impls/website.ts | 739 ++++++++++++++++++ apps/server/src/resources/index.ts | 14 +- apps/server/src/resources/schema.test.ts | 59 ++ apps/server/src/resources/schema.ts | 68 +- apps/server/src/resources/service.ts | 36 +- apps/server/src/resources/types.ts | 14 +- apps/server/src/resources/website.e2e.test.ts | 203 +++++ apps/server/src/validation/index.ts | 9 +- apps/web/src/convex/_generated/api.d.ts | 299 ++++--- apps/web/src/convex/_generated/api.js | 2 +- apps/web/src/convex/_generated/dataModel.d.ts | 22 +- apps/web/src/convex/_generated/server.d.ts | 34 +- apps/web/src/convex/_generated/server.js | 16 +- apps/web/src/lib/types/index.ts | 6 +- apps/web/static/btca.schema.json | 50 +- .../web/static/docs/example-btca.config.jsonc | 9 + btca.config.jsonc | 282 +++---- bun.lock | 83 +- 37 files changed, 2209 insertions(+), 551 deletions(-) create mode 100644 apps/server/src/resources/impls/website.test.ts create mode 100644 
apps/server/src/resources/impls/website.ts create mode 100644 apps/server/src/resources/schema.test.ts create mode 100644 apps/server/src/resources/website.e2e.test.ts diff --git a/apps/cli/src/client/index.ts b/apps/cli/src/client/index.ts index 909e4f37..550ec3d9 100644 --- a/apps/cli/src/client/index.ts +++ b/apps/cli/src/client/index.ts @@ -185,7 +185,17 @@ export interface LocalResourceInput { specialNotes?: string; } -export type ResourceInput = GitResourceInput | LocalResourceInput; +export interface WebsiteResourceInput { + type: 'website'; + name: string; + url: string; + maxPages?: number; + maxDepth?: number; + ttlHours?: number; + specialNotes?: string; +} + +export type ResourceInput = GitResourceInput | LocalResourceInput | WebsiteResourceInput; /** * Add a new resource diff --git a/apps/cli/src/commands/add.ts b/apps/cli/src/commands/add.ts index e096ffeb..7663fdf8 100644 --- a/apps/cli/src/commands/add.ts +++ b/apps/cli/src/commands/add.ts @@ -1,24 +1,24 @@ import { Result } from 'better-result'; import { Command } from 'commander'; -import * as readline from 'readline'; import path from 'node:path'; -import { ensureServer } from '../server/manager.ts'; +import * as readline from 'readline'; + import { addResource, BtcaError } from '../client/index.ts'; import { dim } from '../lib/utils/colors.ts'; +import { ensureServer } from '../server/manager.ts'; interface GitHubUrlParts { owner: string; repo: string; } -/** - * Parse a GitHub URL and extract owner/repo. 
- */ -function parseGitHubUrl(url: string): GitHubUrlParts | null { - // Handle various GitHub URL formats: - // - https://github.com/owner/repo - // - https://github.com/owner/repo.git - // - github.com/owner/repo +type ResourceType = 'git' | 'local' | 'website'; + +const DEFAULT_WEBSITE_MAX_PAGES = 200; +const DEFAULT_WEBSITE_MAX_DEPTH = 3; +const DEFAULT_WEBSITE_TTL_HOURS = 24; + +const parseGitHubUrl = (url: string): GitHubUrlParts | null => { const patterns = [ /^https?:\/\/github\.com\/([^/]+)\/([^/]+?)(\.git)?$/, /^github\.com\/([^/]+)\/([^/]+?)(\.git)?$/ @@ -35,21 +35,15 @@ function parseGitHubUrl(url: string): GitHubUrlParts | null { } return null; -} +}; -/** - * Normalize GitHub URL to standard format. - */ -function normalizeGitHubUrl(url: string): string { +const normalizeGitHubUrl = (url: string) => { const parts = parseGitHubUrl(url); if (!parts) return url; return `https://github.com/${parts.owner}/${parts.repo}`; -} +}; -/** - * Format an error for display, including hint if available. - */ -function formatError(error: unknown): string { +const formatError = (error: unknown): string => { if (error instanceof BtcaError) { let output = `Error: ${error.message}`; if (error.hint) { @@ -58,52 +52,28 @@ function formatError(error: unknown): string { return output; } return `Error: ${error instanceof Error ? error.message : String(error)}`; -} +}; -/** - * Create a readline interface for prompts. - */ -function createRl(): readline.Interface { - return readline.createInterface({ - input: process.stdin, - output: process.stdout - }); -} +const createRl = () => readline.createInterface({ input: process.stdin, output: process.stdout }); -/** - * Prompt for input with a default value. 
- */ -async function promptInput( - rl: readline.Interface, - question: string, - defaultValue?: string -): Promise { - return new Promise((resolve) => { +const promptInput = async (rl: readline.Interface, question: string, defaultValue?: string) => + new Promise((resolve) => { const defaultHint = defaultValue ? ` ${dim(`(${defaultValue})`)}` : ''; rl.question(`${question}${defaultHint}: `, (answer) => { const value = answer.trim(); resolve(value || defaultValue || ''); }); }); -} -/** - * Prompt for confirmation (y/n). - */ -async function promptConfirm(rl: readline.Interface, question: string): Promise { - return new Promise((resolve) => { +const promptConfirm = async (rl: readline.Interface, question: string) => + new Promise((resolve) => { rl.question(`${question} ${dim('(y/n)')}: `, (answer) => { resolve(answer.trim().toLowerCase() === 'y'); }); }); -} -/** - * Prompt for repeated entries (search paths). - */ -async function promptRepeated(rl: readline.Interface, itemName: string): Promise { +const promptRepeated = async (rl: readline.Interface, itemName: string) => { const items: string[] = []; - console.log(`\nEnter ${itemName} one at a time. Press Enter with empty input when done.`); while (true) { @@ -113,47 +83,61 @@ async function promptRepeated(rl: readline.Interface, itemName: string): Promise } return items; -} +}; -/** - * Prompt for single selection from a list. 
- */ -async function promptSelect( +const promptSelect = async ( question: string, options: { label: string; value: T }[] -): Promise { - return new Promise((resolve, reject) => { - const rl = readline.createInterface({ - input: process.stdin, - output: process.stdout - }); +): Promise => + new Promise((resolve, reject) => { + const rl = createRl(); console.log(`\n${question}\n`); - options.forEach((opt, idx) => { - console.log(` ${idx + 1}) ${opt.label}`); + options.forEach((option, index) => { + console.log(` ${index + 1}) ${option.label}`); }); console.log(''); rl.question('Enter number: ', (answer) => { rl.close(); - const num = parseInt(answer.trim(), 10); - if (isNaN(num) || num < 1 || num > options.length) { + const selection = Number.parseInt(answer.trim(), 10); + if (!Number.isFinite(selection) || selection < 1 || selection > options.length) { reject(new Error('Invalid selection')); return; } - resolve(options[num - 1]!.value); + resolve(options[selection - 1]!.value); }); }); -} -/** - * Interactive wizard for adding a git resource. - */ -async function addGitResourceWizard( +const parseRequiredInt = (raw: string, field: string, min: number) => { + const parsed = Number.parseInt(raw, 10); + if (!Number.isFinite(parsed) || parsed < min) { + throw new Error(`${field} must be an integer >= ${min}`); + } + return parsed; +}; + +const defaultWebsiteNameFromUrl = (url: string) => { + const result = Result.try(() => new URL(url)); + return result.match({ + ok: (parsed) => { + const host = parsed.hostname.split('.').filter(Boolean); + const base = host.length > 1 ? host[host.length - 2] : (host[0] ?? 'website'); + const slug = (parsed.pathname.split('/').filter(Boolean).join('-') || '').replace( + /[^a-zA-Z0-9._-]/g, + '' + ); + return slug ? 
`${base}-${slug}` : base; + }, + err: () => 'website' + }); +}; + +const addGitResourceWizard = async ( url: string, options: { global?: boolean }, globalOpts: { server?: string; port?: number } | undefined -): Promise { +) => { const urlParts = parseGitHubUrl(url); if (!urlParts) { console.error('Error: Invalid GitHub URL.'); @@ -162,61 +146,40 @@ async function addGitResourceWizard( } const normalizedUrl = normalizeGitHubUrl(url); - console.log('\n--- Add Git Resource ---\n'); console.log(`Repository: ${normalizedUrl}`); const rl = createRl(); - const result = await Result.tryPromise(async () => { - // Step 1: URL (prefilled, confirm) const finalUrl = await promptInput(rl, 'URL', normalizedUrl); - - // Step 2: Name (default = repo name) - const defaultName = urlParts.repo; - const name = await promptInput(rl, 'Name', defaultName); - - // Step 3: Branch (default = main) + const name = await promptInput(rl, 'Name', urlParts.repo); const branch = await promptInput(rl, 'Branch', 'main'); - - // Step 4: Search paths (optional, repeated) const wantSearchPaths = await promptConfirm( rl, 'Do you want to add search paths (subdirectories to focus on)?' ); const searchPaths = wantSearchPaths ? await promptRepeated(rl, 'Search path') : []; - - // Step 5: Notes (optional) const notes = await promptInput(rl, 'Notes (optional)'); - rl.close(); - // Summary console.log('\n--- Summary ---\n'); - console.log(` Type: git`); + console.log(' Type: git'); console.log(` Name: ${name}`); console.log(` URL: ${finalUrl}`); console.log(` Branch: ${branch}`); - if (searchPaths.length > 0) { - console.log(` Search: ${searchPaths.join(', ')}`); - } - if (notes) { - console.log(` Notes: ${notes}`); - } + if (searchPaths.length > 0) console.log(` Search: ${searchPaths.join(', ')}`); + if (notes) console.log(` Notes: ${notes}`); console.log(` Config: ${options.global ? 
'global' : 'project'}`); console.log(''); - // Confirm const confirmRl = createRl(); const confirmed = await promptConfirm(confirmRl, 'Add this resource?'); confirmRl.close(); - if (!confirmed) { console.log('\nCancelled.'); process.exit(0); } - // Add the resource via server const server = await ensureServer({ serverUrl: globalOpts?.server, port: globalOpts?.port, @@ -244,65 +207,43 @@ async function addGitResourceWizard( }); rl.close(); + if (Result.isError(result)) throw result.error; +}; - if (Result.isError(result)) { - throw result.error; - } -} - -/** - * Interactive wizard for adding a local resource. - */ -async function addLocalResourceWizard( +const addLocalResourceWizard = async ( localPath: string, options: { global?: boolean }, globalOpts: { server?: string; port?: number } | undefined -): Promise { - // Resolve the path +) => { const resolvedPath = path.isAbsolute(localPath) ? localPath : path.resolve(process.cwd(), localPath); - console.log('\n--- Add Local Resource ---\n'); console.log(`Directory: ${resolvedPath}`); const rl = createRl(); - const result = await Result.tryPromise(async () => { - // Step 1: Path (prefilled, confirm) const finalPath = await promptInput(rl, 'Path', resolvedPath); - - // Step 2: Name (default = directory name) - const defaultName = path.basename(finalPath); - const name = await promptInput(rl, 'Name', defaultName); - - // Step 3: Notes (optional) + const name = await promptInput(rl, 'Name', path.basename(finalPath)); const notes = await promptInput(rl, 'Notes (optional)'); - rl.close(); - // Summary console.log('\n--- Summary ---\n'); - console.log(` Type: local`); + console.log(' Type: local'); console.log(` Name: ${name}`); console.log(` Path: ${finalPath}`); - if (notes) { - console.log(` Notes: ${notes}`); - } + if (notes) console.log(` Notes: ${notes}`); console.log(` Config: ${options.global ? 
'global' : 'project'}`); console.log(''); - // Confirm const confirmRl = createRl(); const confirmed = await promptConfirm(confirmRl, 'Add this resource?'); confirmRl.close(); - if (!confirmed) { console.log('\nCancelled.'); process.exit(0); } - // Add the resource via server const server = await ensureServer({ serverUrl: globalOpts?.server, port: globalOpts?.port, @@ -317,28 +258,108 @@ async function addLocalResourceWizard( }); server.stop(); - console.log(`\nAdded resource: ${name}`); console.log('\nYou can now use this resource:'); console.log(` btca ask -r ${name} -q "your question"`); }); rl.close(); + if (Result.isError(result)) throw result.error; +}; - if (Result.isError(result)) { - throw result.error; - } -} +const addWebsiteResourceWizard = async ( + websiteUrl: string, + options: { global?: boolean }, + globalOpts: { server?: string; port?: number } | undefined +) => { + console.log('\n--- Add Website Resource ---\n'); + console.log(`Website: ${websiteUrl}`); + + const rl = createRl(); + const result = await Result.tryPromise(async () => { + const finalUrl = await promptInput(rl, 'URL', websiteUrl); + const name = await promptInput(rl, 'Name', defaultWebsiteNameFromUrl(finalUrl)); + const maxPages = parseRequiredInt( + await promptInput(rl, 'Max Pages', String(DEFAULT_WEBSITE_MAX_PAGES)), + 'maxPages', + 1 + ); + const maxDepth = parseRequiredInt( + await promptInput(rl, 'Max Depth', String(DEFAULT_WEBSITE_MAX_DEPTH)), + 'maxDepth', + 0 + ); + const ttlHours = parseRequiredInt( + await promptInput(rl, 'TTL Hours', String(DEFAULT_WEBSITE_TTL_HOURS)), + 'ttlHours', + 1 + ); + const notes = await promptInput(rl, 'Notes (optional)'); + rl.close(); + + console.log('\n--- Summary ---\n'); + console.log(' Type: website'); + console.log(` Name: ${name}`); + console.log(` URL: ${finalUrl}`); + console.log(` Max Pages: ${maxPages}`); + console.log(` Max Depth: ${maxDepth}`); + console.log(` TTL Hours: ${ttlHours}`); + if (notes) console.log(` Notes: ${notes}`); 
+ console.log(` Config: ${options.global ? 'global' : 'project'}`); + console.log(''); + + const confirmRl = createRl(); + const confirmed = await promptConfirm(confirmRl, 'Add this resource?'); + confirmRl.close(); + if (!confirmed) { + console.log('\nCancelled.'); + process.exit(0); + } + + const server = await ensureServer({ + serverUrl: globalOpts?.server, + port: globalOpts?.port, + quiet: true + }); + + await addResource(server.url, { + type: 'website', + name, + url: finalUrl, + maxPages, + maxDepth, + ttlHours, + ...(notes && { specialNotes: notes }) + }); + + server.stop(); + console.log(`\nAdded resource: ${name}`); + console.log('\nYou can now use this resource:'); + console.log(` btca ask -r ${name} -q "your question"`); + }); + + rl.close(); + if (Result.isError(result)) throw result.error; +}; export const addCommand = new Command('add') - .description('Add a resource (git repository or local directory)') - .argument('[url-or-path]', 'GitHub repository URL or local directory path') + .description('Add a resource (git repository, website, or local directory)') + .argument('[url-or-path]', 'GitHub repository URL, website URL, or local directory path') .option('-g, --global', 'Add to global config instead of project config') .option('-n, --name ', 'Resource name') .option('-b, --branch ', 'Git branch (default: main)') .option('-s, --search-path ', 'Search paths within repo (can specify multiple)') + .option('--max-pages ', 'Max pages for website crawl', (value) => + Number.parseInt(value, 10) + ) + .option('--max-depth ', 'Max depth for website crawl', (value) => + Number.parseInt(value, 10) + ) + .option('--ttl-hours ', 'Website cache TTL in hours', (value) => + Number.parseInt(value, 10) + ) .option('--notes ', 'Special notes for the agent') - .option('-t, --type ', 'Resource type: git or local (auto-detected if not specified)') + .option('-t, --type ', 'Resource type: git, website, or local (auto-detected if omitted)') .action( async ( urlOrPath: 
string | undefined, @@ -347,6 +368,9 @@ export const addCommand = new Command('add') name?: string; branch?: string; searchPath?: string[]; + maxPages?: number; + maxDepth?: number; + ttlHours?: number; notes?: string; type?: string; }, @@ -355,12 +379,12 @@ export const addCommand = new Command('add') const globalOpts = command.parent?.opts() as { server?: string; port?: number } | undefined; const result = await Result.tryPromise(async () => { - // If no argument provided, start interactive wizard if (!urlOrPath) { - const resourceType = await promptSelect<'git' | 'local'>( + const resourceType = await promptSelect( 'What type of resource do you want to add?', [ { label: 'Git repository', value: 'git' }, + { label: 'Website', value: 'website' }, { label: 'Local directory', value: 'local' } ] ); @@ -374,40 +398,46 @@ export const addCommand = new Command('add') process.exit(1); } await addGitResourceWizard(url, options, globalOpts); - } else { - const localPath = await promptInput(rl, 'Local path'); + return; + } + if (resourceType === 'website') { + const url = await promptInput(rl, 'Website URL'); rl.close(); - if (!localPath) { - console.error('Error: Path is required.'); + if (!url) { + console.error('Error: URL is required.'); process.exit(1); } - await addLocalResourceWizard(localPath, options, globalOpts); + await addWebsiteResourceWizard(url, options, globalOpts); + return; } + + const localPath = await promptInput(rl, 'Local path'); + rl.close(); + if (!localPath) { + console.error('Error: Path is required.'); + process.exit(1); + } + await addLocalResourceWizard(localPath, options, globalOpts); return; } - // Determine type from argument or explicit flag - let resourceType: 'git' | 'local' = 'git'; - + let resourceType: ResourceType = 'git'; if (options.type) { - if (options.type !== 'git' && options.type !== 'local') { - console.error('Error: --type must be "git" or "local"'); + if (options.type !== 'git' && options.type !== 'website' && options.type 
!== 'local') { + console.error('Error: --type must be "git", "website", or "local"'); process.exit(1); } - resourceType = options.type as 'git' | 'local'; + resourceType = options.type as ResourceType; } else { - // Auto-detect: if it looks like a URL, it's git; otherwise local - const isUrl = + const looksLikeUrl = urlOrPath.startsWith('http://') || urlOrPath.startsWith('https://') || urlOrPath.startsWith('github.com/') || urlOrPath.includes('github.com/'); - resourceType = isUrl ? 'git' : 'local'; + resourceType = looksLikeUrl ? 'git' : 'local'; } - // If all required options provided via flags, skip wizard if (options.name && resourceType === 'git' && parseGitHubUrl(urlOrPath)) { - // Non-interactive git add const normalizedUrl = normalizeGitHubUrl(urlOrPath); const server = await ensureServer({ serverUrl: globalOpts?.server, @@ -427,7 +457,6 @@ export const addCommand = new Command('add') }); server.stop(); - console.log(`Added git resource: ${options.name}`); if (resource.type === 'git' && resource.url !== normalizedUrl) { console.log(` URL normalized: ${resource.url}`); @@ -435,8 +464,46 @@ export const addCommand = new Command('add') return; } + if (options.name && resourceType === 'website') { + if (!urlOrPath.startsWith('http://') && !urlOrPath.startsWith('https://')) { + console.error('Error: website resources require an absolute URL.'); + process.exit(1); + } + const server = await ensureServer({ + serverUrl: globalOpts?.server, + port: globalOpts?.port, + quiet: true + }); + + const maxPages = options.maxPages ?? DEFAULT_WEBSITE_MAX_PAGES; + const maxDepth = options.maxDepth ?? DEFAULT_WEBSITE_MAX_DEPTH; + const ttlHours = options.ttlHours ?? 
DEFAULT_WEBSITE_TTL_HOURS; + if (!Number.isFinite(maxPages) || maxPages < 1) { + throw new Error('maxPages must be an integer >= 1'); + } + if (!Number.isFinite(maxDepth) || maxDepth < 0) { + throw new Error('maxDepth must be an integer >= 0'); + } + if (!Number.isFinite(ttlHours) || ttlHours < 1) { + throw new Error('ttlHours must be an integer >= 1'); + } + + await addResource(server.url, { + type: 'website', + name: options.name, + url: urlOrPath, + maxPages, + maxDepth, + ttlHours, + ...(options.notes && { specialNotes: options.notes }) + }); + + server.stop(); + console.log(`Added website resource: ${options.name}`); + return; + } + if (options.name && resourceType === 'local') { - // Non-interactive local add const resolvedPath = path.isAbsolute(urlOrPath) ? urlOrPath : path.resolve(process.cwd(), urlOrPath); @@ -458,12 +525,15 @@ export const addCommand = new Command('add') return; } - // Interactive wizard based on type if (resourceType === 'git') { await addGitResourceWizard(urlOrPath, options, globalOpts); - } else { - await addLocalResourceWizard(urlOrPath, options, globalOpts); + return; + } + if (resourceType === 'website') { + await addWebsiteResourceWizard(urlOrPath, options, globalOpts); + return; } + await addLocalResourceWizard(urlOrPath, options, globalOpts); }); if (Result.isError(result)) { diff --git a/apps/cli/src/commands/remove.ts b/apps/cli/src/commands/remove.ts index 2e992f61..a8175b0e 100644 --- a/apps/cli/src/commands/remove.ts +++ b/apps/cli/src/commands/remove.ts @@ -25,7 +25,17 @@ interface LocalResource { specialNotes?: string; } -type ResourceDefinition = GitResource | LocalResource; +interface WebsiteResource { + type: 'website'; + name: string; + url: string; + maxPages?: number; + maxDepth?: number; + ttlHours?: number; + specialNotes?: string; +} + +type ResourceDefinition = GitResource | LocalResource | WebsiteResource; const isGitResource = (r: ResourceDefinition): r is GitResource => r.type === 'git'; @@ -42,7 +52,7 @@ 
async function selectSingleResource(resources: ResourceDefinition[]): Promise { - const location = isGitResource(r) ? r.url : r.path; + const location = isGitResource(r) ? r.url : r.type === 'website' ? r.url : r.path; console.log(` ${idx + 1}. ${r.name} ${dim(`(${location})`)}`); }); console.log(''); diff --git a/apps/cli/src/commands/resources.ts b/apps/cli/src/commands/resources.ts index 02ef5779..46598451 100644 --- a/apps/cli/src/commands/resources.ts +++ b/apps/cli/src/commands/resources.ts @@ -47,6 +47,13 @@ export const resourcesCommand = new Command('resources') console.log(` Search Path: ${r.searchPath}`); } if (r.specialNotes) console.log(` Notes: ${r.specialNotes}`); + } else if (r.type === 'website') { + console.log(` ${r.name} (website)`); + console.log(` URL: ${r.url}`); + console.log(` Max Pages: ${r.maxPages ?? 200}`); + console.log(` Max Depth: ${r.maxDepth ?? 3}`); + console.log(` TTL Hours: ${r.ttlHours ?? 24}`); + if (r.specialNotes) console.log(` Notes: ${r.specialNotes}`); } else { console.log(` ${r.name} (local)`); console.log(` Path: ${r.path}`); diff --git a/apps/cli/src/tui/components/input-section.tsx b/apps/cli/src/tui/components/input-section.tsx index 5733061b..c3e0f48b 100644 --- a/apps/cli/src/tui/components/input-section.tsx +++ b/apps/cli/src/tui/components/input-section.tsx @@ -90,7 +90,7 @@ export const InputSection: Component = () => { // Validate resources - require at least one @mention OR existing thread resources if (parsed.repos.length === 0 && existingResources.length === 0) { - messages.addSystemMessage('Use @reponame to add context. Example: @svelte How do I...?'); + messages.addSystemMessage('Use @resource to add context. Example: @svelte How do I...?'); return; } if (!parsed.question.trim()) { @@ -108,7 +108,7 @@ export const InputSection: Component = () => { } if (invalidRepos.length > 0) { messages.addSystemMessage( - `Repo(s) not found: ${invalidRepos.join(', ')}. 
Configure resources with "btca add".` + `Resource(s) not found: ${invalidRepos.join(', ')}. Configure resources with "btca add".` ); return; } diff --git a/apps/cli/src/tui/components/main-input.tsx b/apps/cli/src/tui/components/main-input.tsx index 442a22a6..ab6e3b13 100644 --- a/apps/cli/src/tui/components/main-input.tsx +++ b/apps/cli/src/tui/components/main-input.tsx @@ -40,7 +40,7 @@ export const MainInput: Component = (props) => { if (props.isStreaming) { return 'press esc to cancel'; } - return '@repo question... or / for commands'; + return '@resource question... or / for commands'; }; const getPartValueLength = (p: InputState[number]) => diff --git a/apps/cli/src/tui/components/repo-mention-palette.tsx b/apps/cli/src/tui/components/repo-mention-palette.tsx index f0a27d7c..96211c3d 100644 --- a/apps/cli/src/tui/components/repo-mention-palette.tsx +++ b/apps/cli/src/tui/components/repo-mention-palette.tsx @@ -130,7 +130,7 @@ export const RepoMentionPalette: Component = (props) => padding: 1 }} > - + {(repo, i) => { const actualIndex = () => visibleRange().start + i(); diff --git a/apps/cli/src/tui/components/status-bar.tsx b/apps/cli/src/tui/components/status-bar.tsx index df382da9..51e73e5a 100644 --- a/apps/cli/src/tui/components/status-bar.tsx +++ b/apps/cli/src/tui/components/status-bar.tsx @@ -55,10 +55,10 @@ export const StatusBar: Component = (props) => { // Show different help based on whether we have thread resources if (props.threadResources.length > 0) { - return ' Ask follow-up or [@repo] to add context [/] Commands [Ctrl+Q] Quit'; + return ' Ask follow-up or [@resource] to add context [/] Commands [Ctrl+Q] Quit'; } - return ' [@repo] Ask question [/] Commands [Ctrl+Q] Quit'; + return ' [@resource] Ask question [/] Commands [Ctrl+Q] Quit'; }; const getResourcesLabel = () => { diff --git a/apps/cli/src/tui/services.ts b/apps/cli/src/tui/services.ts index a1c6c14f..687ec0c2 100644 --- a/apps/cli/src/tui/services.ts +++ 
b/apps/cli/src/tui/services.ts @@ -38,20 +38,20 @@ export interface ModelUpdateResult { export const services = { /** - * Get all resources as Repos (only git resources for now) + * Get mentionable resources for the TUI. */ getRepos: async (): Promise => { const client = createClient(getServerUrl()); const { resources } = await getResources(client); return resources - .filter((r) => r.type === 'git') + .filter((r) => r.type === 'git' || r.type === 'website') .map((r) => ({ name: r.name, url: r.url ?? '', - branch: r.branch ?? 'main', + branch: r.type === 'git' ? (r.branch ?? 'main') : 'website', specialNotes: r.specialNotes ?? undefined, - searchPath: r.searchPath ?? undefined, - searchPaths: r.searchPaths ?? undefined + searchPath: r.type === 'git' ? (r.searchPath ?? undefined) : undefined, + searchPaths: r.type === 'git' ? (r.searchPaths ?? undefined) : undefined })); }, diff --git a/apps/docs/api-reference/local/config-resources.mdx b/apps/docs/api-reference/local/config-resources.mdx index b3a3d266..aa638ce5 100644 --- a/apps/docs/api-reference/local/config-resources.mdx +++ b/apps/docs/api-reference/local/config-resources.mdx @@ -3,4 +3,4 @@ title: 'Add a resource' openapi: 'POST /config/resources' --- -Adds a git or local resource to the current config. +Adds a git, local, or website resource to the current config. diff --git a/apps/docs/api-reference/local/resources.mdx b/apps/docs/api-reference/local/resources.mdx index 8ab21986..f3da82f8 100644 --- a/apps/docs/api-reference/local/resources.mdx +++ b/apps/docs/api-reference/local/resources.mdx @@ -3,4 +3,4 @@ title: 'List resources' openapi: 'GET /resources' --- -Lists all configured resources from the active config. +Lists all configured resources from the active config (git, local, and website). 
diff --git a/apps/docs/btca.spec.md b/apps/docs/btca.spec.md index 96365dae..b6771cfe 100644 --- a/apps/docs/btca.spec.md +++ b/apps/docs/btca.spec.md @@ -138,6 +138,14 @@ Example: "type": "local", "name": "internal-docs", "path": "/abs/path/docs" + }, + { + "type": "website", + "name": "public-docs", + "url": "https://example.com/docs", + "maxPages": 200, + "maxDepth": 3, + "ttlHours": 24 } ] } @@ -217,7 +225,7 @@ REPL supports `@resource` mentions. ### 4.3 `btca add [url-or-path]` -Add a git repo or local directory resource. +Add a git repo, website, or local directory resource. Options: @@ -225,14 +233,18 @@ Options: - `-n, --name ` - `-b, --branch ` (default `main`) - `-s, --search-path ` +- `--max-pages ` (website, default `200`) +- `--max-depth ` (website, default `3`) +- `--ttl-hours ` (website, default `24`) - `--notes ` -- `-t, --type ` +- `-t, --type ` Behavior: - If no argument, interactive wizard. - If `--type` omitted, auto‑detects URL vs path. - Git URLs are normalized to base repo when GitHub. +- Website resources require an absolute HTTPS URL. - Local paths are resolved to absolute paths. ### 4.4 `btca remove [name]` @@ -520,6 +532,15 @@ Response: "type": "local", "path": "/abs/path/docs", "specialNotes": null + }, + { + "name": "public-docs", + "type": "website", + "url": "https://example.com/docs", + "maxPages": 200, + "maxDepth": 3, + "ttlHours": 24, + "specialNotes": null } ] } @@ -623,6 +644,19 @@ Request (local): { "type": "local", "name": "docs", "path": "/abs/path/docs" } ``` +Request (website): + +```json +{ + "type": "website", + "name": "public-docs", + "url": "https://example.com/docs", + "maxPages": 200, + "maxDepth": 3, + "ttlHours": 24 +} +``` + Response: the created resource (GitHub URLs normalized to base repo). 
### 6.10 `DELETE /config/resources` @@ -763,6 +797,13 @@ Git URL validation: - No localhost or private IPs - GitHub URLs normalized to base repo +Website URL validation: + +- HTTPS only +- No embedded credentials +- No localhost or private IPs +- Defaults: `maxPages=200`, `maxDepth=3`, `ttlHours=24` + --- ## 9. Remote Cloud API (used by CLI) diff --git a/apps/docs/guides/cli-reference.mdx b/apps/docs/guides/cli-reference.mdx index 1ddc3f22..78c5c3fc 100644 --- a/apps/docs/guides/cli-reference.mdx +++ b/apps/docs/guides/cli-reference.mdx @@ -31,7 +31,7 @@ The REPL supports `@resource` mentions. ## `btca add [url-or-path]` -Adds a git repo or local directory resource. +Adds a git repo, website, or local directory resource. Options: @@ -40,9 +40,12 @@ Options: - `-b, --branch ` sets a branch (default `main`). - `-s, --search-path ` sets one or more search paths. - `--notes ` sets special notes. -- `-t, --type ` forces the resource type. +- `-t, --type ` forces the resource type. +- `--max-pages ` sets a website crawl page cap (default `200`). +- `--max-depth ` sets a website crawl depth cap (default `3`). +- `--ttl-hours ` sets website cache TTL hours (default `24`). -Behavior: If no argument is provided, the CLI starts an interactive wizard. When `--type` is omitted, it auto-detects URL vs path. GitHub URLs are normalized to the base repo. Local paths are resolved to absolute paths. +Behavior: If no argument is provided, the CLI starts an interactive wizard. When `--type` is omitted, it auto-detects URL vs path (`git` for URLs, `local` for paths). GitHub URLs are normalized to the base repo. Local paths are resolved to absolute paths. 
Example (local path): diff --git a/apps/docs/guides/configuration.mdx b/apps/docs/guides/configuration.mdx index c5f5fba6..6a3dab42 100644 --- a/apps/docs/guides/configuration.mdx +++ b/apps/docs/guides/configuration.mdx @@ -42,6 +42,14 @@ Example: "branch": "main", "searchPath": "apps/svelte.dev", "specialNotes": "Focus on docs content" + }, + { + "type": "website", + "name": "myDocs", + "url": "https://example.com/docs", + "maxPages": 200, + "maxDepth": 3, + "ttlHours": 24 } ] } @@ -104,6 +112,8 @@ Example: - Question length: max 100,000 chars - Resources per request: max 20 - Git URL: HTTPS only, no embedded credentials, no localhost/private IPs +- Website URL: HTTPS only, no embedded credentials, no localhost/private IPs +- Website defaults: `maxPages=200`, `maxDepth=3`, `ttlHours=24` - GitHub URLs are normalized to the base repo. ## Known gaps diff --git a/apps/server/package.json b/apps/server/package.json index 304abc89..ff1ed30d 100644 --- a/apps/server/package.json +++ b/apps/server/package.json @@ -58,6 +58,7 @@ "@btca/shared": "workspace:*", "ai": "^6.0.49", "better-result": "^2.6.0", + "cheerio": "^1.2.0", "hono": "^4.7.11", "just-bash": "^2.7.0", "opencode-ai": "^1.1.36", diff --git a/apps/server/src/collections/service.ts b/apps/server/src/collections/service.ts index ba9affb0..4f45b51f 100644 --- a/apps/server/src/collections/service.ts +++ b/apps/server/src/collections/service.ts @@ -7,7 +7,7 @@ import { Transaction } from '../context/transaction.ts'; import { CommonHints, getErrorHint, getErrorMessage } from '../errors.ts'; import { Metrics } from '../metrics/index.ts'; import { Resources } from '../resources/service.ts'; -import { isGitResource } from '../resources/schema.ts'; +import { isGitResource, isWebsiteResource } from '../resources/schema.ts'; import { FS_RESOURCE_SYSTEM_NOTE, type BtcaFsResource } from '../resources/types.ts'; import { CollectionError, getCollectionKey, type CollectionResult } from './types.ts'; import { VirtualFs } from 
'../vfs/virtual-fs.ts'; @@ -150,6 +150,26 @@ export namespace Collections { repoSubPaths: args.resource.repoSubPaths, loadedAt: args.loadedAt }; + if (isWebsiteResource(args.definition)) { + const manifestResult = await Result.tryPromise(() => + Bun.file(path.join(args.resourcePath, '.btca-website-manifest.json')).text() + ); + const crawledAt = manifestResult.match({ + ok: (content) => { + const parsedResult = Result.try(() => JSON.parse(content) as { crawledAt?: string }); + return parsedResult.match({ + ok: (parsed) => parsed.crawledAt, + err: () => undefined + }); + }, + err: () => undefined + }); + return { + ...base, + url: args.definition.url, + crawledAt + }; + } if (!isGitResource(args.definition)) return base; const commit = await getGitHeadHash(args.resourcePath); return { diff --git a/apps/server/src/collections/virtual-metadata.ts b/apps/server/src/collections/virtual-metadata.ts index ab33ce38..f25a22a8 100644 --- a/apps/server/src/collections/virtual-metadata.ts +++ b/apps/server/src/collections/virtual-metadata.ts @@ -1,12 +1,13 @@ export type VirtualResourceMetadata = { name: string; fsName: string; - type: 'git' | 'local'; + type: 'git' | 'local' | 'website'; path: string; repoSubPaths: readonly string[]; url?: string; branch?: string; commit?: string; + crawledAt?: string; loadedAt: string; }; diff --git a/apps/server/src/index.ts b/apps/server/src/index.ts index 3af8eacd..9618dc8a 100644 --- a/apps/server/src/index.ts +++ b/apps/server/src/index.ts @@ -11,7 +11,11 @@ import { Context } from './context/index.ts'; import { getErrorMessage, getErrorTag, getErrorHint } from './errors.ts'; import { Metrics } from './metrics/index.ts'; import { Resources } from './resources/service.ts'; -import { GitResourceSchema, LocalResourceSchema } from './resources/schema.ts'; +import { + GitResourceSchema, + LocalResourceSchema, + WebsiteResourceSchema +} from './resources/schema.ts'; import { StreamService } from './stream/service.ts'; import type { 
BtcaStreamMetaEvent } from './stream/types.ts'; import { LIMITS, normalizeGitHubUrl } from './validation/index.ts'; @@ -141,9 +145,35 @@ const AddLocalResourceRequestSchema = z.object({ specialNotes: LocalResourceSchema.shape.specialNotes }); +const AddWebsiteResourceRequestSchema = z.object({ + type: z.literal('website'), + name: WebsiteResourceSchema.shape.name, + url: WebsiteResourceSchema.shape.url, + maxPages: z.coerce + .number() + .int('maxPages must be an integer') + .min(1) + .max(LIMITS.WEBSITE_MAX_PAGES_MAX) + .optional(), + maxDepth: z.coerce + .number() + .int('maxDepth must be an integer') + .min(0) + .max(LIMITS.WEBSITE_MAX_DEPTH_MAX) + .optional(), + ttlHours: z.coerce + .number() + .int('ttlHours must be an integer') + .min(1) + .max(LIMITS.WEBSITE_TTL_HOURS_MAX) + .optional(), + specialNotes: WebsiteResourceSchema.shape.specialNotes +}); + const AddResourceRequestSchema = z.discriminatedUnion('type', [ AddGitResourceRequestSchema, - AddLocalResourceRequestSchema + AddLocalResourceRequestSchema, + AddWebsiteResourceRequestSchema ]); const RemoveResourceRequestSchema = z.object({ @@ -263,14 +293,24 @@ const createApp = (deps: { searchPaths: r.searchPaths ?? null, specialNotes: r.specialNotes ?? null }; - } else { + } + if (r.type === 'website') { return { name: r.name, type: r.type, - path: r.path, + url: r.url, + maxPages: r.maxPages, + maxDepth: r.maxDepth, + ttlHours: r.ttlHours, specialNotes: r.specialNotes ?? null }; } + return { + name: r.name, + type: r.type, + path: r.path, + specialNotes: r.specialNotes ?? null + }; }) }); }) @@ -407,16 +447,28 @@ const createApp = (deps: { }; const added = await config.addResource(resource); return c.json(added, 201); - } else { + } + if (decoded.type === 'website') { const resource = { - type: 'local' as const, + type: 'website' as const, name: decoded.name, - path: decoded.path, + url: decoded.url, + maxPages: decoded.maxPages ?? LIMITS.WEBSITE_DEFAULT_MAX_PAGES, + maxDepth: decoded.maxDepth ?? 
LIMITS.WEBSITE_DEFAULT_MAX_DEPTH, + ttlHours: decoded.ttlHours ?? LIMITS.WEBSITE_DEFAULT_TTL_HOURS, ...(decoded.specialNotes && { specialNotes: decoded.specialNotes }) }; const added = await config.addResource(resource); return c.json(added, 201); } + const resource = { + type: 'local' as const, + name: decoded.name, + path: decoded.path, + ...(decoded.specialNotes && { specialNotes: decoded.specialNotes }) + }; + const added = await config.addResource(resource); + return c.json(added, 201); }) // DELETE /config/resources - Remove a resource diff --git a/apps/server/src/resources/impls/website.test.ts b/apps/server/src/resources/impls/website.test.ts new file mode 100644 index 00000000..17923719 --- /dev/null +++ b/apps/server/src/resources/impls/website.test.ts @@ -0,0 +1,214 @@ +import { afterEach, beforeEach, describe, expect, it } from 'bun:test'; +import { promises as fs } from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; + +import { GlobTool } from '../../tools/glob.ts'; +import { GrepTool } from '../../tools/grep.ts'; +import { ListTool } from '../../tools/list.ts'; +import { ReadTool } from '../../tools/read.ts'; +import { VirtualFs } from '../../vfs/virtual-fs.ts'; +import { loadWebsiteResource } from './website.ts'; + +const FIXTURE_URL = 'https://docs.example.com/docs'; + +type MockResponseInit = { + status?: number; + headers?: Record; + body?: string; +}; + +type MockRoutes = Record MockResponseInit)>; + +describe('Website Resource', () => { + let tempDir = ''; + let originalFetch: typeof fetch; + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'btca-website-test-')); + originalFetch = globalThis.fetch; + }); + + afterEach(async () => { + globalThis.fetch = originalFetch; + VirtualFs.disposeAll(); + await fs.rm(tempDir, { recursive: true, force: true }); + }); + + const withMockFetch = (routes: MockRoutes, fallback?: () => never) => { + const calls: string[] = []; + globalThis.fetch = (async 
(input: string | URL | Request) => { + const url = + typeof input === 'string' ? input : input instanceof URL ? input.toString() : input.url; + calls.push(url); + const route = routes[url]; + if (!route) { + if (fallback) fallback(); + return new Response('not found', { status: 404 }); + } + const response = typeof route === 'function' ? route() : route; + return new Response(response.body ?? '', { + status: response.status ?? 200, + headers: response.headers + }); + }) as typeof fetch; + return calls; + }; + + const baseArgs = () => ({ + type: 'website' as const, + name: 'docs-site', + url: FIXTURE_URL, + maxPages: 10, + maxDepth: 3, + ttlHours: 24, + resourcesDirectoryPath: tempDir, + specialAgentInstructions: '', + quiet: true + }); + + it('rejects non-HTTPS website URLs', async () => { + expect( + loadWebsiteResource({ + ...baseArgs(), + url: 'http://docs.example.com/docs' + }) + ).rejects.toThrow(); + }); + + it('crawls website pages, respects robots, and supports tools over snapshot files', async () => { + withMockFetch({ + 'https://docs.example.com/robots.txt': { + body: 'User-agent: *\nDisallow: /docs/private\n' + }, + 'https://docs.example.com/sitemap.xml': { + headers: { 'content-type': 'application/xml' }, + body: ` + https://docs.example.com/docs/getting-started + https://docs.example.com/docs/private + ` + }, + 'https://docs.example.com/docs': { + headers: { 'content-type': 'text/html' }, + body: ` + Docs Home +
+

Docs Home

+

Welcome to docs.

+ Start + Private +
+ + ` + }, + 'https://docs.example.com/docs/getting-started': { + headers: { 'content-type': 'text/html' }, + body: ` + Getting Started +
+

Getting Started

+

Install and run.

+
+ + ` + }, + 'https://docs.example.com/docs/private': { + headers: { 'content-type': 'text/html' }, + body: 'Private

blocked

' + } + }); + + const resource = await loadWebsiteResource(baseArgs()); + const resourcePath = await resource.getAbsoluteDirectoryPath(); + + expect(await Bun.file(path.join(resourcePath, 'pages/docs.md')).exists()).toBe(true); + expect(await Bun.file(path.join(resourcePath, 'pages/docs/getting-started.md')).exists()).toBe( + true + ); + expect(await Bun.file(path.join(resourcePath, 'pages/docs/private.md')).exists()).toBe(false); + + const indexLines = (await Bun.file(path.join(resourcePath, '_index.jsonl')).text()) + .split('\n') + .filter(Boolean); + expect(indexLines.length).toBe(2); + + const vfsId = VirtualFs.create(); + await VirtualFs.mkdir('/', { recursive: true }, vfsId); + await VirtualFs.importDirectoryFromDisk({ + sourcePath: resourcePath, + destinationPath: '/docs-site', + vfsId + }); + + const context = { basePath: '/', vfsId }; + const listResult = await ListTool.execute({ path: '.' }, context); + expect(listResult.output).toContain('docs-site'); + + const globResult = await GlobTool.execute({ pattern: '**/*.md' }, context); + expect(globResult.output).toContain('docs-site/pages/docs/getting-started.md'); + + const grepResult = await GrepTool.execute({ pattern: 'Getting Started' }, context); + expect(grepResult.output).toContain('docs-site/pages/docs/getting-started.md'); + + const readResult = await ReadTool.execute({ path: 'docs-site/pages/docs.md' }, context); + expect(readResult.output).toContain('Source: https://docs.example.com/docs'); + + VirtualFs.dispose(vfsId); + }); + + it('uses cached snapshot when still fresh', async () => { + const initialCalls = withMockFetch({ + 'https://docs.example.com/robots.txt': { body: 'User-agent: *\nAllow: /\n' }, + 'https://docs.example.com/sitemap.xml': { + headers: { 'content-type': 'application/xml' }, + body: '' + }, + 'https://docs.example.com/docs': { + headers: { 'content-type': 'text/html' }, + body: 'Docs

cached

' + } + }); + + const resource = await loadWebsiteResource(baseArgs()); + const resourcePath = await resource.getAbsoluteDirectoryPath(); + expect(initialCalls.length).toBeGreaterThan(0); + + const cachedCalls = withMockFetch({}, () => { + throw new Error('fetch should not be called for fresh cache'); + }); + const cached = await loadWebsiteResource(baseArgs()); + expect(await cached.getAbsoluteDirectoryPath()).toBe(resourcePath); + expect(cachedCalls.length).toBe(0); + }); + + it('falls back to stale cache when re-crawl fails', async () => { + withMockFetch({ + 'https://docs.example.com/robots.txt': { body: 'User-agent: *\nAllow: /\n' }, + 'https://docs.example.com/sitemap.xml': { + headers: { 'content-type': 'application/xml' }, + body: '' + }, + 'https://docs.example.com/docs': { + headers: { 'content-type': 'text/html' }, + body: 'Docs

cached

' + } + }); + + const seeded = await loadWebsiteResource({ ...baseArgs(), ttlHours: 1 }); + const resourcePath = await seeded.getAbsoluteDirectoryPath(); + const manifestPath = path.join(resourcePath, '.btca-website-manifest.json'); + const manifest = JSON.parse(await Bun.file(manifestPath).text()) as { crawledAt: string }; + manifest.crawledAt = new Date(Date.now() - 48 * 60 * 60 * 1000).toISOString(); + await Bun.write(manifestPath, JSON.stringify(manifest)); + + withMockFetch({ + 'https://docs.example.com/robots.txt': () => { + throw new Error('network failure'); + } + }); + + const fallback = await loadWebsiteResource({ ...baseArgs(), ttlHours: 1 }); + expect(await fallback.getAbsoluteDirectoryPath()).toBe(resourcePath); + expect(await Bun.file(path.join(resourcePath, 'pages/docs.md')).exists()).toBe(true); + }); +}); diff --git a/apps/server/src/resources/impls/website.ts b/apps/server/src/resources/impls/website.ts new file mode 100644 index 00000000..de306cd8 --- /dev/null +++ b/apps/server/src/resources/impls/website.ts @@ -0,0 +1,739 @@ +import { promises as fs } from 'node:fs'; +import path from 'node:path'; + +import { Result } from 'better-result'; +import { load } from 'cheerio'; + +import { CommonHints } from '../../errors.ts'; +import { Metrics } from '../../metrics/index.ts'; +import { LIMITS } from '../../validation/index.ts'; +import { ResourceError, resourceNameToKey } from '../helpers.ts'; +import { WebsiteResourceSchema } from '../schema.ts'; +import type { BtcaFsResource, BtcaWebsiteResourceArgs } from '../types.ts'; + +type RobotsRules = { + allows: string[]; + disallows: string[]; +}; + +type CrawlQueueItem = { + url: string; + depth: number; +}; + +type CrawledPage = { + url: string; + title: string; + markdown: string; + headings: string[]; + fetchedAt: string; +}; + +type CrawlResult = { + pages: CrawledPage[]; + scopePath: string; +}; + +type WebsiteManifestPage = { + url: string; + title: string; + filePath: string; + fetchedAt: string; 
/**
 * Reports whether a cached website snapshot is still within its TTL.
 *
 * @param manifest - Snapshot manifest; only `crawledAt` (an ISO-8601 timestamp) is read.
 * @param ttlHours - Maximum snapshot age, in hours, before a re-crawl is required.
 * @returns `true` only when `crawledAt` parses, is not in the future, and is
 *   younger than `ttlHours`.
 */
const isManifestFresh = (manifest: WebsiteManifest, ttlHours: number) => {
  const crawledAtMs = Date.parse(manifest.crawledAt);
  if (Number.isNaN(crawledAtMs)) return false;

  const ageMs = Date.now() - crawledAtMs;
  // A timestamp in the future (clock change, hand-edited or corrupt manifest)
  // yields a negative age that would otherwise satisfy `< ttlMs` forever and
  // pin the stale snapshot. Treat it as expired so the next load re-crawls.
  if (ageMs < 0) return false;

  const ttlMs = ttlHours * 60 * 60 * 1000;
  return ageMs < ttlMs;
};
/** The five predefined XML entities mapped to the characters they stand for. */
const XML_NAMED_ENTITIES: Readonly<Record<string, string>> = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'"
};

/**
 * Decodes XML character entities in a string (used for sitemap `<loc>` values).
 *
 * Handles the five predefined entities plus decimal (`&#39;`) and hexadecimal
 * (`&#x27;`) character references. Decoding happens in a single pass, so text
 * produced by one replacement is never re-scanned: `&amp;lt;` becomes the
 * literal text `&lt;`, not `<`. A chain of sequential `.replace()` calls that
 * handles `&amp;` first would double-decode that input.
 *
 * @param value - Raw text that may contain XML entities.
 * @returns The text with recognised entities replaced; unknown entities and
 *   bare ampersands are left untouched.
 */
const decodeXmlEntities = (value: string) =>
  value.replace(
    /&(?:(amp|lt|gt|quot|apos)|#(\d+)|#x([0-9a-fA-F]+));/g,
    (entity: string, named?: string, decimal?: string, hex?: string) => {
      if (named) return XML_NAMED_ENTITIES[named] ?? entity;
      const codePoint = decimal ? Number.parseInt(decimal, 10) : Number.parseInt(hex ?? '', 16);
      // Leave out-of-range references as-is rather than throwing from fromCodePoint.
      const isValid = Number.isInteger(codePoint) && codePoint > 0 && codePoint <= 0x10ffff;
      return isValid ? String.fromCodePoint(codePoint) : entity;
    }
  );
/**
 * Tests one robots.txt path pattern against a URL path.
 *
 * Supports the two special characters from RFC 9309: `*` matches any run of
 * characters and a trailing `$` anchors the pattern to the end of the path.
 * Without either, the pattern is a plain prefix match. Matching walks the
 * literal segments between `*`s with `indexOf`, so no regular expression is
 * built from remote input.
 */
const matchesRobotsRule = (rule: string, candidatePath: string) => {
  const anchored = rule.endsWith('$');
  const segments = (anchored ? rule.slice(0, -1) : rule).split('*');

  const first = segments[0] ?? '';
  if (!candidatePath.startsWith(first)) return false;
  let cursor = first.length;

  // No wildcard: prefix match, or exact match when anchored with `$`.
  if (segments.length === 1) return !anchored || candidatePath.length === cursor;

  // Each middle segment must appear, in order, after the previous one.
  for (const segment of segments.slice(1, -1)) {
    const at = candidatePath.indexOf(segment, cursor);
    if (at === -1) return false;
    cursor = at + segment.length;
  }

  const last = segments[segments.length - 1] ?? '';
  if (anchored) {
    return candidatePath.length - last.length >= cursor && candidatePath.endsWith(last);
  }
  return candidatePath.indexOf(last, cursor) !== -1;
};

/**
 * Decides whether robots.txt rules permit crawling a path.
 *
 * The longest matching rule wins; when an allow and a disallow rule of equal
 * length both match, allow wins. Blank rules are ignored, and a path matched
 * by no rule is allowed.
 *
 * @param candidatePath - URL path to test (e.g. `/docs/intro`).
 * @param rules - Allow/disallow patterns collected for this crawler.
 * @returns `true` when the path may be fetched.
 */
const isPathAllowedByRobots = (candidatePath: string, rules: RobotsRules) => {
  const candidates = [
    ...rules.disallows.map((rule) => ({ rule: rule.trim(), allow: false })),
    ...rules.allows.map((rule) => ({ rule: rule.trim(), allow: true }))
  ];

  let winner: { rule: string; allow: boolean } | undefined;
  for (const candidate of candidates) {
    if (!candidate.rule) continue;
    if (!matchesRobotsRule(candidate.rule, candidatePath)) continue;
    const isLonger = !winner || candidate.rule.length > winner.rule.length;
    const isAllowTie =
      !!winner && candidate.rule.length === winner.rule.length && candidate.allow;
    if (isLonger || isAllowTie) winner = candidate;
  }

  return winner?.allow ?? true;
};
/**
 * Downloads `<origin>/sitemap.xml` and returns the URLs listed in its `<loc>`
 * elements.
 *
 * The `<loc>` pattern uses `[\s\S]` so values that a pretty-printer placed on
 * their own line are still captured (a `.`-based pattern stops at newlines).
 * Values wrapped in `<![CDATA[...]]>` are unwrapped and returned verbatim;
 * all other values are entity-decoded with `decodeXmlEntities`.
 *
 * Best-effort: a non-2xx response or any network/parse failure yields an
 * empty list. Failures are reported through `Metrics.error` unless `quiet`.
 *
 * @param start - Start URL of the crawl; only its origin is used.
 * @param quiet - Suppresses error metrics when `true`.
 * @returns Raw (not yet normalised or scope-filtered) sitemap URLs.
 */
const fetchSitemapUrls = async (start: URL, quiet: boolean): Promise<string[]> => {
  const result = await Result.tryPromise(async () => {
    const response = await fetch(`${start.origin}/sitemap.xml`, {
      headers: { 'user-agent': BOT_USER_AGENT },
      signal: AbortSignal.timeout(12_000)
    });
    if (!response.ok) return [];

    const text = await response.text();
    const urls: string[] = [];
    for (const match of text.matchAll(/<loc>([\s\S]*?)<\/loc>/gi)) {
      const raw = (match[1] ?? '').trim();
      if (!raw) continue;
      // CDATA content is literal text, so it must not be entity-decoded.
      const cdata = /^<!\[CDATA\[([\s\S]*?)\]\]>$/.exec(raw);
      const url = cdata ? (cdata[1] ?? '').trim() : decodeXmlEntities(raw);
      if (url) urls.push(url);
    }
    return urls;
  });

  return result.match({
    ok: (urls) => urls,
    err: (error) => {
      if (!quiet) {
        Metrics.error('resource.website.sitemap.error', { error: Metrics.errorInfo(error) });
      }
      return [];
    }
  });
};
'').toLowerCase(); + for (const token of content.split(',')) { + const normalized = token.trim(); + if (normalized) tokens.add(normalized); + } + }); + + return { + noindex: tokens.has('noindex') || tokens.has('none'), + nofollow: tokens.has('nofollow') || tokens.has('none') + }; +}; + +const extractLinks = (html: string, pageUrl: string) => { + const $ = load(html); + const links = new Set(); + + $('a[href]').each((_, element) => { + const href = $(element).attr('href'); + if (!href || href.startsWith('#')) return; + if (href.startsWith('mailto:') || href.startsWith('tel:') || href.startsWith('javascript:')) { + return; + } + const normalized = normalizeUrl(href, pageUrl); + if (!normalized) return; + links.add(normalized); + }); + + return Array.from(links); +}; + +const pageToMarkdown = (args: { pageUrl: string; html: string }) => { + const $ = load(args.html); + + $('script, style, noscript, template, svg').remove(); + + const title = + normalizeWhitespace($('title').first().text()) || + normalizeWhitespace($('h1').first().text()) || + new URL(args.pageUrl).pathname || + args.pageUrl; + + const headings = $('h1, h2, h3') + .map((_, element) => normalizeWhitespace($(element).text())) + .get() + .filter(Boolean) + .slice(0, 100); + + const textBlocks = $('main p, article p, main li, article li, p, li') + .map((_, element) => normalizeWhitespace($(element).text())) + .get() + .filter(Boolean) + .filter((text, index, all) => all.indexOf(text) === index) + .slice(0, 300); + + const fallback = normalizeWhitespace($('main').text() || $('article').text() || $('body').text()) + .split(/\.(?:\s+|$)/) + .map((chunk) => normalizeWhitespace(chunk)) + .filter(Boolean) + .map((chunk) => `${chunk}.`) + .slice(0, 150); + + const contentLines = (textBlocks.length > 0 ? textBlocks : fallback).slice(0, 300); + + const lines = [ + `# ${title}`, + '', + `Source: ${args.pageUrl}`, + '', + headings.length > 0 ? '## Headings' : '', + headings.length > 0 ? 
headings.map((heading) => `- ${heading}`).join('\n') : '', + headings.length > 0 ? '' : '', + '## Content', + ...contentLines + ].filter(Boolean); + + let markdown = lines.join('\n\n'); + if (markdown.length > 120_000) { + markdown = `${markdown.slice(0, 120_000)}\n\n[Content truncated due to size]`; + } + + return { + title, + headings, + markdown + }; +}; + +const fetchPage = async (url: string, quiet: boolean) => { + const result = await Result.tryPromise(async () => { + const response = await fetch(url, { + headers: { + 'user-agent': BOT_USER_AGENT, + accept: 'text/html,application/xhtml+xml,text/plain;q=0.9,*/*;q=0.1' + }, + signal: AbortSignal.timeout(15_000) + }); + if (!response.ok) return null; + + const contentLengthHeader = response.headers.get('content-length'); + if (contentLengthHeader) { + const contentLength = Number.parseInt(contentLengthHeader, 10); + if (Number.isFinite(contentLength) && contentLength > MAX_FETCH_BYTES) return null; + } + + const contentType = (response.headers.get('content-type') ?? 
'').toLowerCase(); + const isHtml = + contentType.includes('text/html') || + contentType.includes('application/xhtml+xml') || + contentType.includes('application/xml'); + const isText = contentType.startsWith('text/'); + if (!isHtml && !isText) return null; + + const body = await response.text(); + if (body.length > MAX_FETCH_BYTES) return null; + + if (!isHtml) { + const normalizedText = normalizeWhitespace(body); + if (!normalizedText) return null; + return { + title: new URL(url).pathname || url, + headings: [] as string[], + markdown: `# ${new URL(url).pathname || url}\n\nSource: ${url}\n\n## Content\n\n${normalizedText}`, + links: [] as string[], + meta: { noindex: false, nofollow: false } + }; + } + + const meta = parseMetaRobots(body); + const { title, headings, markdown } = pageToMarkdown({ pageUrl: url, html: body }); + const links = extractLinks(body, url); + return { + title, + headings, + markdown, + links, + meta + }; + }); + + return result.match({ + ok: (page) => page, + err: (error) => { + if (!quiet) { + Metrics.error('resource.website.page.error', { + url, + error: Metrics.errorInfo(error) + }); + } + return null; + } + }); +}; + +const crawlWebsite = async (args: { + startUrl: string; + maxPages: number; + maxDepth: number; + quiet: boolean; +}): Promise => { + const normalizedStart = normalizeUrl(args.startUrl); + if (!normalizedStart) { + throw new ResourceError({ + message: 'Failed to normalize website URL', + hint: 'Provide a valid absolute HTTPS URL for website resources.' 
+ }); + } + + const start = new URL(normalizedStart); + const scopePath = scopePathFromStartUrl(start); + const robotsRules = await fetchRobotsRules(start.origin, args.quiet); + const sitemapUrls = await fetchSitemapUrls(start, args.quiet); + + const queue: CrawlQueueItem[] = []; + const visited = new Set(); + const pages: CrawledPage[] = []; + + const enqueue = (url: string, depth: number) => { + if (visited.has(url)) return; + if (depth > args.maxDepth) return; + const parsed = new URL(url); + if (!isInScope(parsed, start.origin, scopePath)) return; + if (hasBinaryExtension(parsed)) return; + if (!isPathAllowedByRobots(parsed.pathname, robotsRules)) return; + visited.add(url); + queue.push({ url, depth }); + }; + + enqueue(normalizedStart, 0); + for (const rawUrl of sitemapUrls) { + const normalized = normalizeUrl(rawUrl, normalizedStart); + if (!normalized) continue; + enqueue(normalized, 1); + } + + while (queue.length > 0 && pages.length < args.maxPages) { + const current = queue.shift(); + if (!current) break; + + const page = await fetchPage(current.url, args.quiet); + if (!page) continue; + + if (!page.meta.nofollow && current.depth < args.maxDepth) { + for (const link of page.links) { + enqueue(link, current.depth + 1); + } + } + + if (page.meta.noindex) continue; + + pages.push({ + url: current.url, + title: page.title, + markdown: page.markdown, + headings: page.headings, + fetchedAt: new Date().toISOString() + }); + } + + if (pages.length === 0) { + throw new ResourceError({ + message: `No indexable pages found for ${args.startUrl}`, + hint: 'The website may block crawling via robots.txt/meta tags, or no HTML pages were reachable from the provided URL.' 
+ }); + } + + return { pages, scopePath }; +}; + +const sanitizeSegment = (segment: string) => { + const cleaned = segment + .toLowerCase() + .replace(/[^a-z0-9._-]+/g, '-') + .replace(/^-+|-+$/g, ''); + return cleaned || 'index'; +}; + +const pageUrlToFilePath = (pageUrl: string) => { + const parsed = new URL(pageUrl); + const trimmed = parsed.pathname.replace(/\/+$/, ''); + if (trimmed.length === 0) return 'pages/index.md'; + + const rawSegments = trimmed.split('/').filter(Boolean).map(sanitizeSegment); + + if (rawSegments.length === 0) return 'pages/index.md'; + + const last = rawSegments[rawSegments.length - 1] ?? 'index'; + const normalizedLast = last.replace(/\.(?:html|htm)$/i, '') || 'index'; + rawSegments[rawSegments.length - 1] = normalizedLast; + + return `pages/${rawSegments.join('/')}.md`; +}; + +const buildSnapshot = async (args: { + targetPath: string; + startUrl: string; + maxPages: number; + maxDepth: number; + quiet: boolean; +}) => { + await fs.mkdir(path.join(args.targetPath, 'pages'), { recursive: true }); + + const crawl = await crawlWebsite({ + startUrl: args.startUrl, + maxPages: args.maxPages, + maxDepth: args.maxDepth, + quiet: args.quiet + }); + + const indexEntries: WebsiteManifestPage[] = []; + + for (const page of crawl.pages) { + const filePath = pageUrlToFilePath(page.url); + const absolutePath = path.join(args.targetPath, filePath); + await fs.mkdir(path.dirname(absolutePath), { recursive: true }); + await Bun.write(absolutePath, page.markdown); + indexEntries.push({ + url: page.url, + title: page.title, + filePath, + fetchedAt: page.fetchedAt + }); + } + + const indexContent = `${indexEntries.map((entry) => JSON.stringify(entry)).join('\n')}\n`; + await Bun.write(path.join(args.targetPath, INDEX_FILE), indexContent); + + const manifest: WebsiteManifest = { + version: 1, + url: args.startUrl, + scopePath: crawl.scopePath, + crawledAt: new Date().toISOString(), + maxPages: args.maxPages, + maxDepth: args.maxDepth, + pageCount: 
indexEntries.length, + pages: indexEntries + }; + + await Bun.write(path.join(args.targetPath, MANIFEST_FILE), JSON.stringify(manifest, null, 2)); + return manifest; +}; + +const ensureWebsiteResource = async (config: BtcaWebsiteResourceArgs) => { + const urlValidation = validateWebsiteUrl(config.url); + if (!urlValidation.success) { + throw new ResourceError({ + message: urlValidation.error, + hint: 'Website resources require a valid public HTTPS URL (no localhost/private IPs).' + }); + } + + const resourceKey = resourceNameToKey(config.name); + const localPath = path.join(config.resourcesDirectoryPath, resourceKey); + const tempPath = `${localPath}.tmp-${crypto.randomUUID()}`; + + const ensureDir = await Result.tryPromise(() => + fs.mkdir(config.resourcesDirectoryPath, { recursive: true }) + ); + ensureDir.match({ + ok: () => undefined, + err: (cause) => { + throw new ResourceError({ + message: 'Failed to create resources directory', + hint: 'Check that you have write permissions to the btca data directory.', + cause + }); + } + }); + + const existingManifest = await readManifest(localPath); + const hasExistingSnapshot = await hasSnapshotFiles(localPath); + if ( + existingManifest && + hasExistingSnapshot && + isManifestFresh(existingManifest, config.ttlHours) + ) { + Metrics.info('resource.website.cache.hit', { + name: config.name, + url: config.url, + pageCount: existingManifest.pageCount + }); + return localPath; + } + + const crawlResult = await Result.tryPromise(async () => { + await fs.rm(tempPath, { recursive: true, force: true }); + await fs.mkdir(tempPath, { recursive: true }); + return buildSnapshot({ + targetPath: tempPath, + startUrl: config.url, + maxPages: config.maxPages, + maxDepth: config.maxDepth, + quiet: config.quiet + }); + }); + + return crawlResult.match({ + ok: async (manifestPromise) => { + const manifest = await manifestPromise; + await fs.rm(localPath, { recursive: true, force: true }); + await fs.rename(tempPath, localPath); + 
Metrics.info('resource.website.crawled', { + name: config.name, + url: config.url, + pageCount: manifest.pageCount, + maxPages: config.maxPages, + maxDepth: config.maxDepth + }); + return localPath; + }, + err: async (cause) => { + await fs.rm(tempPath, { recursive: true, force: true }); + + if (existingManifest && hasExistingSnapshot) { + Metrics.error('resource.website.crawl_failed_fallback', { + name: config.name, + url: config.url, + error: Metrics.errorInfo(cause) + }); + return localPath; + } + + throw new ResourceError({ + message: `Failed to crawl website resource "${config.name}"`, + hint: `${CommonHints.CHECK_NETWORK} Verify the URL is reachable and allows crawling.`, + cause + }); + } + }); +}; + +export const loadWebsiteResource = async ( + config: BtcaWebsiteResourceArgs +): Promise => { + const maxPages = Math.min(Math.max(config.maxPages, 1), LIMITS.WEBSITE_MAX_PAGES_MAX); + const maxDepth = Math.min(Math.max(config.maxDepth, 0), LIMITS.WEBSITE_MAX_DEPTH_MAX); + const ttlHours = Math.min(Math.max(config.ttlHours, 1), LIMITS.WEBSITE_TTL_HOURS_MAX); + + const localPath = await Metrics.span( + 'resource.website.ensure', + () => + ensureWebsiteResource({ + ...config, + maxPages, + maxDepth, + ttlHours + }), + { resource: config.name } + ); + + return { + _tag: 'fs-based', + name: config.name, + fsName: resourceNameToKey(config.name), + type: 'website', + repoSubPaths: [], + specialAgentInstructions: config.specialAgentInstructions, + getAbsoluteDirectoryPath: async () => localPath + }; +}; diff --git a/apps/server/src/resources/index.ts b/apps/server/src/resources/index.ts index 1d863948..8dec014b 100644 --- a/apps/server/src/resources/index.ts +++ b/apps/server/src/resources/index.ts @@ -2,9 +2,21 @@ export { ResourceError } from './helpers.ts'; export { Resources } from './service.ts'; export { GitResourceSchema, + LocalResourceSchema, + WebsiteResourceSchema, ResourceDefinitionSchema, isGitResource, + isLocalResource, + isWebsiteResource, type 
GitResource, + type LocalResource, + type WebsiteResource, type ResourceDefinition } from './schema.ts'; -export { FS_RESOURCE_SYSTEM_NOTE, type BtcaFsResource, type BtcaGitResourceArgs } from './types.ts'; +export { + FS_RESOURCE_SYSTEM_NOTE, + type BtcaFsResource, + type BtcaGitResourceArgs, + type BtcaLocalResourceArgs, + type BtcaWebsiteResourceArgs +} from './types.ts'; diff --git a/apps/server/src/resources/schema.test.ts b/apps/server/src/resources/schema.test.ts new file mode 100644 index 00000000..883433c7 --- /dev/null +++ b/apps/server/src/resources/schema.test.ts @@ -0,0 +1,59 @@ +import { describe, expect, it } from 'bun:test'; + +import { ResourceDefinitionSchema, WebsiteResourceSchema } from './schema.ts'; + +describe('Resource schema', () => { + it('accepts valid website resources and applies defaults', () => { + const result = ResourceDefinitionSchema.safeParse({ + type: 'website', + name: 'public-docs', + url: 'https://docs.example.com/docs' + }); + + expect(result.success).toBe(true); + if (!result.success) return; + if (result.data.type !== 'website') return; + expect(result.data.maxPages).toBe(200); + expect(result.data.maxDepth).toBe(3); + expect(result.data.ttlHours).toBe(24); + }); + + it('rejects non-HTTPS website urls', () => { + const result = WebsiteResourceSchema.safeParse({ + type: 'website', + name: 'public-docs', + url: 'http://docs.example.com/docs' + }); + + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.issues[0]?.message).toContain('HTTPS'); + }); + + it('rejects private/local website urls', () => { + const result = WebsiteResourceSchema.safeParse({ + type: 'website', + name: 'public-docs', + url: 'https://localhost/docs' + }); + + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.issues[0]?.message).toContain('localhost or private IP'); + }); + + it('enforces bounds for maxPages/maxDepth/ttlHours', () => { + const result = 
WebsiteResourceSchema.safeParse({ + type: 'website', + name: 'public-docs', + url: 'https://docs.example.com/docs', + maxPages: 0, + maxDepth: -1, + ttlHours: 0 + }); + + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.issues.length).toBeGreaterThanOrEqual(1); + }); +}); diff --git a/apps/server/src/resources/schema.ts b/apps/server/src/resources/schema.ts index 0cbe203a..d28a9650 100644 --- a/apps/server/src/resources/schema.ts +++ b/apps/server/src/resources/schema.ts @@ -90,6 +90,42 @@ const GitUrlSchema = z { message: 'Git URL must not point to localhost or private IP addresses' } ); +const WebsiteUrlSchema = z + .string() + .min(1, 'Website URL cannot be empty') + .refine( + (url) => { + const parsed = parseUrl(url); + return parsed ? parsed.protocol === 'https:' : false; + }, + { message: 'Website URL must be a valid HTTPS URL' } + ) + .refine( + (url) => { + const parsed = parseUrl(url); + if (!parsed) return true; + return !parsed.username && !parsed.password; + }, + { message: 'Website URL must not contain embedded credentials' } + ) + .refine( + (url) => { + const parsed = parseUrl(url); + if (!parsed) return true; + const hostname = parsed.hostname.toLowerCase(); + return !( + hostname === 'localhost' || + hostname.startsWith('127.') || + hostname.startsWith('192.168.') || + hostname.startsWith('10.') || + hostname.match(/^172\.(1[6-9]|2[0-9]|3[0-1])\./) || + hostname === '::1' || + hostname === '0.0.0.0' + ); + }, + { message: 'Website URL must not point to localhost or private IP addresses' } + ); + /** * Branch name field with security validation. 
*/ @@ -175,13 +211,40 @@ export const LocalResourceSchema = z.object({ specialNotes: SpecialNotesSchema }); +export const WebsiteResourceSchema = z.object({ + type: z.literal('website'), + name: ResourceNameSchema, + url: WebsiteUrlSchema, + maxPages: z + .number() + .int('maxPages must be an integer') + .min(1, 'maxPages must be at least 1') + .max(LIMITS.WEBSITE_MAX_PAGES_MAX, `maxPages cannot exceed ${LIMITS.WEBSITE_MAX_PAGES_MAX}`) + .default(LIMITS.WEBSITE_DEFAULT_MAX_PAGES), + maxDepth: z + .number() + .int('maxDepth must be an integer') + .min(0, 'maxDepth must be at least 0') + .max(LIMITS.WEBSITE_MAX_DEPTH_MAX, `maxDepth cannot exceed ${LIMITS.WEBSITE_MAX_DEPTH_MAX}`) + .default(LIMITS.WEBSITE_DEFAULT_MAX_DEPTH), + ttlHours: z + .number() + .int('ttlHours must be an integer') + .min(1, 'ttlHours must be at least 1') + .max(LIMITS.WEBSITE_TTL_HOURS_MAX, `ttlHours cannot exceed ${LIMITS.WEBSITE_TTL_HOURS_MAX}`) + .default(LIMITS.WEBSITE_DEFAULT_TTL_HOURS), + specialNotes: SpecialNotesSchema +}); + export const ResourceDefinitionSchema = z.discriminatedUnion('type', [ GitResourceSchema, - LocalResourceSchema + LocalResourceSchema, + WebsiteResourceSchema ]); export type GitResource = z.infer; export type LocalResource = z.infer; +export type WebsiteResource = z.infer; export type ResourceDefinition = z.infer; export const isGitResource = (value: ResourceDefinition): value is GitResource => @@ -189,3 +252,6 @@ export const isGitResource = (value: ResourceDefinition): value is GitResource = export const isLocalResource = (value: ResourceDefinition): value is LocalResource => value.type === 'local'; + +export const isWebsiteResource = (value: ResourceDefinition): value is WebsiteResource => + value.type === 'website'; diff --git a/apps/server/src/resources/service.ts b/apps/server/src/resources/service.ts index c9214c9d..8f07235c 100644 --- a/apps/server/src/resources/service.ts +++ b/apps/server/src/resources/service.ts @@ -2,13 +2,21 @@ import { Config } from 
'../config/index.ts'; import { ResourceError, resourceNameToKey } from './helpers.ts'; import { loadGitResource } from './impls/git.ts'; +import { loadWebsiteResource } from './impls/website.ts'; import { isGitResource, + isWebsiteResource, type ResourceDefinition, type GitResource, - type LocalResource + type LocalResource, + type WebsiteResource } from './schema.ts'; -import type { BtcaFsResource, BtcaGitResourceArgs, BtcaLocalResourceArgs } from './types.ts'; +import type { + BtcaFsResource, + BtcaGitResourceArgs, + BtcaLocalResourceArgs, + BtcaWebsiteResourceArgs +} from './types.ts'; export namespace Resources { export type Service = { @@ -50,6 +58,22 @@ export namespace Resources { specialAgentInstructions: definition.specialNotes ?? '' }); + const definitionToWebsiteArgs = ( + definition: WebsiteResource, + resourcesDirectory: string, + quiet: boolean + ): BtcaWebsiteResourceArgs => ({ + type: 'website', + name: definition.name, + url: definition.url, + maxPages: definition.maxPages, + maxDepth: definition.maxDepth, + ttlHours: definition.ttlHours, + resourcesDirectoryPath: resourcesDirectory, + specialAgentInstructions: definition.specialNotes ?? 
'', + quiet + }); + const loadLocalResource = (args: BtcaLocalResourceArgs): BtcaFsResource => ({ _tag: 'fs-based', name: args.name, @@ -75,9 +99,13 @@ export namespace Resources { if (isGitResource(definition)) { return loadGitResource(definitionToGitArgs(definition, config.resourcesDirectory, quiet)); - } else { - return loadLocalResource(definitionToLocalArgs(definition)); } + if (isWebsiteResource(definition)) { + return loadWebsiteResource( + definitionToWebsiteArgs(definition, config.resourcesDirectory, quiet) + ); + } + return loadLocalResource(definitionToLocalArgs(definition)); } }; }; diff --git a/apps/server/src/resources/types.ts b/apps/server/src/resources/types.ts index 9f2409b3..70293f34 100644 --- a/apps/server/src/resources/types.ts +++ b/apps/server/src/resources/types.ts @@ -5,7 +5,7 @@ export type BtcaFsResource = { readonly _tag: 'fs-based'; readonly name: string; readonly fsName: string; - readonly type: 'git' | 'local'; + readonly type: 'git' | 'local' | 'website'; readonly repoSubPaths: readonly string[]; readonly specialAgentInstructions: string; readonly getAbsoluteDirectoryPath: () => Promise; @@ -28,3 +28,15 @@ export type BtcaLocalResourceArgs = { readonly path: string; readonly specialAgentInstructions: string; }; + +export type BtcaWebsiteResourceArgs = { + readonly type: 'website'; + readonly name: string; + readonly url: string; + readonly maxPages: number; + readonly maxDepth: number; + readonly ttlHours: number; + readonly resourcesDirectoryPath: string; + readonly specialAgentInstructions: string; + readonly quiet: boolean; +}; diff --git a/apps/server/src/resources/website.e2e.test.ts b/apps/server/src/resources/website.e2e.test.ts new file mode 100644 index 00000000..6a0cbe1a --- /dev/null +++ b/apps/server/src/resources/website.e2e.test.ts @@ -0,0 +1,203 @@ +import { afterEach, beforeEach, describe, expect, it } from 'bun:test'; +import { promises as fs } from 'node:fs'; +import os from 'node:os'; +import path from 
'node:path'; + +import { Collections } from '../collections/service.ts'; +import { clearVirtualCollectionMetadata } from '../collections/virtual-metadata.ts'; +import { Config } from '../config/index.ts'; +import { Context } from '../context/index.ts'; +import { startServer, type ServerInstance } from '../index.ts'; +import { GlobTool } from '../tools/glob.ts'; +import { GrepTool } from '../tools/grep.ts'; +import { ListTool } from '../tools/list.ts'; +import { ReadTool } from '../tools/read.ts'; +import { VirtualFs } from '../vfs/virtual-fs.ts'; + +import { Resources } from './service.ts'; + +type MockRoute = { + status?: number; + headers?: Record; + body?: string; +}; + +type MockRoutes = Record; + +describe('Website Resource E2E', () => { + let tempDir = ''; + let projectDir = ''; + let originalCwd = ''; + let originalHome: string | undefined; + let originalFetch: typeof fetch; + let server: ServerInstance | null = null; + let lastVfsId: string | undefined; + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'btca-website-e2e-')); + projectDir = path.join(tempDir, 'project'); + await fs.mkdir(projectDir, { recursive: true }); + + originalCwd = process.cwd(); + originalHome = process.env.HOME; + originalFetch = globalThis.fetch; + + process.env.HOME = tempDir; + process.chdir(projectDir); + }); + + afterEach(async () => { + if (lastVfsId) { + VirtualFs.dispose(lastVfsId); + clearVirtualCollectionMetadata(lastVfsId); + lastVfsId = undefined; + } + + if (server) { + server.stop(); + server = null; + } + + globalThis.fetch = originalFetch; + process.chdir(originalCwd); + process.env.HOME = originalHome; + await fs.rm(tempDir, { recursive: true, force: true }); + }); + + const installMockFetch = (routes: MockRoutes) => { + globalThis.fetch = (async (input: string | URL | Request, init?: RequestInit) => { + const url = + typeof input === 'string' ? input : input instanceof URL ? 
input.toString() : input.url; + + if (url.startsWith('http://localhost:') || url.startsWith('http://127.0.0.1:')) { + return originalFetch(input as Parameters[0], init); + } + + const route = routes[url]; + if (!route) return new Response('not found', { status: 404 }); + return new Response(route.body ?? '', { + status: route.status ?? 200, + headers: route.headers + }); + }) as typeof fetch; + }; + + it('adds website resource through API and reads crawled snapshot through tools', async () => { + await Bun.write( + path.join(projectDir, 'btca.config.jsonc'), + JSON.stringify( + { + $schema: 'https://btca.dev/btca.schema.json', + provider: 'opencode', + model: 'claude-haiku-4-5', + resources: [] + }, + null, + 2 + ) + ); + + installMockFetch({ + 'https://docs.example.com/robots.txt': { + body: 'User-agent: *\nDisallow: /docs/private\n' + }, + 'https://docs.example.com/sitemap.xml': { + headers: { 'content-type': 'application/xml' }, + body: ` + https://docs.example.com/docs/getting-started + https://docs.example.com/docs/private + ` + }, + 'https://docs.example.com/docs': { + headers: { 'content-type': 'text/html' }, + body: ` + Docs Home +
+

Docs Home

+

Welcome to docs.

+ Start + Private +
+ + ` + }, + 'https://docs.example.com/docs/getting-started': { + headers: { 'content-type': 'text/html' }, + body: ` + Getting Started +
+

Getting Started

+

Install and run.

+
+ + ` + }, + 'https://docs.example.com/docs/private': { + headers: { 'content-type': 'text/html' }, + body: 'Private

blocked

' + } + }); + + server = await startServer({ port: 0, quiet: true }); + + const addResponse = await fetch(`${server.url}/config/resources`, { + method: 'POST', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify({ + type: 'website', + name: 'docsSite', + url: 'https://docs.example.com/docs', + maxPages: 25, + maxDepth: 4, + ttlHours: 12, + specialNotes: 'Integration test docs resource' + }) + }); + expect(addResponse.status).toBe(201); + + const addJson = (await addResponse.json()) as { + type: string; + maxPages: number; + maxDepth: number; + }; + expect(addJson.type).toBe('website'); + expect(addJson.maxPages).toBe(25); + expect(addJson.maxDepth).toBe(4); + + const listResponse = await fetch(`${server.url}/resources`); + expect(listResponse.status).toBe(200); + const listJson = (await listResponse.json()) as { + resources: Array<{ name: string; type: string; url?: string; ttlHours?: number }>; + }; + const website = listJson.resources.find((resource) => resource.name === 'docsSite'); + expect(website).toBeDefined(); + expect(website?.type).toBe('website'); + expect(website?.url).toBe('https://docs.example.com/docs'); + expect(website?.ttlHours).toBe(12); + + await Context.run({ requestId: crypto.randomUUID(), txDepth: 0 }, async () => { + const config = await Config.load(); + const resources = Resources.create(config); + const collections = Collections.create({ config, resources }); + const collection = await collections.load({ resourceNames: ['docsSite'], quiet: true }); + lastVfsId = collection.vfsId; + + const context = { basePath: collection.path, vfsId: collection.vfsId }; + const listToolResult = await ListTool.execute({ path: '.' 
}, context); + expect(listToolResult.output).toContain('docsSite/'); + + const globResult = await GlobTool.execute({ pattern: '**/*.md' }, context); + expect(globResult.output).toContain('docsSite/pages/docs/getting-started.md'); + expect(globResult.output).not.toContain('docsSite/pages/docs/private.md'); + + const grepResult = await GrepTool.execute({ pattern: 'Install and run' }, context); + expect(grepResult.output).toContain('docsSite/pages/docs/getting-started.md'); + + const readResult = await ReadTool.execute( + { path: 'docsSite/pages/docs/getting-started.md' }, + context + ); + expect(readResult.output).toContain('Source: https://docs.example.com/docs/getting-started'); + }); + }); +}); diff --git a/apps/server/src/validation/index.ts b/apps/server/src/validation/index.ts index f4d559b5..cf483f97 100644 --- a/apps/server/src/validation/index.ts +++ b/apps/server/src/validation/index.ts @@ -51,7 +51,14 @@ export const LIMITS = { /** Maximum length for questions (includes conversation history when formatted) */ QUESTION_MAX: 100_000, /** Maximum number of resources per request */ - MAX_RESOURCES_PER_REQUEST: 20 + MAX_RESOURCES_PER_REQUEST: 20, + /** Default and maximum controls for website resources */ + WEBSITE_DEFAULT_MAX_PAGES: 200, + WEBSITE_MAX_PAGES_MAX: 5000, + WEBSITE_DEFAULT_MAX_DEPTH: 3, + WEBSITE_MAX_DEPTH_MAX: 10, + WEBSITE_DEFAULT_TTL_HOURS: 24, + WEBSITE_TTL_HOURS_MAX: 24 * 30 } as const; // ───────────────────────────────────────────────────────────────────────────── diff --git a/apps/web/src/convex/_generated/api.d.ts b/apps/web/src/convex/_generated/api.d.ts index ab7ffc1d..664092bd 100644 --- a/apps/web/src/convex/_generated/api.d.ts +++ b/apps/web/src/convex/_generated/api.d.ts @@ -8,72 +8,68 @@ * @module */ -import type * as analytics from "../analytics.js"; -import type * as analyticsEvents from "../analyticsEvents.js"; -import type * as apiHelpers from "../apiHelpers.js"; -import type * as authHelpers from "../authHelpers.js"; -import 
type * as clerkApiKeys from "../clerkApiKeys.js"; -import type * as clerkApiKeysQueries from "../clerkApiKeysQueries.js"; -import type * as cli from "../cli.js"; -import type * as cliInternal from "../cliInternal.js"; -import type * as crons from "../crons.js"; -import type * as http from "../http.js"; -import type * as instances_actions from "../instances/actions.js"; -import type * as instances_mutations from "../instances/mutations.js"; -import type * as instances_queries from "../instances/queries.js"; -import type * as mcp from "../mcp.js"; -import type * as mcpInternal from "../mcpInternal.js"; -import type * as mcpQuestions from "../mcpQuestions.js"; -import type * as messages from "../messages.js"; -import type * as migrations from "../migrations.js"; -import type * as projects from "../projects.js"; -import type * as resources from "../resources.js"; -import type * as scheduled_queries from "../scheduled/queries.js"; -import type * as scheduled_updates from "../scheduled/updates.js"; -import type * as scheduled_versionCheck from "../scheduled/versionCheck.js"; -import type * as seed from "../seed.js"; -import type * as streamSessions from "../streamSessions.js"; -import type * as threadTitle from "../threadTitle.js"; -import type * as threads from "../threads.js"; -import type * as usage from "../usage.js"; -import type * as users from "../users.js"; +import type * as analytics from '../analytics.js'; +import type * as analyticsEvents from '../analyticsEvents.js'; +import type * as apiHelpers from '../apiHelpers.js'; +import type * as authHelpers from '../authHelpers.js'; +import type * as clerkApiKeys from '../clerkApiKeys.js'; +import type * as clerkApiKeysQueries from '../clerkApiKeysQueries.js'; +import type * as cli from '../cli.js'; +import type * as cliInternal from '../cliInternal.js'; +import type * as crons from '../crons.js'; +import type * as http from '../http.js'; +import type * as instances_actions from '../instances/actions.js'; +import 
type * as instances_mutations from '../instances/mutations.js'; +import type * as instances_queries from '../instances/queries.js'; +import type * as mcp from '../mcp.js'; +import type * as mcpInternal from '../mcpInternal.js'; +import type * as mcpQuestions from '../mcpQuestions.js'; +import type * as messages from '../messages.js'; +import type * as migrations from '../migrations.js'; +import type * as projects from '../projects.js'; +import type * as resources from '../resources.js'; +import type * as scheduled_queries from '../scheduled/queries.js'; +import type * as scheduled_updates from '../scheduled/updates.js'; +import type * as scheduled_versionCheck from '../scheduled/versionCheck.js'; +import type * as seed from '../seed.js'; +import type * as streamSessions from '../streamSessions.js'; +import type * as threadTitle from '../threadTitle.js'; +import type * as threads from '../threads.js'; +import type * as usage from '../usage.js'; +import type * as users from '../users.js'; -import type { - ApiFromModules, - FilterApi, - FunctionReference, -} from "convex/server"; +import type { ApiFromModules, FilterApi, FunctionReference } from 'convex/server'; declare const fullApi: ApiFromModules<{ - analytics: typeof analytics; - analyticsEvents: typeof analyticsEvents; - apiHelpers: typeof apiHelpers; - authHelpers: typeof authHelpers; - clerkApiKeys: typeof clerkApiKeys; - clerkApiKeysQueries: typeof clerkApiKeysQueries; - cli: typeof cli; - cliInternal: typeof cliInternal; - crons: typeof crons; - http: typeof http; - "instances/actions": typeof instances_actions; - "instances/mutations": typeof instances_mutations; - "instances/queries": typeof instances_queries; - mcp: typeof mcp; - mcpInternal: typeof mcpInternal; - mcpQuestions: typeof mcpQuestions; - messages: typeof messages; - migrations: typeof migrations; - projects: typeof projects; - resources: typeof resources; - "scheduled/queries": typeof scheduled_queries; - "scheduled/updates": typeof 
scheduled_updates; - "scheduled/versionCheck": typeof scheduled_versionCheck; - seed: typeof seed; - streamSessions: typeof streamSessions; - threadTitle: typeof threadTitle; - threads: typeof threads; - usage: typeof usage; - users: typeof users; + analytics: typeof analytics; + analyticsEvents: typeof analyticsEvents; + apiHelpers: typeof apiHelpers; + authHelpers: typeof authHelpers; + clerkApiKeys: typeof clerkApiKeys; + clerkApiKeysQueries: typeof clerkApiKeysQueries; + cli: typeof cli; + cliInternal: typeof cliInternal; + crons: typeof crons; + http: typeof http; + 'instances/actions': typeof instances_actions; + 'instances/mutations': typeof instances_mutations; + 'instances/queries': typeof instances_queries; + mcp: typeof mcp; + mcpInternal: typeof mcpInternal; + mcpQuestions: typeof mcpQuestions; + messages: typeof messages; + migrations: typeof migrations; + projects: typeof projects; + resources: typeof resources; + 'scheduled/queries': typeof scheduled_queries; + 'scheduled/updates': typeof scheduled_updates; + 'scheduled/versionCheck': typeof scheduled_versionCheck; + seed: typeof seed; + streamSessions: typeof streamSessions; + threadTitle: typeof threadTitle; + threads: typeof threads; + usage: typeof usage; + users: typeof users; }>; /** @@ -84,10 +80,7 @@ declare const fullApi: ApiFromModules<{ * const myFunctionReference = api.myModule.myFunction; * ``` */ -export declare const api: FilterApi< - typeof fullApi, - FunctionReference ->; +export declare const api: FilterApi>; /** * A utility for referencing Convex functions in your app's internal API. 
@@ -97,96 +90,88 @@ export declare const api: FilterApi< * const myFunctionReference = internal.myModule.myFunction; * ``` */ -export declare const internal: FilterApi< - typeof fullApi, - FunctionReference ->; +export declare const internal: FilterApi>; export declare const components: { - migrations: { - lib: { - cancel: FunctionReference< - "mutation", - "internal", - { name: string }, - { - batchSize?: number; - cursor?: string | null; - error?: string; - isDone: boolean; - latestEnd?: number; - latestStart: number; - name: string; - next?: Array; - processed: number; - state: "inProgress" | "success" | "failed" | "canceled" | "unknown"; - } - >; - cancelAll: FunctionReference< - "mutation", - "internal", - { sinceTs?: number }, - Array<{ - batchSize?: number; - cursor?: string | null; - error?: string; - isDone: boolean; - latestEnd?: number; - latestStart: number; - name: string; - next?: Array; - processed: number; - state: "inProgress" | "success" | "failed" | "canceled" | "unknown"; - }> - >; - clearAll: FunctionReference< - "mutation", - "internal", - { before?: number }, - null - >; - getStatus: FunctionReference< - "query", - "internal", - { limit?: number; names?: Array }, - Array<{ - batchSize?: number; - cursor?: string | null; - error?: string; - isDone: boolean; - latestEnd?: number; - latestStart: number; - name: string; - next?: Array; - processed: number; - state: "inProgress" | "success" | "failed" | "canceled" | "unknown"; - }> - >; - migrate: FunctionReference< - "mutation", - "internal", - { - batchSize?: number; - cursor?: string | null; - dryRun: boolean; - fnHandle: string; - name: string; - next?: Array<{ fnHandle: string; name: string }>; - oneBatchOnly?: boolean; - }, - { - batchSize?: number; - cursor?: string | null; - error?: string; - isDone: boolean; - latestEnd?: number; - latestStart: number; - name: string; - next?: Array; - processed: number; - state: "inProgress" | "success" | "failed" | "canceled" | "unknown"; - } - >; - }; 
- }; + migrations: { + lib: { + cancel: FunctionReference< + 'mutation', + 'internal', + { name: string }, + { + batchSize?: number; + cursor?: string | null; + error?: string; + isDone: boolean; + latestEnd?: number; + latestStart: number; + name: string; + next?: Array; + processed: number; + state: 'inProgress' | 'success' | 'failed' | 'canceled' | 'unknown'; + } + >; + cancelAll: FunctionReference< + 'mutation', + 'internal', + { sinceTs?: number }, + Array<{ + batchSize?: number; + cursor?: string | null; + error?: string; + isDone: boolean; + latestEnd?: number; + latestStart: number; + name: string; + next?: Array; + processed: number; + state: 'inProgress' | 'success' | 'failed' | 'canceled' | 'unknown'; + }> + >; + clearAll: FunctionReference<'mutation', 'internal', { before?: number }, null>; + getStatus: FunctionReference< + 'query', + 'internal', + { limit?: number; names?: Array }, + Array<{ + batchSize?: number; + cursor?: string | null; + error?: string; + isDone: boolean; + latestEnd?: number; + latestStart: number; + name: string; + next?: Array; + processed: number; + state: 'inProgress' | 'success' | 'failed' | 'canceled' | 'unknown'; + }> + >; + migrate: FunctionReference< + 'mutation', + 'internal', + { + batchSize?: number; + cursor?: string | null; + dryRun: boolean; + fnHandle: string; + name: string; + next?: Array<{ fnHandle: string; name: string }>; + oneBatchOnly?: boolean; + }, + { + batchSize?: number; + cursor?: string | null; + error?: string; + isDone: boolean; + latestEnd?: number; + latestStart: number; + name: string; + next?: Array; + processed: number; + state: 'inProgress' | 'success' | 'failed' | 'canceled' | 'unknown'; + } + >; + }; + }; }; diff --git a/apps/web/src/convex/_generated/api.js b/apps/web/src/convex/_generated/api.js index 44bf9858..24593c74 100644 --- a/apps/web/src/convex/_generated/api.js +++ b/apps/web/src/convex/_generated/api.js @@ -8,7 +8,7 @@ * @module */ -import { anyApi, componentsGeneric } from 
"convex/server"; +import { anyApi, componentsGeneric } from 'convex/server'; /** * A utility for referencing Convex functions in your app's API. diff --git a/apps/web/src/convex/_generated/dataModel.d.ts b/apps/web/src/convex/_generated/dataModel.d.ts index f97fd194..5428df6f 100644 --- a/apps/web/src/convex/_generated/dataModel.d.ts +++ b/apps/web/src/convex/_generated/dataModel.d.ts @@ -9,13 +9,13 @@ */ import type { - DataModelFromSchemaDefinition, - DocumentByName, - TableNamesInDataModel, - SystemTableNames, -} from "convex/server"; -import type { GenericId } from "convex/values"; -import schema from "../schema.js"; + DataModelFromSchemaDefinition, + DocumentByName, + TableNamesInDataModel, + SystemTableNames +} from 'convex/server'; +import type { GenericId } from 'convex/values'; +import schema from '../schema.js'; /** * The names of all of your Convex tables. @@ -27,10 +27,7 @@ export type TableNames = TableNamesInDataModel; * * @typeParam TableName - A string literal type of the table name (like "users"). */ -export type Doc = DocumentByName< - DataModel, - TableName ->; +export type Doc = DocumentByName; /** * An identifier for a document in Convex. @@ -45,8 +42,7 @@ export type Doc = DocumentByName< * * @typeParam TableName - A string literal type of the table name (like "users"). */ -export type Id = - GenericId; +export type Id = GenericId; /** * A type describing your Convex data model. 
diff --git a/apps/web/src/convex/_generated/server.d.ts b/apps/web/src/convex/_generated/server.d.ts index bec05e68..1cc047ef 100644 --- a/apps/web/src/convex/_generated/server.d.ts +++ b/apps/web/src/convex/_generated/server.d.ts @@ -9,17 +9,17 @@ */ import { - ActionBuilder, - HttpActionBuilder, - MutationBuilder, - QueryBuilder, - GenericActionCtx, - GenericMutationCtx, - GenericQueryCtx, - GenericDatabaseReader, - GenericDatabaseWriter, -} from "convex/server"; -import type { DataModel } from "./dataModel.js"; + ActionBuilder, + HttpActionBuilder, + MutationBuilder, + QueryBuilder, + GenericActionCtx, + GenericMutationCtx, + GenericQueryCtx, + GenericDatabaseReader, + GenericDatabaseWriter +} from 'convex/server'; +import type { DataModel } from './dataModel.js'; /** * Define a query in this Convex app's public API. @@ -29,7 +29,7 @@ import type { DataModel } from "./dataModel.js"; * @param func - The query function. It receives a {@link QueryCtx} as its first argument. * @returns The wrapped query. Include this as an `export` to name it and make it accessible. */ -export declare const query: QueryBuilder; +export declare const query: QueryBuilder; /** * Define a query that is only accessible from other Convex functions (but not from the client). @@ -39,7 +39,7 @@ export declare const query: QueryBuilder; * @param func - The query function. It receives a {@link QueryCtx} as its first argument. * @returns The wrapped query. Include this as an `export` to name it and make it accessible. */ -export declare const internalQuery: QueryBuilder; +export declare const internalQuery: QueryBuilder; /** * Define a mutation in this Convex app's public API. @@ -49,7 +49,7 @@ export declare const internalQuery: QueryBuilder; * @param func - The mutation function. It receives a {@link MutationCtx} as its first argument. * @returns The wrapped mutation. Include this as an `export` to name it and make it accessible. 
*/ -export declare const mutation: MutationBuilder; +export declare const mutation: MutationBuilder; /** * Define a mutation that is only accessible from other Convex functions (but not from the client). @@ -59,7 +59,7 @@ export declare const mutation: MutationBuilder; * @param func - The mutation function. It receives a {@link MutationCtx} as its first argument. * @returns The wrapped mutation. Include this as an `export` to name it and make it accessible. */ -export declare const internalMutation: MutationBuilder; +export declare const internalMutation: MutationBuilder; /** * Define an action in this Convex app's public API. @@ -72,7 +72,7 @@ export declare const internalMutation: MutationBuilder; * @param func - The action. It receives an {@link ActionCtx} as its first argument. * @returns The wrapped action. Include this as an `export` to name it and make it accessible. */ -export declare const action: ActionBuilder; +export declare const action: ActionBuilder; /** * Define an action that is only accessible from other Convex functions (but not from the client). @@ -80,7 +80,7 @@ export declare const action: ActionBuilder; * @param func - The function. It receives an {@link ActionCtx} as its first argument. * @returns The wrapped function. Include this as an `export` to name it and make it accessible. */ -export declare const internalAction: ActionBuilder; +export declare const internalAction: ActionBuilder; /** * Define an HTTP action. 
diff --git a/apps/web/src/convex/_generated/server.js b/apps/web/src/convex/_generated/server.js index bf3d25ad..a18aa285 100644 --- a/apps/web/src/convex/_generated/server.js +++ b/apps/web/src/convex/_generated/server.js @@ -9,14 +9,14 @@ */ import { - actionGeneric, - httpActionGeneric, - queryGeneric, - mutationGeneric, - internalActionGeneric, - internalMutationGeneric, - internalQueryGeneric, -} from "convex/server"; + actionGeneric, + httpActionGeneric, + queryGeneric, + mutationGeneric, + internalActionGeneric, + internalMutationGeneric, + internalQueryGeneric +} from 'convex/server'; /** * Define a query in this Convex app's public API. diff --git a/apps/web/src/lib/types/index.ts b/apps/web/src/lib/types/index.ts index aa55ae64..9e44fa7d 100644 --- a/apps/web/src/lib/types/index.ts +++ b/apps/web/src/lib/types/index.ts @@ -19,11 +19,15 @@ import type { BtcaChunk, AssistantContent } from '@btca/shared'; // Resource types export interface Resource { name: string; - type: 'git' | 'local'; + type: 'git' | 'local' | 'website'; url?: string; branch?: string; path?: string; searchPath?: string; + searchPaths?: string[]; + maxPages?: number; + maxDepth?: number; + ttlHours?: number; specialNotes?: string; } diff --git a/apps/web/static/btca.schema.json b/apps/web/static/btca.schema.json index 467babe2..5163fbee 100644 --- a/apps/web/static/btca.schema.json +++ b/apps/web/static/btca.schema.json @@ -55,8 +55,11 @@ { "$ref": "#/$defs/gitResource" }, - { + { "$ref": "#/$defs/localResource" + }, + { + "$ref": "#/$defs/websiteResource" } ] }, @@ -134,6 +137,51 @@ }, "required": ["type", "name", "path"], "additionalProperties": false + }, + "websiteResource": { + "type": "object", + "title": "Website Resource", + "description": "A website snapshot crawled into a local cache for searching", + "properties": { + "type": { + "type": "string", + "const": "website", + "description": "Resource type identifier" + }, + "name": { + "type": "string", + "description": "Unique name 
for this resource" + }, + "url": { + "type": "string", + "description": "Website root URL to crawl", + "format": "uri" + }, + "maxPages": { + "type": "integer", + "minimum": 1, + "default": 200, + "description": "Maximum number of pages to crawl" + }, + "maxDepth": { + "type": "integer", + "minimum": 0, + "default": 3, + "description": "Maximum crawl depth from the start URL" + }, + "ttlHours": { + "type": "integer", + "minimum": 1, + "default": 24, + "description": "Hours before the cached website snapshot is refreshed" + }, + "specialNotes": { + "type": "string", + "description": "Additional context or notes about this resource for the AI" + } + }, + "required": ["type", "name", "url"], + "additionalProperties": false } } } diff --git a/apps/web/static/docs/example-btca.config.jsonc b/apps/web/static/docs/example-btca.config.jsonc index 6cf8a9de..2a2d1ab4 100644 --- a/apps/web/static/docs/example-btca.config.jsonc +++ b/apps/web/static/docs/example-btca.config.jsonc @@ -33,6 +33,15 @@ "type": "local", "path": "/absolute/path/to/project", "specialNotes": "This project's source code for self-reference." + }, + { + "name": "mydocs", + "type": "website", + "url": "https://example.com/docs", + "maxPages": 200, + "maxDepth": 3, + "ttlHours": 24, + "specialNotes": "Public docs website indexed into a local searchable snapshot." 
} ], diff --git a/btca.config.jsonc b/btca.config.jsonc index c6703c9d..bb7336c9 100644 --- a/btca.config.jsonc +++ b/btca.config.jsonc @@ -1,138 +1,146 @@ { - "$schema": "https://btca.dev/btca.schema.json", - "dataDirectory": ".btca", - "resources": [ - { - "type": "git", - "name": "runed", - "url": "https://github.com/svecosystem/runed", - "branch": "main" - }, - { - "type": "git", - "name": "justBash", - "url": "https://github.com/vercel-labs/just-bash", - "branch": "main" - }, - { - "type": "git", - "name": "autumn", - "url": "https://github.com/useautumn/typescript", - "branch": "main", - "specialNotes": "this is the TS SDK and cli for autumn. the cli is in atmn/ and the important sdk stuff is in package/" - }, - { - "type": "git", - "name": "convexWorkpools", - "url": "https://github.com/get-convex/workpool", - "branch": "main", - "specialNotes": "This is a Convex component that does work pools. Work pools are basically background jobs with proper queuing setup, retries, and all that stuff. " - }, - { - "type": "git", - "name": "daytona", - "url": "https://github.com/daytonaio/daytona", - "branch": "main", - "specialNotes": "this is the full daytona codebase. focus on the guides and examples for answers" - }, - { - "type": "git", - "name": "svelte", - "url": "https://github.com/sveltejs/svelte.dev", - "branch": "main", - "searchPaths": [ - "apps/svelte.dev" - ], - "specialNotes": "Svelte docs website. Focus on content directory for markdown documentation." 
- }, - { - "type": "git", - "name": "svelteKit", - "url": "https://github.com/sveltejs/kit", - "branch": "main", - "searchPath": "documentation" - }, - { - "type": "git", - "name": "tailwind", - "url": "https://github.com/tailwindlabs/tailwindcss.com", - "branch": "main", - "searchPath": "src/docs" - }, - { - "type": "git", - "name": "hono", - "url": "https://github.com/honojs/website", - "branch": "main", - "searchPath": "docs" - }, - { - "type": "git", - "name": "zod", - "url": "https://github.com/colinhacks/zod", - "branch": "main", - "searchPath": "packages/docs/content" - }, - { - "type": "git", - "name": "solidJs", - "url": "https://github.com/solidjs/solid-docs", - "branch": "main", - "searchPath": "src/routes" - }, - { - "type": "git", - "name": "commander", - "url": "https://github.com/tj/commander.js", - "branch": "master", - "searchPath": "docs" - }, - { - "type": "git", - "name": "vite", - "url": "https://github.com/vitejs/vite", - "branch": "main", - "searchPath": "docs" - }, - { - "type": "git", - "name": "opencode", - "url": "https://github.com/anomalyco/opencode", - "branch": "dev" - }, - { - "type": "git", - "name": "clerk", - "url": "https://github.com/clerk/javascript", - "branch": "main" - }, - { - "type": "git", - "name": "convexJs", - "url": "https://github.com/get-convex/convex-js", - "branch": "main" - }, - { - "type": "git", - "name": "convexDocs", - "url": "https://github.com/get-convex/convex-docs", - "branch": "main", - "specialNotes": "Official Convex documentation. Use for HTTP actions, queries, mutations, actions, schema, etc." - }, - { - "type": "git", - "name": "daytonaSdk", - "url": "https://github.com/daytonaio/sdk", - "branch": "main", - "specialNotes": "Daytona TypeScript SDK. Use for sandbox creation, management, and API reference." 
- }, - { - "type": "git", - "name": "convex", - "url": "https://github.com/get-convex/convex-js", - "branch": "main" - } - ], - "model": "gpt-5.3-codex", - "provider": "openai" -} \ No newline at end of file + "$schema": "https://btca.dev/btca.schema.json", + "dataDirectory": ".btca", + "resources": [ + { + "type": "git", + "name": "runed", + "url": "https://github.com/svecosystem/runed", + "branch": "main" + }, + { + "type": "git", + "name": "justBash", + "url": "https://github.com/vercel-labs/just-bash", + "branch": "main" + }, + { + "type": "git", + "name": "autumn", + "url": "https://github.com/useautumn/typescript", + "branch": "main", + "specialNotes": "this is the TS SDK and cli for autumn. the cli is in atmn/ and the important sdk stuff is in package/" + }, + { + "type": "git", + "name": "convexWorkpools", + "url": "https://github.com/get-convex/workpool", + "branch": "main", + "specialNotes": "This is a Convex component that does work pools. Work pools are basically background jobs with proper queuing setup, retries, and all that stuff. " + }, + { + "type": "git", + "name": "daytona", + "url": "https://github.com/daytonaio/daytona", + "branch": "main", + "specialNotes": "this is the full daytona codebase. focus on the guides and examples for answers" + }, + { + "type": "git", + "name": "svelte", + "url": "https://github.com/sveltejs/svelte.dev", + "branch": "main", + "searchPaths": ["apps/svelte.dev"], + "specialNotes": "Svelte docs website. Focus on content directory for markdown documentation." 
+ }, + { + "type": "git", + "name": "svelteKit", + "url": "https://github.com/sveltejs/kit", + "branch": "main", + "searchPath": "documentation" + }, + { + "type": "git", + "name": "tailwind", + "url": "https://github.com/tailwindlabs/tailwindcss.com", + "branch": "main", + "searchPath": "src/docs" + }, + { + "type": "git", + "name": "hono", + "url": "https://github.com/honojs/website", + "branch": "main", + "searchPath": "docs" + }, + { + "type": "git", + "name": "zod", + "url": "https://github.com/colinhacks/zod", + "branch": "main", + "searchPath": "packages/docs/content" + }, + { + "type": "git", + "name": "solidJs", + "url": "https://github.com/solidjs/solid-docs", + "branch": "main", + "searchPath": "src/routes" + }, + { + "type": "git", + "name": "commander", + "url": "https://github.com/tj/commander.js", + "branch": "master", + "searchPath": "docs" + }, + { + "type": "git", + "name": "vite", + "url": "https://github.com/vitejs/vite", + "branch": "main", + "searchPath": "docs" + }, + { + "type": "git", + "name": "opencode", + "url": "https://github.com/anomalyco/opencode", + "branch": "dev" + }, + { + "type": "git", + "name": "clerk", + "url": "https://github.com/clerk/javascript", + "branch": "main" + }, + { + "type": "git", + "name": "convexJs", + "url": "https://github.com/get-convex/convex-js", + "branch": "main" + }, + { + "type": "git", + "name": "convexDocs", + "url": "https://github.com/get-convex/convex-docs", + "branch": "main", + "specialNotes": "Official Convex documentation. Use for HTTP actions, queries, mutations, actions, schema, etc." + }, + { + "type": "git", + "name": "daytonaSdk", + "url": "https://github.com/daytonaio/sdk", + "branch": "main", + "specialNotes": "Daytona TypeScript SDK. Use for sandbox creation, management, and API reference." 
+ }, + { + "type": "git", + "name": "convex", + "url": "https://github.com/get-convex/convex-js", + "branch": "main" + }, + { + "type": "website", + "name": "btcaDocs", + "url": "https://docs.btca.dev/" + }, + { + "type": "website", + "name": "btcaWebsite", + "url": "https://btca.dev/" + } + ], + "model": "claude-haiku-4-5", + "provider": "opencode" +} diff --git a/bun.lock b/bun.lock index 5938e4fe..e2c28553 100644 --- a/bun.lock +++ b/bun.lock @@ -77,6 +77,7 @@ "@btca/shared": "workspace:*", "ai": "^6.0.49", "better-result": "^2.6.0", + "cheerio": "^1.2.0", "hono": "^4.7.11", "just-bash": "^2.7.0", "opencode-ai": "^1.1.36", @@ -153,7 +154,7 @@ "@ai-sdk/anthropic": ["@ai-sdk/anthropic@3.0.37", "", { "dependencies": { "@ai-sdk/provider": "3.0.7", "@ai-sdk/provider-utils": "4.0.13" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-tEgcJPw+a6obbF+SHrEiZsx3DNxOHqeY8bK4IpiNsZ8YPZD141R34g3lEAaQnmNN5mGsEJ8SXoEDabuzi8wFJQ=="], - "@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.35", "", { "dependencies": { "@ai-sdk/provider": "3.0.7", "@ai-sdk/provider-utils": "4.0.13", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-9aRTVM1P1u4yUIjBpco/WCF1WXr/DgWKuDYgLLHdENS8kiEuxDOPJuGbc/6+7EwQ6ZqSh0UOgeqvHfGJfU23Qg=="], + "@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.36", "", { "dependencies": { "@ai-sdk/provider": "3.0.7", "@ai-sdk/provider-utils": "4.0.13", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-2r1Q6azvqMYxQ1hqfWZmWg4+8MajoldD/ty65XdhCaCoBfvDu7trcvxXDfTSU+3/wZ1JIDky46SWYFOHnTbsBw=="], "@ai-sdk/google": ["@ai-sdk/google@3.0.21", "", { "dependencies": { "@ai-sdk/provider": "3.0.7", "@ai-sdk/provider-utils": "4.0.13" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-qQuvcbDqDPZojtoT45UFCQVH2w3m6KJKKjqJduUsvhN5ZqOXste0h4HgHK8hwGuDfv96Jr9QQEpspbgp6iu5Uw=="], @@ -1105,7 +1106,7 @@ "agentkeepalive": ["agentkeepalive@4.6.0", "", { "dependencies": { "humanize-ms": "^1.2.1" } }, 
"sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ=="], - "ai": ["ai@6.0.72", "", { "dependencies": { "@ai-sdk/gateway": "3.0.35", "@ai-sdk/provider": "3.0.7", "@ai-sdk/provider-utils": "4.0.13", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-D3TzDX6LzYL8qwi1A0rLnmuUexqDcCu4LSg77hcDHsqNRkaGspGItkz1U3RnN3ojv31XQYI9VmoWpkj44uvIUA=="], + "ai": ["ai@6.0.73", "", { "dependencies": { "@ai-sdk/gateway": "3.0.36", "@ai-sdk/provider": "3.0.7", "@ai-sdk/provider-utils": "4.0.13", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-p2/ICXIjAM4+bIFHEkAB+l58zq+aTmxAkotsb6doNt/CEms72zt6gxv2ky1fQDwU4ecMOcmMh78VJUSEKECzlg=="], "alien-signals": ["alien-signals@2.0.6", "", {}, "sha512-P3TxJSe31bUHBiblg59oU1PpaWPtmxF9GhJ/cB7OkgJ0qN/ifFSKUI25/v8ZhsT+lIG6ac8DpTOplXxORX6F3Q=="], @@ -1177,6 +1178,8 @@ "bn.js": ["bn.js@5.2.2", "", {}, "sha512-v2YAxEmKaBLahNwE1mjp4WON6huMNeuDvagFZW+ASCuA/ku0bXR9hSMw0XpiqMoA3+rmnyck/tPRSFQkoC9Cuw=="], + "boolbase": ["boolbase@1.0.0", "", {}, "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="], + "borsh": ["borsh@0.7.0", "", { "dependencies": { "bn.js": "^5.2.0", "bs58": "^4.0.0", "text-encoding-utf-8": "^1.0.2" } }, "sha512-CLCsZGIBCFnPtkNnieW/a8wmreDmfUtjU2m9yHrzPXIlNbqVs0AQrSatSG6vdNYUqdc83tkQi2eHfF98ubzQLA=="], "bowser": ["bowser@2.13.1", "", {}, "sha512-OHawaAbjwx6rqICCKgSG0SAnT05bzd7ppyKLVUITZpANBaaMFBAsaNkto3LoQ31tyFP5kNujE8Cdx85G9VzOkw=="], @@ -1241,6 +1244,10 @@ "chardet": ["chardet@2.1.1", "", {}, "sha512-PsezH1rqdV9VvyNhxxOW32/d75r01NY7TQCmOqomRo15ZSOKbpTFVsfjghxo6JloQUCGnH4k1LGu0R4yCLlWQQ=="], + "cheerio": ["cheerio@1.2.0", "", { "dependencies": { "cheerio-select": "^2.1.0", "dom-serializer": "^2.0.0", "domhandler": "^5.0.3", "domutils": "^3.2.2", "encoding-sniffer": "^0.2.1", "htmlparser2": "^10.1.0", "parse5": "^7.3.0", "parse5-htmlparser2-tree-adapter": "^7.1.0", 
"parse5-parser-stream": "^7.1.2", "undici": "^7.19.0", "whatwg-mimetype": "^4.0.0" } }, "sha512-WDrybc/gKFpTYQutKIK6UvfcuxijIZfMfXaYm8NMsPQxSYvf+13fXUJ4rztGGbJcBQ/GF55gvrZ0Bc0bj/mqvg=="], + + "cheerio-select": ["cheerio-select@2.1.0", "", { "dependencies": { "boolbase": "^1.0.0", "css-select": "^5.1.0", "css-what": "^6.1.0", "domelementtype": "^2.3.0", "domhandler": "^5.0.3", "domutils": "^3.0.1" } }, "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g=="], + "chokidar": ["chokidar@4.0.3", "", { "dependencies": { "readdirp": "^4.0.1" } }, "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA=="], "chownr": ["chownr@2.0.0", "", {}, "sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ=="], @@ -1293,8 +1300,12 @@ "crypto-js": ["crypto-js@4.2.0", "", {}, "sha512-KALDyEYgpY+Rlob/iriUtjV6d5Eq+Y191A5g4UqLAi8CyGP9N1+FdVbkc1SxKc2r4YAYqG8JzO2KGL+AizD70Q=="], + "css-select": ["css-select@5.2.2", "", { "dependencies": { "boolbase": "^1.0.0", "css-what": "^6.1.0", "domhandler": "^5.0.2", "domutils": "^3.0.1", "nth-check": "^2.0.1" } }, "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw=="], + "css-tree": ["css-tree@3.1.0", "", { "dependencies": { "mdn-data": "2.12.2", "source-map-js": "^1.0.1" } }, "sha512-0eW44TGN5SQXU1mWSkKwFstI/22X2bG1nYzZTYMAWjylYURhse752YgbE4Cx46AC+bAvI+/dYTPRk1LqSUnu6w=="], + "css-what": ["css-what@6.2.2", "", {}, "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA=="], + "cssesc": ["cssesc@3.0.0", "", { "bin": { "cssesc": "bin/cssesc" } }, "sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg=="], "cssstyle": ["cssstyle@5.3.7", "", { "dependencies": { "@asamuzakjp/css-color": "^4.1.1", "@csstools/css-syntax-patches-for-csstree": "^1.0.21", "css-tree": "^3.1.0", "lru-cache": "^11.2.4" } }, 
"sha512-7D2EPVltRrsTkhpQmksIu+LxeWAIEk6wRDMJ1qljlv+CKHJM+cJLlfhWIzNA44eAsHXSNe3+vO6DW1yCYx8SuQ=="], @@ -1343,8 +1354,16 @@ "dijkstrajs": ["dijkstrajs@1.0.3", "", {}, "sha512-qiSlmBq9+BCdCA/L46dw8Uy93mloxsPSbwnm5yrKn2vMPiy8KyAskTF6zuV/j5BMsmOGZDPs7KjU+mjb670kfA=="], + "dom-serializer": ["dom-serializer@2.0.0", "", { "dependencies": { "domelementtype": "^2.3.0", "domhandler": "^5.0.2", "entities": "^4.2.0" } }, "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg=="], + + "domelementtype": ["domelementtype@2.3.0", "", {}, "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw=="], + + "domhandler": ["domhandler@5.0.3", "", { "dependencies": { "domelementtype": "^2.3.0" } }, "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w=="], + "dompurify": ["dompurify@3.3.1", "", { "optionalDependencies": { "@types/trusted-types": "^2.0.7" } }, "sha512-qkdCKzLNtrgPFP1Vo+98FRzJnBRGe4ffyCea9IwHB1fyxPOeNTHpLKYGd4Uk9xvNoH0ZoOjwZxNptyMwqrId1Q=="], + "domutils": ["domutils@3.2.2", "", { "dependencies": { "dom-serializer": "^2.0.0", "domelementtype": "^2.3.0", "domhandler": "^5.0.3" } }, "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw=="], + "dotenv": ["dotenv@17.2.4", "", {}, "sha512-mudtfb4zRB4bVvdj0xRo+e6duH1csJRM8IukBqfTRvHotn9+LBXB8ynAidP9zHqoRC/fsllXgk4kCKlR21fIhw=="], "dunder-proto": ["dunder-proto@1.0.1", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", "gopd": "^1.2.0" } }, "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A=="], @@ -1357,11 +1376,13 @@ "encodeurl": ["encodeurl@2.0.0", "", {}, "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg=="], + "encoding-sniffer": ["encoding-sniffer@0.2.1", "", { "dependencies": { "iconv-lite": "^0.6.3", "whatwg-encoding": "^3.1.1" } }, 
"sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw=="], + "end-of-stream": ["end-of-stream@1.4.5", "", { "dependencies": { "once": "^1.4.0" } }, "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg=="], "enhanced-resolve": ["enhanced-resolve@5.19.0", "", { "dependencies": { "graceful-fs": "^4.2.4", "tapable": "^2.3.0" } }, "sha512-phv3E1Xl4tQOShqSte26C7Fl84EwUdZsyOuSSk9qtAGyyQs2s3jJzComh+Abf4g187lUUAvH+H26omrqia2aGg=="], - "entities": ["entities@6.0.1", "", {}, "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g=="], + "entities": ["entities@4.5.0", "", {}, "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw=="], "error-ex": ["error-ex@1.3.4", "", { "dependencies": { "is-arrayish": "^0.2.1" } }, "sha512-sqQamAnR14VgCr1A618A3sGrygcpK+HEbenA/HiEAkkUwcZIIB/tgWqHFxWgOyDh4nB4JCRimh79dR5Ywc9MDQ=="], @@ -1529,6 +1550,8 @@ "html-void-elements": ["html-void-elements@3.0.0", "", {}, "sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg=="], + "htmlparser2": ["htmlparser2@10.1.0", "", { "dependencies": { "domelementtype": "^2.3.0", "domhandler": "^5.0.3", "domutils": "^3.2.2", "entities": "^7.0.1" } }, "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ=="], + "http-errors": ["http-errors@2.0.1", "", { "dependencies": { "depd": "~2.0.0", "inherits": "~2.0.4", "setprototypeof": "~1.2.0", "statuses": "~2.0.2", "toidentifier": "~1.0.1" } }, "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ=="], "http-proxy-agent": ["http-proxy-agent@7.0.2", "", { "dependencies": { "agent-base": "^7.1.0", "debug": "^4.3.4" } }, "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig=="], @@ -1537,7 +1560,7 @@ "humanize-ms": ["humanize-ms@1.2.1", "", { 
"dependencies": { "ms": "^2.0.0" } }, "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ=="], - "iconv-lite": ["iconv-lite@0.7.2", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw=="], + "iconv-lite": ["iconv-lite@0.6.3", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="], "idb-keyval": ["idb-keyval@6.2.1", "", {}, "sha512-8Sb3veuYCyrZL+VBt9LJfZjLUPWVvqn8tG28VqYNFCo43KHcKuq+b4EiXGeuaLAQWL2YmyDgMp2aSpH9JHsEQg=="], @@ -1651,7 +1674,7 @@ "json5": ["json5@2.2.3", "", { "bin": { "json5": "lib/cli.js" } }, "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg=="], - "just-bash": ["just-bash@2.9.4", "", { "dependencies": { "compressjs": "^1.0.3", "diff": "^8.0.2", "fast-xml-parser": "^5.3.3", "file-type": "^21.2.0", "ini": "^6.0.0", "minimatch": "^10.1.1", "modern-tar": "^0.7.3", "papaparse": "^5.5.3", "pyodide": "^0.27.0", "re2js": "^1.2.1", "smol-toml": "^1.6.0", "sprintf-js": "^1.1.3", "sql.js": "^1.13.0", "turndown": "^7.2.2", "yaml": "^2.8.2" }, "optionalDependencies": { "@mongodb-js/zstd": "^7.0.0", "node-liblzma": "^2.0.3" }, "bin": { "just-bash": "dist/bin/just-bash.js", "just-bash-shell": "dist/bin/shell/shell.js" } }, "sha512-jyS8UA0yO89hCyHRxBTCBnfkLUBc+hT67gX6L616pbn9DCp1cfs9kdiwmKKK7+uc+eAMDcXzHUuYBK5oR3WYgg=="], + "just-bash": ["just-bash@2.9.5", "", { "dependencies": { "compressjs": "^1.0.3", "diff": "^8.0.2", "fast-xml-parser": "^5.3.3", "file-type": "^21.2.0", "ini": "^6.0.0", "minimatch": "^10.1.1", "modern-tar": "^0.7.3", "papaparse": "^5.5.3", "pyodide": "^0.27.0", "re2js": "^1.2.1", "smol-toml": "^1.6.0", "sprintf-js": "^1.1.3", "sql.js": "^1.13.0", "turndown": "^7.2.2", "yaml": "^2.8.2" }, "optionalDependencies": { "@mongodb-js/zstd": "^7.0.0", 
"node-liblzma": "^2.0.3" }, "bin": { "just-bash": "dist/bin/just-bash.js", "just-bash-shell": "dist/bin/shell/shell.js" } }, "sha512-ZYc4wVxE+jLDgjcwVVqKOT2hM8DG+gUUKMZAhfUULjbP9wT8U9V0uItA8tREpl2VnDLjzrxxK6jV7vbY/TLhNw=="], "kleur": ["kleur@4.1.5", "", {}, "sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ=="], @@ -1813,6 +1836,8 @@ "normalize-path": ["normalize-path@3.0.0", "", {}, "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA=="], + "nth-check": ["nth-check@2.1.1", "", { "dependencies": { "boolbase": "^1.0.0" } }, "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w=="], + "nullthrows": ["nullthrows@1.1.1", "", {}, "sha512-2vPPEi+Z7WqML2jZYddDIfy5Dqb0r2fze2zTxNNknZaFpVHU3mFB3R+DWeJWGVx0ecvttSGlJTI+WG+8Z4cDWw=="], "ob1": ["ob1@0.83.3", "", { "dependencies": { "flow-enums-runtime": "^0.0.6" } }, "sha512-egUxXCDwoWG06NGCS5s5AdcpnumHKJlfd3HH06P3m9TEMwwScfcY35wpQxbm9oHof+dM/lVH9Rfyu1elTVelSA=="], @@ -1831,29 +1856,29 @@ "open": ["open@10.2.0", "", { "dependencies": { "default-browser": "^5.2.1", "define-lazy-prop": "^3.0.0", "is-inside-container": "^1.0.0", "wsl-utils": "^0.1.0" } }, "sha512-YgBpdJHPyQ2UE5x+hlSXcnejzAvD0b22U2OuAP+8OnlJT+PjWPxtgmGqKKc+RgTM63U9gN0YzrYc71R2WT/hTA=="], - "opencode-ai": ["opencode-ai@1.1.52", "", { "optionalDependencies": { "opencode-darwin-arm64": "1.1.52", "opencode-darwin-x64": "1.1.52", "opencode-darwin-x64-baseline": "1.1.52", "opencode-linux-arm64": "1.1.52", "opencode-linux-arm64-musl": "1.1.52", "opencode-linux-x64": "1.1.52", "opencode-linux-x64-baseline": "1.1.52", "opencode-linux-x64-baseline-musl": "1.1.52", "opencode-linux-x64-musl": "1.1.52", "opencode-windows-x64": "1.1.52", "opencode-windows-x64-baseline": "1.1.52" }, "bin": { "opencode": "bin/opencode" } }, "sha512-1LCYgW0ZL9/zM0YwIVW6Wl30v3cSB7rMveQZCY2KV1+b4Ne+rF0cCugV50ldkmeMaeSWJR7/KMLxLI1TtqotGQ=="], + "opencode-ai": 
["opencode-ai@1.1.53", "", { "optionalDependencies": { "opencode-darwin-arm64": "1.1.53", "opencode-darwin-x64": "1.1.53", "opencode-darwin-x64-baseline": "1.1.53", "opencode-linux-arm64": "1.1.53", "opencode-linux-arm64-musl": "1.1.53", "opencode-linux-x64": "1.1.53", "opencode-linux-x64-baseline": "1.1.53", "opencode-linux-x64-baseline-musl": "1.1.53", "opencode-linux-x64-musl": "1.1.53", "opencode-windows-x64": "1.1.53", "opencode-windows-x64-baseline": "1.1.53" }, "bin": { "opencode": "bin/opencode" } }, "sha512-COjsVMI9a2IwLytk6ahxygDwxDs+1/rQg9bfruRWLOG42bIWeZkNZ67AfhZDOL1lUkKHYie1ZFrPKUZ9WGAtZQ=="], - "opencode-darwin-arm64": ["opencode-darwin-arm64@1.1.52", "", { "os": "darwin", "cpu": "arm64" }, "sha512-+VmpNcnH3Ym0WJh577i8NBvHEc93BAaArNBt4SJ7f1TvDNCbdtm1ySaPcmJhYb6PVTPAOt9PDxjN1C7u02RCHA=="], + "opencode-darwin-arm64": ["opencode-darwin-arm64@1.1.53", "", { "os": "darwin", "cpu": "arm64" }, "sha512-X61cD9KmvXoQ2aqc+zZWBpMMVvOZ9bFWTLNAZGC6NMZMF+rxyplpSUUccgptlytBOXwyTV7gke6vQkuV5tDMAA=="], - "opencode-darwin-x64": ["opencode-darwin-x64@1.1.52", "", { "os": "darwin", "cpu": "x64" }, "sha512-0t3ajIga7nNKohFY4MjhLEWSvbS9vN+h6JAR2R1xvqz4JCXypjTe16LKnda20Ak9+ETtmo3kww4jJWtL0Yy3bg=="], + "opencode-darwin-x64": ["opencode-darwin-x64@1.1.53", "", { "os": "darwin", "cpu": "x64" }, "sha512-Ql5+Ed1fLaFPGvo+b9R1grbL2+RyPpXp6nh66X0Z0eVillK876b9L+n5GWnDuGPlwhAf4ycKTYqGPfcxmGmINw=="], - "opencode-darwin-x64-baseline": ["opencode-darwin-x64-baseline@1.1.52", "", { "os": "darwin", "cpu": "x64" }, "sha512-wX9zNdmPEFGraztJXOKpBhUoYn8o2chu00kj2fJ40QqYItCPIOSzbJHf1+ntegp8aj+q5ggVuaAu12VE8oDutg=="], + "opencode-darwin-x64-baseline": ["opencode-darwin-x64-baseline@1.1.53", "", { "os": "darwin", "cpu": "x64" }, "sha512-5JZ7kR7u7xxNGRq23dKla/UCedMCYEUGtp6yf7sFHF1Oqh0aeeuBnioqTvPpeKY/4zGGdwwpEeA30hs2b9EOEg=="], - "opencode-linux-arm64": ["opencode-linux-arm64@1.1.52", "", { "os": "linux", "cpu": "arm64" }, 
"sha512-jLq/e+7Zm1zss2/YmGxan16VftF4sShJgjM8ihAAH9eINvDg46g6+yDykxk87bKv+gb00B12OINPHFdMUxMDCg=="], + "opencode-linux-arm64": ["opencode-linux-arm64@1.1.53", "", { "os": "linux", "cpu": "arm64" }, "sha512-4HP160A3bUoIElcDYlM98U91yybbrnnC1hB7AQ86+PE+KgGcTjlxtT/n3OKf1CZOehOTzyLSNDp1ovbACl5mwQ=="], - "opencode-linux-arm64-musl": ["opencode-linux-arm64-musl@1.1.52", "", { "os": "linux", "cpu": "arm64" }, "sha512-XsIUS1+uyRn3/a17isis4OrhZj0GFH3A5osAWXVLq7GFUtgL2EWasQrT6CLkvHX9IJlScTllyFio//vIciRFhw=="], + "opencode-linux-arm64-musl": ["opencode-linux-arm64-musl@1.1.53", "", { "os": "linux", "cpu": "arm64" }, "sha512-lGR0gMk4TrBgwLUZaJgbp4XPC3HvQYF6MqkoWuqoIqSNAM6X/50HVGyefcea4PQ2Pcq58Pjh7ccoy+AYAdZNBg=="], - "opencode-linux-x64": ["opencode-linux-x64@1.1.52", "", { "os": "linux", "cpu": "x64" }, "sha512-8lxMKlcLdqawLSlpg1BHsWI5BCr468cQNU8DHslW6U3fWYrjh2LRrpjRpr1sTl6YCHGfAoRCxyPrEnq3Ejuidg=="], + "opencode-linux-x64": ["opencode-linux-x64@1.1.53", "", { "os": "linux", "cpu": "x64" }, "sha512-pPwMgD+w0iyoIRArrfR4AQHA8J+FkK9vBQCIKAuji/ouOVoSHkS3+0iGcoyKRH0JP9IwjyH12nk5ip8jC2ss9Q=="], - "opencode-linux-x64-baseline": ["opencode-linux-x64-baseline@1.1.52", "", { "os": "linux", "cpu": "x64" }, "sha512-UI9Bid68PH3tXatcIViTsC74eNQ3ByRfe3iwgCzbBFcvmfWeSA98GQB9AU0qgkUzVrDF/ktoOVxRRxrvt4t13A=="], + "opencode-linux-x64-baseline": ["opencode-linux-x64-baseline@1.1.53", "", { "os": "linux", "cpu": "x64" }, "sha512-Kr1fi7q9BafRftS2q2yPSjR+UuJLffWw1qryCcqj6W4xzXapBfINjuPxv9iCaYg0MORVDu3Ds3c1xFaUzssvWg=="], - "opencode-linux-x64-baseline-musl": ["opencode-linux-x64-baseline-musl@1.1.52", "", { "os": "linux", "cpu": "x64" }, "sha512-wSZPXXmCe0+sQWGd9alZ4F6Foksv3rg24RKh2ioK1UOOZ2IqBV0uy79jOy8Eqw2riWpCqAFweHMS0O5EUZuMYg=="], + "opencode-linux-x64-baseline-musl": ["opencode-linux-x64-baseline-musl@1.1.53", "", { "os": "linux", "cpu": "x64" }, "sha512-O4x4SX0sjeeEz1Sy6qnirIUYBRFoziojtT+ac4yXdpJ+1yLK9Qu2IbO37hoVyb8Eh/CuEqQrLK/R9i8qCsqYPQ=="], - "opencode-linux-x64-musl": 
["opencode-linux-x64-musl@1.1.52", "", { "os": "linux", "cpu": "x64" }, "sha512-3jVnrsqW+yL67HEJR7L9oQfFTkQxOSW1J7+aawNZdKPViBM1Vd+WiG3lCVuYrwyaLwuWXMuyNwucHD8COmh5cw=="], + "opencode-linux-x64-musl": ["opencode-linux-x64-musl@1.1.53", "", { "os": "linux", "cpu": "x64" }, "sha512-S5b/a7dgS4tLG4lTHE81p7unBIW+6fM+ghHUozw1lCBgsx2oi8fDoos4+TyoYpnZgJ7m9WfL2BCc6Q7V8DDedA=="], - "opencode-windows-x64": ["opencode-windows-x64@1.1.52", "", { "os": "win32", "cpu": "x64" }, "sha512-8YC11jMBiZwAXBSnVIiKGRoAPHpLaC5rJPCqtR5LR88z7f3Gd94UN9tAkEuYkujNkRDRGy0EXCxIYlL5lEU2LA=="], + "opencode-windows-x64": ["opencode-windows-x64@1.1.53", "", { "os": "win32", "cpu": "x64" }, "sha512-GdSZhScRi9ydb3QawwRFlaLAwd8Wvd7uizOKRVUnbDfgZPpUbMn/QAAl4JCBsBTVYn4HDHpouWzJSxJTm1VVKg=="], - "opencode-windows-x64-baseline": ["opencode-windows-x64-baseline@1.1.52", "", { "os": "win32", "cpu": "x64" }, "sha512-G1qw4nR6ZixGYFQn93vR4cgwCXFgJuBPf1u7F4Qv/247qZ4DkFjRxRQX2v04HmpyNdBgdf1y9PED2qVKsrUg/A=="], + "opencode-windows-x64-baseline": ["opencode-windows-x64-baseline@1.1.53", "", { "os": "win32", "cpu": "x64" }, "sha512-yV32IbIfUtyOEl7VYAIB+XJ9+KCPXHE4GJt2poDwxfpFvilPlmF+xl83dcm2mcyuXj98KGTGgG7q6+OIQyO6ig=="], "ox": ["ox@0.6.9", "", { "dependencies": { "@adraffy/ens-normalize": "^1.10.1", "@noble/curves": "^1.6.0", "@noble/hashes": "^1.5.0", "@scure/bip32": "^1.5.0", "@scure/bip39": "^1.4.0", "abitype": "^1.0.6", "eventemitter3": "5.0.1" }, "peerDependencies": { "typescript": ">=5.4.0" }, "optionalPeers": ["typescript"] }, "sha512-wi5ShvzE4eOcTwQVsIPdFr+8ycyX+5le/96iAJutaZAvCes1J0+RvpEPg5QDPDiaR0XQQAvZVl7AwqQcINuUug=="], @@ -1879,7 +1904,11 @@ "parse-passwd": ["parse-passwd@1.0.0", "", {}, "sha512-1Y1A//QUXEZK7YKz+rD9WydcE1+EuPr6ZBgKecAB8tmoW6UFv0NREVJe1p+jRxtThkcbbKkfwIbWJe/IeE6m2Q=="], - "parse5": ["parse5@8.0.0", "", { "dependencies": { "entities": "^6.0.0" } }, "sha512-9m4m5GSgXjL4AjumKzq1Fgfp3Z8rsvjRNbnkVwfu2ImRqE5D0LnY2QfDen18FSY9C573YU5XxSapdHZTZ2WolA=="], + "parse5": ["parse5@7.3.0", "", { 
"dependencies": { "entities": "^6.0.0" } }, "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw=="], + + "parse5-htmlparser2-tree-adapter": ["parse5-htmlparser2-tree-adapter@7.1.0", "", { "dependencies": { "domhandler": "^5.0.3", "parse5": "^7.0.0" } }, "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g=="], + + "parse5-parser-stream": ["parse5-parser-stream@7.1.2", "", { "dependencies": { "parse5": "^7.0.0" } }, "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow=="], "parseurl": ["parseurl@1.3.3", "", {}, "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ=="], @@ -2229,6 +2258,8 @@ "uint8array-extras": ["uint8array-extras@1.5.0", "", {}, "sha512-rvKSBiC5zqCCiDZ9kAOszZcDvdAHwwIKJG33Ykj43OKcWsnmcBRL09YTU4nOeHZ8Y2a7l1MgTd08SBe9A8Qj6A=="], + "undici": ["undici@7.20.0", "", {}, "sha512-MJZrkjyd7DeC+uPZh+5/YaMDxFiiEEaDgbUSVMXayofAkDWF1088CDo+2RPg7B1BuS1qf1vgNE7xqwPxE0DuSQ=="], + "unist-util-is": ["unist-util-is@6.0.1", "", { "dependencies": { "@types/unist": "^3.0.0" } }, "sha512-LsiILbtBETkDz8I9p1dQ0uyRUWuaQzd/cuEeS1hoRSyW5E5XGmTzlwY1OrNzzakGowI9Dr/I8HVaw4hTtnxy8g=="], "unist-util-position": ["unist-util-position@5.0.0", "", { "dependencies": { "@types/unist": "^3.0.0" } }, "sha512-fucsC7HjXvkB5R3kTCO7kUjRdrS0BJt3M/FPxmHMBOm8JQi2BsHAHFsy27E0EolP8rp0NzXsJ+jNPyDWvOJZPA=="], @@ -2281,6 +2312,8 @@ "webidl-conversions": ["webidl-conversions@8.0.1", "", {}, "sha512-BMhLD/Sw+GbJC21C/UgyaZX41nPt8bUTg+jWyDeg7e7YN4xOM05YPSIXceACnXVtqyEw/LMClUQMtMZ+PGGpqQ=="], + "whatwg-encoding": ["whatwg-encoding@3.1.1", "", { "dependencies": { "iconv-lite": "0.6.3" } }, "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ=="], + "whatwg-fetch": ["whatwg-fetch@3.6.20", "", {}, "sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg=="], 
"whatwg-mimetype": ["whatwg-mimetype@4.0.0", "", {}, "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg=="], @@ -2391,6 +2424,8 @@ "@inquirer/expand/@inquirer/type": ["@inquirer/type@3.0.10", "", { "peerDependencies": { "@types/node": ">=18" }, "optionalPeers": ["@types/node"] }, "sha512-BvziSRxfz5Ov8ch0z/n3oijRSEcEsHnhggm4xFZe93DHcUCTlutlq9Ox4SVENAfcRD22UQq7T/atg9Wr3k09eA=="], + "@inquirer/external-editor/iconv-lite": ["iconv-lite@0.7.2", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw=="], + "@inquirer/input/@inquirer/core": ["@inquirer/core@10.3.2", "", { "dependencies": { "@inquirer/ansi": "^1.0.2", "@inquirer/figures": "^1.0.15", "@inquirer/type": "^3.0.10", "cli-width": "^4.1.0", "mute-stream": "^2.0.0", "signal-exit": "^4.1.0", "wrap-ansi": "^6.2.0", "yoctocolors-cjs": "^2.1.3" }, "peerDependencies": { "@types/node": ">=18" }, "optionalPeers": ["@types/node"] }, "sha512-43RTuEbfP8MbKzedNqBrlhhNKVwoK//vUFNW3Q3vZ88BLcrs4kYpGg+B2mm5p2K/HfygoCxuKwJJiv8PbGmE0A=="], "@inquirer/input/@inquirer/type": ["@inquirer/type@3.0.10", "", { "peerDependencies": { "@types/node": ">=18" }, "optionalPeers": ["@types/node"] }, "sha512-BvziSRxfz5Ov8ch0z/n3oijRSEcEsHnhggm4xFZe93DHcUCTlutlq9Ox4SVENAfcRD22UQq7T/atg9Wr3k09eA=="], @@ -2529,8 +2564,6 @@ "babel-plugin-jsx-dom-expressions/@babel/helper-module-imports": ["@babel/helper-module-imports@7.18.6", "", { "dependencies": { "@babel/types": "^7.18.6" } }, "sha512-0NFvs3VkuSYbFi1x2Vd6tKrywq+z/cLeYC/RJNFrIX/30Bf5aiGYbtvGXolEktzJH8o5E5KJ3tT+nkxuuZFVlA=="], - "babel-plugin-jsx-dom-expressions/parse5": ["parse5@7.3.0", "", { "dependencies": { "entities": "^6.0.0" } }, "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw=="], - "btca-server/zod": ["zod@3.25.76", "", {}, 
"sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ=="], "chrome-launcher/is-wsl": ["is-wsl@2.2.0", "", { "dependencies": { "is-docker": "^2.0.0" } }, "sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww=="], @@ -2563,6 +2596,8 @@ "glob/minipass": ["minipass@4.2.8", "", {}, "sha512-fNzuVyifolSLFL4NzpF+wEF4qrgqaaKX0haXPQEdQ7NKAN+WecoKMHV09YcuL/DHxrUsYQOK3MiuDf7Ip2OXfQ=="], + "htmlparser2/entities": ["entities@7.0.1", "", {}, "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA=="], + "http-errors/statuses": ["statuses@2.0.2", "", {}, "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw=="], "inquirer/@inquirer/ansi": ["@inquirer/ansi@1.0.2", "", {}, "sha512-S8qNSZiYzFd0wAcyG5AXCvUHC5Sr7xpZ9wZ2py9XR88jUz8wooStVx5M6dRzczbBWjic9NP7+rY0Xi7qqK/aMQ=="], @@ -2593,6 +2628,8 @@ "jest-worker/supports-color": ["supports-color@8.1.1", "", { "dependencies": { "has-flag": "^4.0.0" } }, "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q=="], + "jsdom/parse5": ["parse5@8.0.0", "", { "dependencies": { "entities": "^6.0.0" } }, "sha512-9m4m5GSgXjL4AjumKzq1Fgfp3Z8rsvjRNbnkVwfu2ImRqE5D0LnY2QfDen18FSY9C573YU5XxSapdHZTZ2WolA=="], + "lighthouse-logger/debug": ["debug@2.6.9", "", { "dependencies": { "ms": "2.0.0" } }, "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA=="], "lru-cache/yallist": ["yallist@3.1.1", "", {}, "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g=="], @@ -2611,8 +2648,12 @@ "ox/@noble/hashes": ["@noble/hashes@1.8.0", "", {}, "sha512-jCs9ldd7NwzpgXDIf6P3+NrHh9/sD6CQdxHyjQI+h/6rDNo88ypBxxz45UDuZHz9r3tNz7N/VInSVoVdtXEI4A=="], + "parse5/entities": ["entities@6.0.1", "", {}, "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g=="], + 
"path-scurry/lru-cache": ["lru-cache@10.4.3", "", {}, "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="], + "path-scurry/minipass": ["minipass@7.1.2", "", {}, "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw=="], + "pixelmatch/pngjs": ["pngjs@6.0.0", "", {}, "sha512-TRzzuFRRmEoSW/p1KVAmiOgPco2Irlah+bGFCeNfJXxxYGwSw7YwAOAcd7X28K/m5bjBWKsC29KyoMfHbypayg=="], "postcss/nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="], @@ -2793,6 +2834,8 @@ "jest-validate/chalk/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], + "jsdom/parse5/entities": ["entities@6.0.1", "", {}, "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g=="], + "lighthouse-logger/debug/ms": ["ms@2.0.0", "", {}, "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="], "metro/chalk/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], From 17171318e6099c6b67f09babdb84a886c33c2763 Mon Sep 17 00:00:00 2001 From: Ben Davis Date: Thu, 5 Feb 2026 14:11:59 -0800 Subject: [PATCH 2/5] better web resource setup for now... 
--- apps/docs/btca.spec.md | 1 + .../src/resources/impls/website.test.ts | 132 ++++++ apps/server/src/resources/impls/website.ts | 408 ++++++++++++++++-- btca.config.jsonc | 4 +- 4 files changed, 503 insertions(+), 42 deletions(-) diff --git a/apps/docs/btca.spec.md b/apps/docs/btca.spec.md index b6771cfe..01d5251a 100644 --- a/apps/docs/btca.spec.md +++ b/apps/docs/btca.spec.md @@ -245,6 +245,7 @@ Behavior: - If `--type` omitted, auto‑detects URL vs path. - Git URLs are normalized to base repo when GitHub. - Website resources require an absolute HTTPS URL. +- Website crawling may probe for markdown-friendly variants by appending `.md` and `/.md` to page paths (and follow same-origin redirects) to improve extraction on SPA docs sites. - Local paths are resolved to absolute paths. ### 4.4 `btca remove [name]` diff --git a/apps/server/src/resources/impls/website.test.ts b/apps/server/src/resources/impls/website.test.ts index 17923719..dcc68763 100644 --- a/apps/server/src/resources/impls/website.test.ts +++ b/apps/server/src/resources/impls/website.test.ts @@ -211,4 +211,136 @@ describe('Website Resource', () => { expect(await fallback.getAbsoluteDirectoryPath()).toBe(resourcePath); expect(await Bun.file(path.join(resourcePath, 'pages/docs.md')).exists()).toBe(true); }); + + it('prefers markdown variants when available (.md then /.md) and preserves markdown formatting', async () => { + const calls = withMockFetch({ + 'https://docs.btca.dev/robots.txt': { body: 'User-agent: *\nAllow: /\n' }, + 'https://docs.btca.dev/sitemap.xml': { + headers: { 'content-type': 'application/xml' }, + body: '' + }, + 'https://docs.btca.dev/guides/cli-reference.md': { status: 404, body: 'not found' }, + 'https://docs.btca.dev/guides/cli-reference/.md': { + headers: { 'content-type': 'text/markdown' }, + body: '# CLI Reference\n\n- foo\n- bar\n' + }, + 'https://docs.btca.dev/guides/cli-reference': { + headers: { 'content-type': 'text/html' }, + body: 'SPA Shell' + } + }); + + const 
resource = await loadWebsiteResource({ + ...baseArgs(), + name: 'btca-docs', + url: 'https://docs.btca.dev/guides/cli-reference', + maxPages: 1, + maxDepth: 0 + }); + const resourcePath = await resource.getAbsoluteDirectoryPath(); + + const pagePath = path.join(resourcePath, 'pages/guides/cli-reference.md'); + expect(await Bun.file(pagePath).exists()).toBe(true); + + const content = await Bun.file(pagePath).text(); + expect(content).toContain('Source: https://docs.btca.dev/guides/cli-reference'); + expect(content).toContain('# CLI Reference'); + expect(content).toContain('\n- foo\n- bar\n'); + + expect(calls).toContain('https://docs.btca.dev/guides/cli-reference.md'); + expect(calls).toContain('https://docs.btca.dev/guides/cli-reference/.md'); + }); + + it('only probes markdown variants once per origin when unsupported', async () => { + const calls = withMockFetch({ + 'https://docs.example.com/robots.txt': { body: 'User-agent: *\nAllow: /\n' }, + 'https://docs.example.com/sitemap.xml': { + headers: { 'content-type': 'application/xml' }, + body: '' + }, + 'https://docs.example.com/docs': { + headers: { 'content-type': 'text/html' }, + body: ` + Docs +
+ A + B +
+ + ` + }, + 'https://docs.example.com/docs/a': { + headers: { 'content-type': 'text/html' }, + body: 'A

A

' + }, + 'https://docs.example.com/docs/b': { + headers: { 'content-type': 'text/html' }, + body: 'B

B

' + } + }); + + const resource = await loadWebsiteResource({ + ...baseArgs(), + name: 'no-md', + url: 'https://docs.example.com/docs', + maxPages: 10, + maxDepth: 1 + }); + const resourcePath = await resource.getAbsoluteDirectoryPath(); + + expect(await Bun.file(path.join(resourcePath, 'pages/docs.md')).exists()).toBe(true); + expect(await Bun.file(path.join(resourcePath, 'pages/docs/a.md')).exists()).toBe(true); + expect(await Bun.file(path.join(resourcePath, 'pages/docs/b.md')).exists()).toBe(true); + + const probeCalls = calls.filter((url) => url.endsWith('.md') || url.endsWith('/.md')); + expect(probeCalls.length).toBe(2); + }); + + it('follows redirects for markdown-variant URLs', async () => { + let dotMdCalls = 0; + const calls = withMockFetch({ + 'https://bun.com/robots.txt': { body: 'User-agent: *\nAllow: /\n' }, + 'https://bun.com/sitemap.xml': { + headers: { 'content-type': 'application/xml' }, + body: '' + }, + 'https://bun.com/docs/runtime/binary-data.md': () => { + dotMdCalls += 1; + if (dotMdCalls === 1) { + return { + headers: { 'content-type': 'text/plain' }, + body: 'not markdown' + }; + } + return { + headers: { 'content-type': 'text/markdown' }, + body: '# Binary Data\n\nHello\n' + }; + }, + 'https://bun.com/docs/runtime/binary-data/.md': { + status: 302, + headers: { location: '/docs/runtime/binary-data.md' } + }, + 'https://bun.com/docs/runtime/binary-data': { + headers: { 'content-type': 'text/html' }, + body: 'Shell