diff --git a/apps/cli/src/client/index.ts b/apps/cli/src/client/index.ts index 909e4f37..550ec3d9 100644 --- a/apps/cli/src/client/index.ts +++ b/apps/cli/src/client/index.ts @@ -185,7 +185,17 @@ export interface LocalResourceInput { specialNotes?: string; } -export type ResourceInput = GitResourceInput | LocalResourceInput; +export interface WebsiteResourceInput { + type: 'website'; + name: string; + url: string; + maxPages?: number; + maxDepth?: number; + ttlHours?: number; + specialNotes?: string; +} + +export type ResourceInput = GitResourceInput | LocalResourceInput | WebsiteResourceInput; /** * Add a new resource diff --git a/apps/cli/src/commands/add.ts b/apps/cli/src/commands/add.ts index e096ffeb..7663fdf8 100644 --- a/apps/cli/src/commands/add.ts +++ b/apps/cli/src/commands/add.ts @@ -1,24 +1,24 @@ import { Result } from 'better-result'; import { Command } from 'commander'; -import * as readline from 'readline'; import path from 'node:path'; -import { ensureServer } from '../server/manager.ts'; +import * as readline from 'readline'; + import { addResource, BtcaError } from '../client/index.ts'; import { dim } from '../lib/utils/colors.ts'; +import { ensureServer } from '../server/manager.ts'; interface GitHubUrlParts { owner: string; repo: string; } -/** - * Parse a GitHub URL and extract owner/repo. - */ -function parseGitHubUrl(url: string): GitHubUrlParts | null { - // Handle various GitHub URL formats: - // - https://github.com/owner/repo - // - https://github.com/owner/repo.git - // - github.com/owner/repo +type ResourceType = 'git' | 'local' | 'website'; + +const DEFAULT_WEBSITE_MAX_PAGES = 200; +const DEFAULT_WEBSITE_MAX_DEPTH = 3; +const DEFAULT_WEBSITE_TTL_HOURS = 24; + +const parseGitHubUrl = (url: string): GitHubUrlParts | null => { const patterns = [ /^https?:\/\/github\.com\/([^/]+)\/([^/]+?)(\.git)?$/, /^github\.com\/([^/]+)\/([^/]+?)(\.git)?$/ @@ -35,21 +35,15 @@ function parseGitHubUrl(url: string): GitHubUrlParts | null { } return null; -} +}; -/** - * Normalize GitHub URL to standard format. - */ -function normalizeGitHubUrl(url: string): string { +const normalizeGitHubUrl = (url: string) => { const parts = parseGitHubUrl(url); if (!parts) return url; return `https://github.com/${parts.owner}/${parts.repo}`; -} +}; -/** - * Format an error for display, including hint if available. - */ -function formatError(error: unknown): string { +const formatError = (error: unknown): string => { if (error instanceof BtcaError) { let output = `Error: ${error.message}`; if (error.hint) { @@ -58,52 +52,28 @@ function formatError(error: unknown): string { return output; } return `Error: ${error instanceof Error ? error.message : String(error)}`; -} +}; -/** - * Create a readline interface for prompts. - */ -function createRl(): readline.Interface { - return readline.createInterface({ - input: process.stdin, - output: process.stdout - }); -} +const createRl = () => readline.createInterface({ input: process.stdin, output: process.stdout }); -/** - * Prompt for input with a default value. - */ -async function promptInput( - rl: readline.Interface, - question: string, - defaultValue?: string -): Promise { - return new Promise((resolve) => { +const promptInput = async (rl: readline.Interface, question: string, defaultValue?: string) => + new Promise((resolve) => { const defaultHint = defaultValue ? ` ${dim(`(${defaultValue})`)}` : ''; rl.question(`${question}${defaultHint}: `, (answer) => { const value = answer.trim(); resolve(value || defaultValue || ''); }); }); -} -/** - * Prompt for confirmation (y/n). - */ -async function promptConfirm(rl: readline.Interface, question: string): Promise { - return new Promise((resolve) => { +const promptConfirm = async (rl: readline.Interface, question: string) => + new Promise((resolve) => { rl.question(`${question} ${dim('(y/n)')}: `, (answer) => { resolve(answer.trim().toLowerCase() === 'y'); }); }); -} -/** - * Prompt for repeated entries (search paths). - */ -async function promptRepeated(rl: readline.Interface, itemName: string): Promise { +const promptRepeated = async (rl: readline.Interface, itemName: string) => { const items: string[] = []; - console.log(`\nEnter ${itemName} one at a time. Press Enter with empty input when done.`); while (true) { @@ -113,47 +83,61 @@ async function promptRepeated(rl: readline.Interface, itemName: string): Promise } return items; -} +}; -/** - * Prompt for single selection from a list. - */ -async function promptSelect( +const promptSelect = async ( question: string, options: { label: string; value: T }[] -): Promise { - return new Promise((resolve, reject) => { - const rl = readline.createInterface({ - input: process.stdin, - output: process.stdout - }); +): Promise => + new Promise((resolve, reject) => { + const rl = createRl(); console.log(`\n${question}\n`); - options.forEach((opt, idx) => { - console.log(` ${idx + 1}) ${opt.label}`); + options.forEach((option, index) => { + console.log(` ${index + 1}) ${option.label}`); }); console.log(''); rl.question('Enter number: ', (answer) => { rl.close(); - const num = parseInt(answer.trim(), 10); - if (isNaN(num) || num < 1 || num > options.length) { + const selection = Number.parseInt(answer.trim(), 10); + if (!Number.isFinite(selection) || selection < 1 || selection > options.length) { reject(new Error('Invalid selection')); return; } - resolve(options[num - 1]!.value); + resolve(options[selection - 1]!.value); }); }); -} -/** - * Interactive wizard for adding a git resource. - */ -async function addGitResourceWizard( +const parseRequiredInt = (raw: string, field: string, min: number) => { + const parsed = Number.parseInt(raw, 10); + if (!Number.isFinite(parsed) || parsed < min) { + throw new Error(`${field} must be an integer >= ${min}`); + } + return parsed; +}; + +const defaultWebsiteNameFromUrl = (url: string) => { + const result = Result.try(() => new URL(url)); + return result.match({ + ok: (parsed) => { + const host = parsed.hostname.split('.').filter(Boolean); + const base = host.length > 1 ? host[host.length - 2] : (host[0] ?? 'website'); + const slug = (parsed.pathname.split('/').filter(Boolean).join('-') || '').replace( + /[^a-zA-Z0-9._-]/g, + '' + ); + return slug ? `${base}-${slug}` : base; + }, + err: () => 'website' + }); +}; + +const addGitResourceWizard = async ( url: string, options: { global?: boolean }, globalOpts: { server?: string; port?: number } | undefined -): Promise { +) => { const urlParts = parseGitHubUrl(url); if (!urlParts) { console.error('Error: Invalid GitHub URL.'); @@ -162,61 +146,40 @@ async function addGitResourceWizard( } const normalizedUrl = normalizeGitHubUrl(url); - console.log('\n--- Add Git Resource ---\n'); console.log(`Repository: ${normalizedUrl}`); const rl = createRl(); - const result = await Result.tryPromise(async () => { - // Step 1: URL (prefilled, confirm) const finalUrl = await promptInput(rl, 'URL', normalizedUrl); - - // Step 2: Name (default = repo name) - const defaultName = urlParts.repo; - const name = await promptInput(rl, 'Name', defaultName); - - // Step 3: Branch (default = main) + const name = await promptInput(rl, 'Name', urlParts.repo); const branch = await promptInput(rl, 'Branch', 'main'); - - // Step 4: Search paths (optional, repeated) const wantSearchPaths = await promptConfirm( rl, 'Do you want to add search paths (subdirectories to focus on)?' ); const searchPaths = wantSearchPaths ? await promptRepeated(rl, 'Search path') : []; - - // Step 5: Notes (optional) const notes = await promptInput(rl, 'Notes (optional)'); - rl.close(); - // Summary console.log('\n--- Summary ---\n'); - console.log(` Type: git`); + console.log(' Type: git'); console.log(` Name: ${name}`); console.log(` URL: ${finalUrl}`); console.log(` Branch: ${branch}`); - if (searchPaths.length > 0) { - console.log(` Search: ${searchPaths.join(', ')}`); - } - if (notes) { - console.log(` Notes: ${notes}`); - } + if (searchPaths.length > 0) console.log(` Search: ${searchPaths.join(', ')}`); + if (notes) console.log(` Notes: ${notes}`); console.log(` Config: ${options.global ? 'global' : 'project'}`); console.log(''); - // Confirm const confirmRl = createRl(); const confirmed = await promptConfirm(confirmRl, 'Add this resource?'); confirmRl.close(); - if (!confirmed) { console.log('\nCancelled.'); process.exit(0); } - // Add the resource via server const server = await ensureServer({ serverUrl: globalOpts?.server, port: globalOpts?.port, @@ -244,65 +207,43 @@ async function addGitResourceWizard( }); rl.close(); + if (Result.isError(result)) throw result.error; +}; - if (Result.isError(result)) { - throw result.error; - } -} - -/** - * Interactive wizard for adding a local resource. - */ -async function addLocalResourceWizard( +const addLocalResourceWizard = async ( localPath: string, options: { global?: boolean }, globalOpts: { server?: string; port?: number } | undefined -): Promise { - // Resolve the path +) => { const resolvedPath = path.isAbsolute(localPath) ? localPath : path.resolve(process.cwd(), localPath); - console.log('\n--- Add Local Resource ---\n'); console.log(`Directory: ${resolvedPath}`); const rl = createRl(); - const result = await Result.tryPromise(async () => { - // Step 1: Path (prefilled, confirm) const finalPath = await promptInput(rl, 'Path', resolvedPath); - - // Step 2: Name (default = directory name) - const defaultName = path.basename(finalPath); - const name = await promptInput(rl, 'Name', defaultName); - - // Step 3: Notes (optional) + const name = await promptInput(rl, 'Name', path.basename(finalPath)); const notes = await promptInput(rl, 'Notes (optional)'); - rl.close(); - // Summary console.log('\n--- Summary ---\n'); - console.log(` Type: local`); + console.log(' Type: local'); console.log(` Name: ${name}`); console.log(` Path: ${finalPath}`); - if (notes) { - console.log(` Notes: ${notes}`); - } + if (notes) console.log(` Notes: ${notes}`); console.log(` Config: ${options.global ? 'global' : 'project'}`); console.log(''); - // Confirm const confirmRl = createRl(); const confirmed = await promptConfirm(confirmRl, 'Add this resource?'); confirmRl.close(); - if (!confirmed) { console.log('\nCancelled.'); process.exit(0); } - // Add the resource via server const server = await ensureServer({ serverUrl: globalOpts?.server, port: globalOpts?.port, @@ -317,28 +258,108 @@ async function addLocalResourceWizard( }); server.stop(); - console.log(`\nAdded resource: ${name}`); console.log('\nYou can now use this resource:'); console.log(` btca ask -r ${name} -q "your question"`); }); rl.close(); + if (Result.isError(result)) throw result.error; +}; - if (Result.isError(result)) { - throw result.error; - } -} +const addWebsiteResourceWizard = async ( + websiteUrl: string, + options: { global?: boolean }, + globalOpts: { server?: string; port?: number } | undefined +) => { + console.log('\n--- Add Website Resource ---\n'); + console.log(`Website: ${websiteUrl}`); + + const rl = createRl(); + const result = await Result.tryPromise(async () => { + const finalUrl = await promptInput(rl, 'URL', websiteUrl); + const name = await promptInput(rl, 'Name', defaultWebsiteNameFromUrl(finalUrl)); + const maxPages = parseRequiredInt( + await promptInput(rl, 'Max Pages', String(DEFAULT_WEBSITE_MAX_PAGES)), + 'maxPages', + 1 + ); + const maxDepth = parseRequiredInt( + await promptInput(rl, 'Max Depth', String(DEFAULT_WEBSITE_MAX_DEPTH)), + 'maxDepth', + 0 + ); + const ttlHours = parseRequiredInt( + await promptInput(rl, 'TTL Hours', String(DEFAULT_WEBSITE_TTL_HOURS)), + 'ttlHours', + 1 + ); + const notes = await promptInput(rl, 'Notes (optional)'); + rl.close(); + + console.log('\n--- Summary ---\n'); + console.log(' Type: website'); + console.log(` Name: ${name}`); + console.log(` URL: ${finalUrl}`); + console.log(` Max Pages: ${maxPages}`); + console.log(` Max Depth: ${maxDepth}`); + console.log(` TTL Hours: ${ttlHours}`); + if (notes) console.log(` Notes: ${notes}`); + console.log(` Config: ${options.global ? 'global' : 'project'}`); + console.log(''); + + const confirmRl = createRl(); + const confirmed = await promptConfirm(confirmRl, 'Add this resource?'); + confirmRl.close(); + if (!confirmed) { + console.log('\nCancelled.'); + process.exit(0); + } + + const server = await ensureServer({ + serverUrl: globalOpts?.server, + port: globalOpts?.port, + quiet: true + }); + + await addResource(server.url, { + type: 'website', + name, + url: finalUrl, + maxPages, + maxDepth, + ttlHours, + ...(notes && { specialNotes: notes }) + }); + + server.stop(); + console.log(`\nAdded resource: ${name}`); + console.log('\nYou can now use this resource:'); + console.log(` btca ask -r ${name} -q "your question"`); + }); + + rl.close(); + if (Result.isError(result)) throw result.error; +}; export const addCommand = new Command('add') - .description('Add a resource (git repository or local directory)') - .argument('[url-or-path]', 'GitHub repository URL or local directory path') + .description('Add a resource (git repository, website, or local directory)') + .argument('[url-or-path]', 'GitHub repository URL, website URL, or local directory path') .option('-g, --global', 'Add to global config instead of project config') .option('-n, --name ', 'Resource name') .option('-b, --branch ', 'Git branch (default: main)') .option('-s, --search-path ', 'Search paths within repo (can specify multiple)') + .option('--max-pages ', 'Max pages for website crawl', (value) => + Number.parseInt(value, 10) + ) + .option('--max-depth ', 'Max depth for website crawl', (value) => + Number.parseInt(value, 10) + ) + .option('--ttl-hours ', 'Website cache TTL in hours', (value) => + Number.parseInt(value, 10) + ) .option('--notes ', 'Special notes for the agent') - .option('-t, --type ', 'Resource type: git or local (auto-detected if not specified)') + .option('-t, --type ', 'Resource type: git, website, or local (auto-detected if omitted)') .action( async ( urlOrPath: string | undefined, @@ -347,6 +368,9 @@ export const addCommand = new Command('add') name?: string; branch?: string; searchPath?: string[]; + maxPages?: number; + maxDepth?: number; + ttlHours?: number; notes?: string; type?: string; }, @@ -355,12 +379,12 @@ export const addCommand = new Command('add') const globalOpts = command.parent?.opts() as { server?: string; port?: number } | undefined; const result = await Result.tryPromise(async () => { - // If no argument provided, start interactive wizard if (!urlOrPath) { - const resourceType = await promptSelect<'git' | 'local'>( + const resourceType = await promptSelect( 'What type of resource do you want to add?', [ { label: 'Git repository', value: 'git' }, + { label: 'Website', value: 'website' }, { label: 'Local directory', value: 'local' } ] ); @@ -374,40 +398,46 @@ export const addCommand = new Command('add') process.exit(1); } await addGitResourceWizard(url, options, globalOpts); - } else { - const localPath = await promptInput(rl, 'Local path'); + return; + } + if (resourceType === 'website') { + const url = await promptInput(rl, 'Website URL'); rl.close(); - if (!localPath) { - console.error('Error: Path is required.'); + if (!url) { + console.error('Error: URL is required.'); process.exit(1); } - await addLocalResourceWizard(localPath, options, globalOpts); + await addWebsiteResourceWizard(url, options, globalOpts); + return; } + + const localPath = await promptInput(rl, 'Local path'); + rl.close(); + if (!localPath) { + console.error('Error: Path is required.'); + process.exit(1); + } + await addLocalResourceWizard(localPath, options, globalOpts); return; } - // Determine type from argument or explicit flag - let resourceType: 'git' | 'local' = 'git'; - + let resourceType: ResourceType = 'git'; if (options.type) { - if (options.type !== 'git' && options.type !== 'local') { - console.error('Error: --type must be "git" or "local"'); + if (options.type !== 'git' && options.type !== 'website' && options.type !== 'local') { + console.error('Error: --type must be "git", "website", or "local"'); process.exit(1); } - resourceType = options.type as 'git' | 'local'; + resourceType = options.type as ResourceType; } else { - // Auto-detect: if it looks like a URL, it's git; otherwise local - const isUrl = + const looksLikeUrl = urlOrPath.startsWith('http://') || urlOrPath.startsWith('https://') || urlOrPath.startsWith('github.com/') || urlOrPath.includes('github.com/'); - resourceType = isUrl ? 'git' : 'local'; + resourceType = looksLikeUrl ? 'git' : 'local'; } - // If all required options provided via flags, skip wizard if (options.name && resourceType === 'git' && parseGitHubUrl(urlOrPath)) { - // Non-interactive git add const normalizedUrl = normalizeGitHubUrl(urlOrPath); const server = await ensureServer({ serverUrl: globalOpts?.server, @@ -427,7 +457,6 @@ export const addCommand = new Command('add') }); server.stop(); - console.log(`Added git resource: ${options.name}`); if (resource.type === 'git' && resource.url !== normalizedUrl) { console.log(` URL normalized: ${resource.url}`); @@ -435,8 +464,46 @@ export const addCommand = new Command('add') return; } + if (options.name && resourceType === 'website') { + if (!urlOrPath.startsWith('http://') && !urlOrPath.startsWith('https://')) { + console.error('Error: website resources require an absolute URL.'); + process.exit(1); + } + const server = await ensureServer({ + serverUrl: globalOpts?.server, + port: globalOpts?.port, + quiet: true + }); + + const maxPages = options.maxPages ?? DEFAULT_WEBSITE_MAX_PAGES; + const maxDepth = options.maxDepth ?? DEFAULT_WEBSITE_MAX_DEPTH; + const ttlHours = options.ttlHours ?? DEFAULT_WEBSITE_TTL_HOURS; + if (!Number.isFinite(maxPages) || maxPages < 1) { + throw new Error('maxPages must be an integer >= 1'); + } + if (!Number.isFinite(maxDepth) || maxDepth < 0) { + throw new Error('maxDepth must be an integer >= 0'); + } + if (!Number.isFinite(ttlHours) || ttlHours < 1) { + throw new Error('ttlHours must be an integer >= 1'); + } + + await addResource(server.url, { + type: 'website', + name: options.name, + url: urlOrPath, + maxPages, + maxDepth, + ttlHours, + ...(options.notes && { specialNotes: options.notes }) + }); + + server.stop(); + console.log(`Added website resource: ${options.name}`); + return; + } + if (options.name && resourceType === 'local') { - // Non-interactive local add const resolvedPath = path.isAbsolute(urlOrPath) ? urlOrPath : path.resolve(process.cwd(), urlOrPath); @@ -458,12 +525,15 @@ export const addCommand = new Command('add') return; } - // Interactive wizard based on type if (resourceType === 'git') { await addGitResourceWizard(urlOrPath, options, globalOpts); - } else { - await addLocalResourceWizard(urlOrPath, options, globalOpts); + return; + } + if (resourceType === 'website') { + await addWebsiteResourceWizard(urlOrPath, options, globalOpts); + return; } + await addLocalResourceWizard(urlOrPath, options, globalOpts); }); if (Result.isError(result)) { diff --git a/apps/cli/src/commands/remove.ts b/apps/cli/src/commands/remove.ts index 2e992f61..a8175b0e 100644 --- a/apps/cli/src/commands/remove.ts +++ b/apps/cli/src/commands/remove.ts @@ -25,7 +25,17 @@ interface LocalResource { specialNotes?: string; } -type ResourceDefinition = GitResource | LocalResource; +interface WebsiteResource { + type: 'website'; + name: string; + url: string; + maxPages?: number; + maxDepth?: number; + ttlHours?: number; + specialNotes?: string; +} + +type ResourceDefinition = GitResource | LocalResource | WebsiteResource; const isGitResource = (r: ResourceDefinition): r is GitResource => r.type === 'git'; @@ -42,7 +52,7 @@ async function selectSingleResource(resources: ResourceDefinition[]): Promise { - const location = isGitResource(r) ? r.url : r.path; + const location = isGitResource(r) ? r.url : r.type === 'website' ? r.url : r.path; console.log(` ${idx + 1}. ${r.name} ${dim(`(${location})`)}`); }); console.log(''); diff --git a/apps/cli/src/commands/resources.ts b/apps/cli/src/commands/resources.ts index 02ef5779..46598451 100644 --- a/apps/cli/src/commands/resources.ts +++ b/apps/cli/src/commands/resources.ts @@ -47,6 +47,13 @@ export const resourcesCommand = new Command('resources') console.log(` Search Path: ${r.searchPath}`); } if (r.specialNotes) console.log(` Notes: ${r.specialNotes}`); + } else if (r.type === 'website') { + console.log(` ${r.name} (website)`); + console.log(` URL: ${r.url}`); + console.log(` Max Pages: ${r.maxPages ?? 200}`); + console.log(` Max Depth: ${r.maxDepth ?? 3}`); + console.log(` TTL Hours: ${r.ttlHours ?? 24}`); + if (r.specialNotes) console.log(` Notes: ${r.specialNotes}`); } else { console.log(` ${r.name} (local)`); console.log(` Path: ${r.path}`); diff --git a/apps/cli/src/tui/components/input-section.tsx b/apps/cli/src/tui/components/input-section.tsx index 5733061b..c3e0f48b 100644 --- a/apps/cli/src/tui/components/input-section.tsx +++ b/apps/cli/src/tui/components/input-section.tsx @@ -90,7 +90,7 @@ export const InputSection: Component = () => { // Validate resources - require at least one @mention OR existing thread resources if (parsed.repos.length === 0 && existingResources.length === 0) { - messages.addSystemMessage('Use @reponame to add context. Example: @svelte How do I...?'); + messages.addSystemMessage('Use @resource to add context. Example: @svelte How do I...?'); return; } if (!parsed.question.trim()) { @@ -108,7 +108,7 @@ export const InputSection: Component = () => { } if (invalidRepos.length > 0) { messages.addSystemMessage( - `Repo(s) not found: ${invalidRepos.join(', ')}. Configure resources with "btca add".` + `Resource(s) not found: ${invalidRepos.join(', ')}. Configure resources with "btca add".` ); return; } diff --git a/apps/cli/src/tui/components/main-input.tsx b/apps/cli/src/tui/components/main-input.tsx index 442a22a6..ab6e3b13 100644 --- a/apps/cli/src/tui/components/main-input.tsx +++ b/apps/cli/src/tui/components/main-input.tsx @@ -40,7 +40,7 @@ export const MainInput: Component = (props) => { if (props.isStreaming) { return 'press esc to cancel'; } - return '@repo question... or / for commands'; + return '@resource question... or / for commands'; }; const getPartValueLength = (p: InputState[number]) => diff --git a/apps/cli/src/tui/components/repo-mention-palette.tsx b/apps/cli/src/tui/components/repo-mention-palette.tsx index f0a27d7c..96211c3d 100644 --- a/apps/cli/src/tui/components/repo-mention-palette.tsx +++ b/apps/cli/src/tui/components/repo-mention-palette.tsx @@ -130,7 +130,7 @@ export const RepoMentionPalette: Component = (props) => padding: 1 }} > - + {(repo, i) => { const actualIndex = () => visibleRange().start + i(); diff --git a/apps/cli/src/tui/components/status-bar.tsx b/apps/cli/src/tui/components/status-bar.tsx index df382da9..51e73e5a 100644 --- a/apps/cli/src/tui/components/status-bar.tsx +++ b/apps/cli/src/tui/components/status-bar.tsx @@ -55,10 +55,10 @@ export const StatusBar: Component = (props) => { // Show different help based on whether we have thread resources if (props.threadResources.length > 0) { - return ' Ask follow-up or [@repo] to add context [/] Commands [Ctrl+Q] Quit'; + return ' Ask follow-up or [@resource] to add context [/] Commands [Ctrl+Q] Quit'; } - return ' [@repo] Ask question [/] Commands [Ctrl+Q] Quit'; + return ' [@resource] Ask question [/] Commands [Ctrl+Q] Quit'; }; const getResourcesLabel = () => { diff --git a/apps/cli/src/tui/services.ts b/apps/cli/src/tui/services.ts index a1c6c14f..687ec0c2 100644 --- a/apps/cli/src/tui/services.ts +++ b/apps/cli/src/tui/services.ts @@ -38,20 +38,20 @@ export interface ModelUpdateResult { export const services = { /** - * Get all resources as Repos (only git resources for now) + * Get mentionable resources for the TUI. */ getRepos: async (): Promise => { const client = createClient(getServerUrl()); const { resources } = await getResources(client); return resources - .filter((r) => r.type === 'git') + .filter((r) => r.type === 'git' || r.type === 'website') .map((r) => ({ name: r.name, url: r.url ?? '', - branch: r.branch ?? 'main', + branch: r.type === 'git' ? (r.branch ?? 'main') : 'website', specialNotes: r.specialNotes ?? undefined, - searchPath: r.searchPath ?? undefined, - searchPaths: r.searchPaths ?? undefined + searchPath: r.type === 'git' ? (r.searchPath ?? undefined) : undefined, + searchPaths: r.type === 'git' ? (r.searchPaths ?? undefined) : undefined })); }, diff --git a/apps/docs/api-reference/local/config-resources.mdx b/apps/docs/api-reference/local/config-resources.mdx index b3a3d266..aa638ce5 100644 --- a/apps/docs/api-reference/local/config-resources.mdx +++ b/apps/docs/api-reference/local/config-resources.mdx @@ -3,4 +3,4 @@ title: 'Add a resource' openapi: 'POST /config/resources' --- -Adds a git or local resource to the current config. +Adds a git, local, or website resource to the current config. diff --git a/apps/docs/api-reference/local/resources.mdx b/apps/docs/api-reference/local/resources.mdx index 8ab21986..f3da82f8 100644 --- a/apps/docs/api-reference/local/resources.mdx +++ b/apps/docs/api-reference/local/resources.mdx @@ -3,4 +3,4 @@ title: 'List resources' openapi: 'GET /resources' --- -Lists all configured resources from the active config. +Lists all configured resources from the active config (git, local, and website). diff --git a/apps/docs/btca.spec.md b/apps/docs/btca.spec.md index 96365dae..01d5251a 100644 --- a/apps/docs/btca.spec.md +++ b/apps/docs/btca.spec.md @@ -138,6 +138,14 @@ Example: "type": "local", "name": "internal-docs", "path": "/abs/path/docs" + }, + { + "type": "website", + "name": "public-docs", + "url": "https://example.com/docs", + "maxPages": 200, + "maxDepth": 3, + "ttlHours": 24 } ] } @@ -217,7 +225,7 @@ REPL supports `@resource` mentions. ### 4.3 `btca add [url-or-path]` -Add a git repo or local directory resource. +Add a git repo, website, or local directory resource. Options: @@ -225,14 +233,19 @@ Options: - `-n, --name ` - `-b, --branch ` (default `main`) - `-s, --search-path ` +- `--max-pages ` (website, default `200`) +- `--max-depth ` (website, default `3`) +- `--ttl-hours ` (website, default `24`) - `--notes ` -- `-t, --type ` +- `-t, --type ` Behavior: - If no argument, interactive wizard. - If `--type` omitted, auto‑detects URL vs path. - Git URLs are normalized to base repo when GitHub. +- Website resources require an absolute HTTPS URL. +- Website crawling may probe for markdown-friendly variants by appending `.md` and `/.md` to page paths (and follow same-origin redirects) to improve extraction on SPA docs sites. - Local paths are resolved to absolute paths. ### 4.4 `btca remove [name]` @@ -520,6 +533,15 @@ Response: "type": "local", "path": "/abs/path/docs", "specialNotes": null + }, + { + "name": "public-docs", + "type": "website", + "url": "https://example.com/docs", + "maxPages": 200, + "maxDepth": 3, + "ttlHours": 24, + "specialNotes": null } ] } @@ -623,6 +645,19 @@ Request (local): { "type": "local", "name": "docs", "path": "/abs/path/docs" } ``` +Request (website): + +```json +{ + "type": "website", + "name": "public-docs", + "url": "https://example.com/docs", + "maxPages": 200, + "maxDepth": 3, + "ttlHours": 24 +} +``` + Response: the created resource (GitHub URLs normalized to base repo). ### 6.10 `DELETE /config/resources` @@ -763,6 +798,13 @@ Git URL validation: - No localhost or private IPs - GitHub URLs normalized to base repo +Website URL validation: + +- HTTPS only +- No embedded credentials +- No localhost or private IPs +- Defaults: `maxPages=200`, `maxDepth=3`, `ttlHours=24` + --- ## 9. Remote Cloud API (used by CLI) diff --git a/apps/docs/guides/cli-reference.mdx b/apps/docs/guides/cli-reference.mdx index 1ddc3f22..78c5c3fc 100644 --- a/apps/docs/guides/cli-reference.mdx +++ b/apps/docs/guides/cli-reference.mdx @@ -31,7 +31,7 @@ The REPL supports `@resource` mentions. ## `btca add [url-or-path]` -Adds a git repo or local directory resource. +Adds a git repo, website, or local directory resource. Options: @@ -40,9 +40,12 @@ Options: - `-b, --branch ` sets a branch (default `main`). - `-s, --search-path ` sets one or more search paths. - `--notes ` sets special notes. -- `-t, --type ` forces the resource type. +- `-t, --type ` forces the resource type. +- `--max-pages ` sets a website crawl page cap (default `200`). +- `--max-depth ` sets a website crawl depth cap (default `3`). +- `--ttl-hours ` sets website cache TTL hours (default `24`). -Behavior: If no argument is provided, the CLI starts an interactive wizard. When `--type` is omitted, it auto-detects URL vs path. GitHub URLs are normalized to the base repo. Local paths are resolved to absolute paths. +Behavior: If no argument is provided, the CLI starts an interactive wizard. When `--type` is omitted, it auto-detects URL vs path (`git` for URLs, `local` for paths). GitHub URLs are normalized to the base repo. Local paths are resolved to absolute paths. Example (local path): diff --git a/apps/docs/guides/configuration.mdx b/apps/docs/guides/configuration.mdx index c5f5fba6..6a3dab42 100644 --- a/apps/docs/guides/configuration.mdx +++ b/apps/docs/guides/configuration.mdx @@ -42,6 +42,14 @@ Example: "branch": "main", "searchPath": "apps/svelte.dev", "specialNotes": "Focus on docs content" + }, + { + "type": "website", + "name": "myDocs", + "url": "https://example.com/docs", + "maxPages": 200, + "maxDepth": 3, + "ttlHours": 24 } ] } @@ -104,6 +112,8 @@ Example: - Question length: max 100,000 chars - Resources per request: max 20 - Git URL: HTTPS only, no embedded credentials, no localhost/private IPs +- Website URL: HTTPS only, no embedded credentials, no localhost/private IPs +- Website defaults: `maxPages=200`, `maxDepth=3`, `ttlHours=24` - GitHub URLs are normalized to the base repo. ## Known gaps diff --git a/apps/server/package.json b/apps/server/package.json index 304abc89..ff1ed30d 100644 --- a/apps/server/package.json +++ b/apps/server/package.json @@ -58,6 +58,7 @@ "@btca/shared": "workspace:*", "ai": "^6.0.49", "better-result": "^2.6.0", + "cheerio": "^1.2.0", "hono": "^4.7.11", "just-bash": "^2.7.0", "opencode-ai": "^1.1.36", diff --git a/apps/server/src/collections/service.ts b/apps/server/src/collections/service.ts index ba9affb0..4f45b51f 100644 --- a/apps/server/src/collections/service.ts +++ b/apps/server/src/collections/service.ts @@ -7,7 +7,7 @@ import { Transaction } from '../context/transaction.ts'; import { CommonHints, getErrorHint, getErrorMessage } from '../errors.ts'; import { Metrics } from '../metrics/index.ts'; import { Resources } from '../resources/service.ts'; -import { isGitResource } from '../resources/schema.ts'; +import { isGitResource, isWebsiteResource } from '../resources/schema.ts'; import { FS_RESOURCE_SYSTEM_NOTE, type BtcaFsResource } from '../resources/types.ts'; import { CollectionError, getCollectionKey, type CollectionResult } from './types.ts'; import { VirtualFs } from '../vfs/virtual-fs.ts'; @@ -150,6 +150,26 @@ export namespace Collections { repoSubPaths: args.resource.repoSubPaths, loadedAt: args.loadedAt }; + if (isWebsiteResource(args.definition)) { + const manifestResult = await Result.tryPromise(() => + Bun.file(path.join(args.resourcePath, '.btca-website-manifest.json')).text() + ); + const crawledAt = manifestResult.match({ + ok: (content) => { + const parsedResult = Result.try(() => JSON.parse(content) as { crawledAt?: string }); + return parsedResult.match({ + ok: (parsed) => parsed.crawledAt, + err: () => undefined + }); + }, + err: () => undefined + }); + return { + ...base, + url: args.definition.url, + crawledAt + }; + } if (!isGitResource(args.definition)) return base; const commit = await getGitHeadHash(args.resourcePath); return { diff --git a/apps/server/src/collections/virtual-metadata.ts b/apps/server/src/collections/virtual-metadata.ts index ab33ce38..f25a22a8 100644 --- a/apps/server/src/collections/virtual-metadata.ts +++ b/apps/server/src/collections/virtual-metadata.ts @@ -1,12 +1,13 @@ export type VirtualResourceMetadata = { name: string; fsName: string; - type: 'git' | 'local'; + type: 'git' | 'local' | 'website'; path: string; repoSubPaths: readonly string[]; url?: string; branch?: string; commit?: string; + crawledAt?: string; loadedAt: string; }; diff --git a/apps/server/src/index.ts b/apps/server/src/index.ts index 3af8eacd..9618dc8a 100644 --- a/apps/server/src/index.ts +++ b/apps/server/src/index.ts @@ -11,7 +11,11 @@ import { Context } from './context/index.ts'; import { getErrorMessage, getErrorTag, getErrorHint } from './errors.ts'; import { Metrics } from './metrics/index.ts'; import { Resources } from './resources/service.ts'; -import { GitResourceSchema, LocalResourceSchema } from './resources/schema.ts'; +import { + GitResourceSchema, + LocalResourceSchema, + WebsiteResourceSchema +} from './resources/schema.ts'; import { StreamService } from './stream/service.ts'; import type { BtcaStreamMetaEvent } from './stream/types.ts'; import { LIMITS, normalizeGitHubUrl } from './validation/index.ts'; @@ -141,9 +145,35 @@ const AddLocalResourceRequestSchema = z.object({ specialNotes: LocalResourceSchema.shape.specialNotes }); +const AddWebsiteResourceRequestSchema = z.object({ + type: z.literal('website'), + name: WebsiteResourceSchema.shape.name, + url: WebsiteResourceSchema.shape.url, + maxPages: z.coerce + .number() + .int('maxPages must be an integer') + .min(1) + .max(LIMITS.WEBSITE_MAX_PAGES_MAX) + .optional(), + maxDepth: z.coerce + .number() + .int('maxDepth must be an integer') + .min(0) + .max(LIMITS.WEBSITE_MAX_DEPTH_MAX) + .optional(), + ttlHours: z.coerce + .number() + .int('ttlHours must be an integer') + .min(1) + .max(LIMITS.WEBSITE_TTL_HOURS_MAX) + .optional(), + specialNotes: WebsiteResourceSchema.shape.specialNotes +}); + const AddResourceRequestSchema = z.discriminatedUnion('type', [ AddGitResourceRequestSchema, - AddLocalResourceRequestSchema + AddLocalResourceRequestSchema, + AddWebsiteResourceRequestSchema ]); const RemoveResourceRequestSchema = z.object({ @@ -263,14 +293,24 @@ const createApp = (deps: { searchPaths: r.searchPaths ?? null, specialNotes: r.specialNotes ?? null }; - } else { + } + if (r.type === 'website') { return { name: r.name, type: r.type, - path: r.path, + url: r.url, + maxPages: r.maxPages, + maxDepth: r.maxDepth, + ttlHours: r.ttlHours, specialNotes: r.specialNotes ?? null }; } + return { + name: r.name, + type: r.type, + path: r.path, + specialNotes: r.specialNotes ?? null + }; }) }); }) @@ -407,16 +447,28 @@ const createApp = (deps: { }; const added = await config.addResource(resource); return c.json(added, 201); - } else { + } + if (decoded.type === 'website') { const resource = { - type: 'local' as const, + type: 'website' as const, name: decoded.name, - path: decoded.path, + url: decoded.url, + maxPages: decoded.maxPages ?? LIMITS.WEBSITE_DEFAULT_MAX_PAGES, + maxDepth: decoded.maxDepth ?? LIMITS.WEBSITE_DEFAULT_MAX_DEPTH, + ttlHours: decoded.ttlHours ?? LIMITS.WEBSITE_DEFAULT_TTL_HOURS, ...(decoded.specialNotes && { specialNotes: decoded.specialNotes }) }; const added = await config.addResource(resource); return c.json(added, 201); } + const resource = { + type: 'local' as const, + name: decoded.name, + path: decoded.path, + ...(decoded.specialNotes && { specialNotes: decoded.specialNotes }) + }; + const added = await config.addResource(resource); + return c.json(added, 201); }) // DELETE /config/resources - Remove a resource diff --git a/apps/server/src/resources/impls/website.test.ts b/apps/server/src/resources/impls/website.test.ts new file mode 100644 index 00000000..dcc68763 --- /dev/null +++ b/apps/server/src/resources/impls/website.test.ts @@ -0,0 +1,346 @@ +import { afterEach, beforeEach, describe, expect, it } from 'bun:test'; +import { promises as fs } from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; + +import { GlobTool } from '../../tools/glob.ts'; +import { GrepTool } from '../../tools/grep.ts'; +import { ListTool } from '../../tools/list.ts'; +import { ReadTool } from '../../tools/read.ts'; +import { VirtualFs } from '../../vfs/virtual-fs.ts'; +import { loadWebsiteResource } from './website.ts'; + +const FIXTURE_URL = 'https://docs.example.com/docs'; + +type MockResponseInit = { + status?: number; + headers?: Record; + body?: string; +}; + +type MockRoutes = Record MockResponseInit)>; + +describe('Website Resource', () => { + let tempDir = ''; + let originalFetch: typeof fetch; + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'btca-website-test-')); + originalFetch = globalThis.fetch; + }); + + afterEach(async () => { + globalThis.fetch = originalFetch; + VirtualFs.disposeAll(); + await fs.rm(tempDir, { recursive: true, force: true }); + }); + + const withMockFetch = (routes: MockRoutes, fallback?: () => never) => { + const calls: string[] = []; + globalThis.fetch = (async (input: string | URL | Request) => { + const url = + typeof input === 'string' ? input : input instanceof URL ? input.toString() : input.url; + calls.push(url); + const route = routes[url]; + if (!route) { + if (fallback) fallback(); + return new Response('not found', { status: 404 }); + } + const response = typeof route === 'function' ? route() : route; + return new Response(response.body ?? '', { + status: response.status ?? 200, + headers: response.headers + }); + }) as typeof fetch; + return calls; + }; + + const baseArgs = () => ({ + type: 'website' as const, + name: 'docs-site', + url: FIXTURE_URL, + maxPages: 10, + maxDepth: 3, + ttlHours: 24, + resourcesDirectoryPath: tempDir, + specialAgentInstructions: '', + quiet: true + }); + + it('rejects non-HTTPS website URLs', async () => { + expect( + loadWebsiteResource({ + ...baseArgs(), + url: 'http://docs.example.com/docs' + }) + ).rejects.toThrow(); + }); + + it('crawls website pages, respects robots, and supports tools over snapshot files', async () => { + withMockFetch({ + 'https://docs.example.com/robots.txt': { + body: 'User-agent: *\nDisallow: /docs/private\n' + }, + 'https://docs.example.com/sitemap.xml': { + headers: { 'content-type': 'application/xml' }, + body: ` + https://docs.example.com/docs/getting-started + https://docs.example.com/docs/private + ` + }, + 'https://docs.example.com/docs': { + headers: { 'content-type': 'text/html' }, + body: ` + Docs Home +
+

Docs Home

+

Welcome to docs.

+ Start + Private +
+ + ` + }, + 'https://docs.example.com/docs/getting-started': { + headers: { 'content-type': 'text/html' }, + body: ` + Getting Started +
+

Getting Started

+

Install and run.

+
+ + ` + }, + 'https://docs.example.com/docs/private': { + headers: { 'content-type': 'text/html' }, + body: 'Private

blocked

' + } + }); + + const resource = await loadWebsiteResource(baseArgs()); + const resourcePath = await resource.getAbsoluteDirectoryPath(); + + expect(await Bun.file(path.join(resourcePath, 'pages/docs.md')).exists()).toBe(true); + expect(await Bun.file(path.join(resourcePath, 'pages/docs/getting-started.md')).exists()).toBe( + true + ); + expect(await Bun.file(path.join(resourcePath, 'pages/docs/private.md')).exists()).toBe(false); + + const indexLines = (await Bun.file(path.join(resourcePath, '_index.jsonl')).text()) + .split('\n') + .filter(Boolean); + expect(indexLines.length).toBe(2); + + const vfsId = VirtualFs.create(); + await VirtualFs.mkdir('/', { recursive: true }, vfsId); + await VirtualFs.importDirectoryFromDisk({ + sourcePath: resourcePath, + destinationPath: '/docs-site', + vfsId + }); + + const context = { basePath: '/', vfsId }; + const listResult = await ListTool.execute({ path: '.' }, context); + expect(listResult.output).toContain('docs-site'); + + const globResult = await GlobTool.execute({ pattern: '**/*.md' }, context); + expect(globResult.output).toContain('docs-site/pages/docs/getting-started.md'); + + const grepResult = await GrepTool.execute({ pattern: 'Getting Started' }, context); + expect(grepResult.output).toContain('docs-site/pages/docs/getting-started.md'); + + const readResult = await ReadTool.execute({ path: 'docs-site/pages/docs.md' }, context); + expect(readResult.output).toContain('Source: https://docs.example.com/docs'); + + VirtualFs.dispose(vfsId); + }); + + it('uses cached snapshot when still fresh', async () => { + const initialCalls = withMockFetch({ + 'https://docs.example.com/robots.txt': { body: 'User-agent: *\nAllow: /\n' }, + 'https://docs.example.com/sitemap.xml': { + headers: { 'content-type': 'application/xml' }, + body: '' + }, + 'https://docs.example.com/docs': { + headers: { 'content-type': 'text/html' }, + body: 'Docs

cached

' + } + }); + + const resource = await loadWebsiteResource(baseArgs()); + const resourcePath = await resource.getAbsoluteDirectoryPath(); + expect(initialCalls.length).toBeGreaterThan(0); + + const cachedCalls = withMockFetch({}, () => { + throw new Error('fetch should not be called for fresh cache'); + }); + const cached = await loadWebsiteResource(baseArgs()); + expect(await cached.getAbsoluteDirectoryPath()).toBe(resourcePath); + expect(cachedCalls.length).toBe(0); + }); + + it('falls back to stale cache when re-crawl fails', async () => { + withMockFetch({ + 'https://docs.example.com/robots.txt': { body: 'User-agent: *\nAllow: /\n' }, + 'https://docs.example.com/sitemap.xml': { + headers: { 'content-type': 'application/xml' }, + body: '' + }, + 'https://docs.example.com/docs': { + headers: { 'content-type': 'text/html' }, + body: 'Docs

cached

' + } + }); + + const seeded = await loadWebsiteResource({ ...baseArgs(), ttlHours: 1 }); + const resourcePath = await seeded.getAbsoluteDirectoryPath(); + const manifestPath = path.join(resourcePath, '.btca-website-manifest.json'); + const manifest = JSON.parse(await Bun.file(manifestPath).text()) as { crawledAt: string }; + manifest.crawledAt = new Date(Date.now() - 48 * 60 * 60 * 1000).toISOString(); + await Bun.write(manifestPath, JSON.stringify(manifest)); + + withMockFetch({ + 'https://docs.example.com/robots.txt': () => { + throw new Error('network failure'); + } + }); + + const fallback = await loadWebsiteResource({ ...baseArgs(), ttlHours: 1 }); + expect(await fallback.getAbsoluteDirectoryPath()).toBe(resourcePath); + expect(await Bun.file(path.join(resourcePath, 'pages/docs.md')).exists()).toBe(true); + }); + + it('prefers markdown variants when available (.md then /.md) and preserves markdown formatting', async () => { + const calls = withMockFetch({ + 'https://docs.btca.dev/robots.txt': { body: 'User-agent: *\nAllow: /\n' }, + 'https://docs.btca.dev/sitemap.xml': { + headers: { 'content-type': 'application/xml' }, + body: '' + }, + 'https://docs.btca.dev/guides/cli-reference.md': { status: 404, body: 'not found' }, + 'https://docs.btca.dev/guides/cli-reference/.md': { + headers: { 'content-type': 'text/markdown' }, + body: '# CLI Reference\n\n- foo\n- bar\n' + }, + 'https://docs.btca.dev/guides/cli-reference': { + headers: { 'content-type': 'text/html' }, + body: 'SPA Shell' + } + }); + + const resource = await loadWebsiteResource({ + ...baseArgs(), + name: 'btca-docs', + url: 'https://docs.btca.dev/guides/cli-reference', + maxPages: 1, + maxDepth: 0 + }); + const resourcePath = await resource.getAbsoluteDirectoryPath(); + + const pagePath = path.join(resourcePath, 'pages/guides/cli-reference.md'); + expect(await Bun.file(pagePath).exists()).toBe(true); + + const content = await Bun.file(pagePath).text(); + expect(content).toContain('Source: https://docs.btca.dev/guides/cli-reference'); + expect(content).toContain('# CLI Reference'); + expect(content).toContain('\n- foo\n- bar\n'); + + expect(calls).toContain('https://docs.btca.dev/guides/cli-reference.md'); + expect(calls).toContain('https://docs.btca.dev/guides/cli-reference/.md'); + }); + + it('only probes markdown variants once per origin when unsupported', async () => { + const calls = withMockFetch({ + 'https://docs.example.com/robots.txt': { body: 'User-agent: *\nAllow: /\n' }, + 'https://docs.example.com/sitemap.xml': { + headers: { 'content-type': 'application/xml' }, + body: '' + }, + 'https://docs.example.com/docs': { + headers: { 'content-type': 'text/html' }, + body: ` + Docs +
+ A + B +
+ + ` + }, + 'https://docs.example.com/docs/a': { + headers: { 'content-type': 'text/html' }, + body: 'A

A

' + }, + 'https://docs.example.com/docs/b': { + headers: { 'content-type': 'text/html' }, + body: 'B

B

' + } + }); + + const resource = await loadWebsiteResource({ + ...baseArgs(), + name: 'no-md', + url: 'https://docs.example.com/docs', + maxPages: 10, + maxDepth: 1 + }); + const resourcePath = await resource.getAbsoluteDirectoryPath(); + + expect(await Bun.file(path.join(resourcePath, 'pages/docs.md')).exists()).toBe(true); + expect(await Bun.file(path.join(resourcePath, 'pages/docs/a.md')).exists()).toBe(true); + expect(await Bun.file(path.join(resourcePath, 'pages/docs/b.md')).exists()).toBe(true); + + const probeCalls = calls.filter((url) => url.endsWith('.md') || url.endsWith('/.md')); + expect(probeCalls.length).toBe(2); + }); + + it('follows redirects for markdown-variant URLs', async () => { + let dotMdCalls = 0; + const calls = withMockFetch({ + 'https://bun.com/robots.txt': { body: 'User-agent: *\nAllow: /\n' }, + 'https://bun.com/sitemap.xml': { + headers: { 'content-type': 'application/xml' }, + body: '' + }, + 'https://bun.com/docs/runtime/binary-data.md': () => { + dotMdCalls += 1; + if (dotMdCalls === 1) { + return { + headers: { 'content-type': 'text/plain' }, + body: 'not markdown' + }; + } + return { + headers: { 'content-type': 'text/markdown' }, + body: '# Binary Data\n\nHello\n' + }; + }, + 'https://bun.com/docs/runtime/binary-data/.md': { + status: 302, + headers: { location: '/docs/runtime/binary-data.md' } + }, + 'https://bun.com/docs/runtime/binary-data': { + headers: { 'content-type': 'text/html' }, + body: 'Shell