diff --git a/src/ai/historian.ts b/src/ai/historian.ts index 5798adb..7038f51 100644 --- a/src/ai/historian.ts +++ b/src/ai/historian.ts @@ -62,6 +62,6 @@ export class Historian extends HistorianBase { writeFileSync(filePath, content); this.savedFiles.add(filePath); - tag('substep').log(`Updated test file with healed steps: ${relativeToCwd(filePath)}`); + tag('operation').log(`Updated test file with healed steps: ${relativeToCwd(filePath)}`); } } diff --git a/src/ai/historian/codeceptjs.ts b/src/ai/historian/codeceptjs.ts index 6f63696..8b14779 100644 --- a/src/ai/historian/codeceptjs.ts +++ b/src/ai/historian/codeceptjs.ts @@ -102,7 +102,7 @@ export function WithCodeceptJS(Base: T) { writeFileSync(filePath, lines.join('\n')); this.savedFiles.add(filePath); - tag('substep').log(`Saved plan tests to: ${relativeToCwd(filePath)}`); + tag('operation').log(`Saved plan tests to: ${relativeToCwd(filePath)}`); return filePath; } diff --git a/src/ai/historian/experience.ts b/src/ai/historian/experience.ts index 9d52f17..71a1ccd 100644 --- a/src/ai/historian/experience.ts +++ b/src/ai/historian/experience.ts @@ -56,7 +56,7 @@ export function WithExperience(Base: T) { await this.stopScreencast(); - tag('substep').log(`Historian saved session for: ${task.description}`); + tag('operation').log(`Historian saved session for: ${task.description}`); } private async reportSession(test: Test, steps: SessionStep[]): Promise { diff --git a/src/ai/historian/playwright.ts b/src/ai/historian/playwright.ts index ac45592..892e66f 100644 --- a/src/ai/historian/playwright.ts +++ b/src/ai/historian/playwright.ts @@ -140,7 +140,7 @@ export function WithPlaywright(Base: T) { writeFileSync(filePath, lines.join('\n')); this.savedFiles.add(filePath); - tag('substep').log(`Saved plan tests to: ${relativeToCwd(filePath)}`); + tag('operation').log(`Saved plan tests to: ${relativeToCwd(filePath)}`); return filePath; } diff --git a/src/ai/historian/screencast.ts b/src/ai/historian/screencast.ts index 
15e03f0..eda1d87 100644 --- a/src/ai/historian/screencast.ts +++ b/src/ai/historian/screencast.ts @@ -92,7 +92,7 @@ export function WithScreencast(Base: T) { this.screencastTask = test?._explorbotTest || null; this.screencastLastChapter = null; } catch (err) { - tag('substep').log(`Screencast start failed: ${(err as Error).message}`); + tag('operation').log(`Screencast start failed: ${(err as Error).message}`); } } @@ -116,7 +116,7 @@ export function WithScreencast(Base: T) { try { await this.screencastPage.screencast.stop(); } catch (err) { - tag('substep').log(`Screencast stop failed: ${(err as Error).message}`); + tag('operation').log(`Screencast stop failed: ${(err as Error).message}`); } this.screencastActive = false; this.screencastPage = null; @@ -126,7 +126,7 @@ export function WithScreencast(Base: T) { if (path) { this.savedFiles.add(path); task?.addArtifact?.(path); - tag('substep').log(`Saved screencast: ${relativeToCwd(path)}`); + tag('operation').log(`Saved screencast: ${relativeToCwd(path)}`); } } }; diff --git a/src/ai/navigator.ts b/src/ai/navigator.ts index 92c24f2..ba78398 100644 --- a/src/ai/navigator.ts +++ b/src/ai/navigator.ts @@ -206,7 +206,7 @@ class Navigator implements Agent { if (!actionResult.isInsideIframe) { const successful = this.experienceTracker.getSuccessfulExperience(actionResult); if (successful.length > 0) { - tag('substep').log(`Found ${successful.length} experience ${pluralize(successful.length, 'file')} for: ${actionResult.url}`); + tag('operation').log(`Found ${successful.length} experience ${pluralize(successful.length, 'file')} for: ${actionResult.url}`); experience = `\nPast successful recipes recorded from prior runs for this page. 
Prefer these solutions first if they match the goal.\n\n${successful.join('\n\n')}\n`; } } @@ -307,7 +307,7 @@ class Navigator implements Agent { stop(); return; } - tag('substep').log('Feeding failures back to AI for a new batch...'); + tag('operation').log('Feeding failures back to AI for a new batch...'); let contextMsg = 'Previous solutions did not work. Analyze the failures and try DIFFERENT strategies (not syntactic variants of the same locator).\n\n'; if (batchFailures.length > 0) { const lines = batchFailures @@ -633,7 +633,7 @@ class Navigator implements Agent { const cachedVerification = actionResult.getVerification(message); if (cachedVerification !== null) { - tag('substep').log(`Reusing cached verification: ${cachedVerification ? 'PASS' : 'FAIL'}`); + tag('operation').log(`Reusing cached verification: ${cachedVerification ? 'PASS' : 'FAIL'}`); return { verified: cachedVerification, successfulCodes: [], assertionSteps: [], totalAttempted: 0 }; } @@ -654,7 +654,7 @@ class Navigator implements Agent { const toc = this.experienceTracker.getExperienceTableOfContents(actionResult); if (toc.length > 0) { const totalSections = toc.reduce((sum, entry) => sum + entry.sections.length, 0); - tag('substep').log(`Found ${toc.length} experience ${pluralize(toc.length, 'file')} (${totalSections} sections) for: ${actionResult.url}`); + tag('operation').log(`Found ${toc.length} experience ${pluralize(toc.length, 'file')} (${totalSections} sections) for: ${actionResult.url}`); experience = renderExperienceToc(toc); } } diff --git a/src/ai/pilot.ts b/src/ai/pilot.ts index 86854ad..e92f4be 100644 --- a/src/ai/pilot.ts +++ b/src/ai/pilot.ts @@ -104,7 +104,7 @@ export class Pilot implements Agent { const schema = z.object({ decision: z.enum(['pass', 'fail', 'continue', 'skipped']).describe('pass = test succeeded, fail = test failed, continue = tester should keep going, skipped = scenario is irrelevant OR systematic execution failures prevented testing'), - reason: 
z.string().describe('What happened and why (1-2 sentences). Do NOT repeat the decision status (e.g. "scenario goal achieved/not achieved") — just explain the evidence. For continue: explain why rejected and suggest alternatives.'), + reason: z.string().describe('Concise user-facing reason, maximum 1 short sentence and 120 characters. Do NOT repeat the decision status; explain only the evidence. For continue: explain why rejected and suggest alternatives.'), guidance: z.string().nullable().describe('Required for "continue": specific actionable instruction for the tester — what exactly to verify, retry differently, or complete next. Be concrete.'), requestVerification: z .string() @@ -177,7 +177,7 @@ export class Pilot implements Agent { } } - tag('info').log(`Pilot: ${result.decision} — ${result.reason}`); + tag('info').log(`Pilot: ${result.decision} - ${result.reason}`); task.summary = result.reason; const verdictState = screenshotState || currentState; @@ -221,7 +221,7 @@ export class Pilot implements Agent { const schema = z.object({ decision: z.enum(['allow', 'fail', 'continue', 'skipped']).describe('allow = reset proceeds, fail = test failed (stop looping), continue = veto reset, tester should act on current page instead, skipped = scenario is irrelevant or cannot be executed'), - reason: z.string().describe('What evidence justifies this decision (1-2 sentences). Do not restate the decision.'), + reason: z.string().describe('Concise evidence-only reason, maximum 1 short sentence and 120 characters. Do not restate the decision.'), guidance: z.string().nullable().describe('Required for "continue": concrete instruction for what the tester should do instead of resetting (e.g. which tool to call, what to verify).'), }); @@ -388,8 +388,9 @@ export class Pilot implements Agent { - "continue": tester hasn't completed the goal; provide concrete guidance (which tool, what to check). 
If a verify() asserted a state that was ALREADY TRUE before the test, it proves nothing — reject. - reason field: do NOT restate the decision ("scenario goal achieved/not achieved"). State what happened — - what was verified, what failed, what evidence was found. + reason field: one short sentence, maximum 120 characters. Do NOT restate the decision + ("scenario goal achieved/not achieved"). State what happened: what was verified, what failed, + or what evidence was found. `; } @@ -1017,6 +1018,8 @@ export class Pilot implements Agent { Response format: PROGRESS: <1 sentence assessment> NEXT: + + Keep user-facing reasons concise: one short sentence, maximum 120 characters, evidence only, no repeated verdict wording. `; } } diff --git a/src/ai/planner.ts b/src/ai/planner.ts index e523f95..fd7ee8a 100644 --- a/src/ai/planner.ts +++ b/src/ai/planner.ts @@ -36,7 +36,7 @@ const TasksSchema = z.object({ scenario: z.string().describe('A single sentence describing what to test'), priority: z.enum(['critical', 'important', 'high', 'normal', 'low']).describe('Priority of the task based on business importance'), startUrl: z.string().nullable().describe('Start URL for the test if different from plan URL (only for tests on visited subpages)'), - steps: z.array(z.string()).describe('List of steps to perform for this scenario. Each step should be a specific action (e.g., "Click on Login button", "Enter username in email field", "Submit the form"). Keep steps atomic and actionable.'), + steps: z.array(z.string()).describe('List of steps to perform for this scenario. Each step should be a specific action (e.g., "Open the form", "Enter required data", "Submit the form"). Keep steps atomic and actionable.'), expectedOutcomes: z .array(z.string()) .describe('List of expected outcomes that can be verified. Each outcome should be simple, specific, and easy to check (e.g., "Success message appears", "URL changes to /dashboard", "Form field shows error"). 
Keep outcomes atomic - do not combine multiple checks into one.'), @@ -226,9 +226,7 @@ export class Planner extends PlannerBase implements Agent { } } - const availableStyles = Object.keys(getStyles()).join(', '); tag('success').log(`Planning complete! ${this.currentPlan.tests.length} tests in plan: ${this.currentPlan.title}`); - tag('info').log(`Planning style: ${this.lastStyleName} (available: ${availableStyles})`); if (state.url) registerPlan(state.url, this.currentPlan, feature, state.hash); diff --git a/src/ai/provider.ts b/src/ai/provider.ts index 30065d6..2a1a007 100644 --- a/src/ai/provider.ts +++ b/src/ai/provider.ts @@ -294,8 +294,11 @@ export class Provider { } throw new ContextLengthError(error.message || error.toString()); } - tag('error').log(error.message || error.toString()); - throw new AiError(error.message || error.toString()); + const message = error.message || error.toString(); + if (message !== 'No response text from AI') { + tag('error').log(message); + } + throw new AiError(message); } } @@ -376,7 +379,6 @@ export class Provider { } catch (error: any) { clearActivity(); if (error?.message?.includes('Tool choice is required')) { - tag('warning').log('Model completed without calling a tool, returning empty result'); return { text: '', toolCalls: [], toolResults: [], response: { messages: [] }, usage: null }; } if (error?.name === 'AbortError') throw error; diff --git a/src/ai/researcher.ts b/src/ai/researcher.ts index 5d3c4cf..1db107e 100644 --- a/src/ai/researcher.ts +++ b/src/ai/researcher.ts @@ -151,7 +151,7 @@ export class Researcher extends ResearcherBase implements Agent { if (!deep && !force) { const similar = await findSimilarResearch(combinedHtml); if (similar) { - tag('substep').log('Similar research found, reusing cached result'); + tag('operation').log('Similar research found, reusing cached result'); if (stateHash) saveResearch(stateHash, similar, combinedHtml); tag('multiline').log(formatResearchSummary(similar)); 
tag('success').log('Research complete (reused)'); @@ -316,10 +316,10 @@ export class Researcher extends ResearcherBase implements Agent { tag('multiline').log(formatResearchSummary(result.text, { visionUsed: this.hasScreenshotToAnalyze })); tag('success').log('Research complete'); - if (researchFile) tag('substep').log(`Research file saved to: ${researchFile}`); + if (researchFile) tag('operation').log(`Research file saved to: ${researchFile}`); if (this.actionResult?.screenshotFile) { const screenshotPath = outputPath('states', this.actionResult.screenshotFile); - tag('substep').log(`UI screenshot: file://${screenshotPath}`); + tag('operation').log(`UI screenshot: file://${screenshotPath}`); } await this.hooksRunner.runAfterHook('researcher', state.url); @@ -467,7 +467,7 @@ export class Researcher extends ResearcherBase implements Agent { .filter((k) => !!k) .join('\n\n'); - tag('substep').log(`Found ${knowledgeFiles.length} relevant knowledge ${pluralize(knowledgeFiles.length, 'file')} for: ${this.actionResult.url}`); + tag('operation').log(`Found ${knowledgeFiles.length} relevant knowledge ${pluralize(knowledgeFiles.length, 'file')} for: ${this.actionResult.url}`); knowledge = ` Here is relevant knowledge for this page: diff --git a/src/ai/researcher/locators.ts b/src/ai/researcher/locators.ts index fea3c46..1c1fae6 100644 --- a/src/ai/researcher/locators.ts +++ b/src/ai/researcher/locators.ts @@ -80,7 +80,7 @@ export function WithLocators(Base: T) { } } - tag('substep').log(`Validated ${locators.length} locators: ${locators.length - broken} valid, ${broken} broken`); + tag('operation').log(`Validated ${locators.length} locators: ${locators.length - broken} valid, ${broken} broken`); } async fixBrokenSections(result: ResearchResult, conversation: Conversation): Promise { diff --git a/src/ai/session-analyst.ts b/src/ai/session-analyst.ts index a53caa3..eb1ddd1 100644 --- a/src/ai/session-analyst.ts +++ b/src/ai/session-analyst.ts @@ -41,6 +41,9 @@ export class 
SessionAnalyst implements Agent { Crucial distinction: "the app misbehaved" vs "the automation could not interact with the app". ONLY the first is a Defect. If the automation gives up before the app responds — timeout, retries exhausted, dead loop / loop detected, could not click or find an element — that is an Execution issue regardless of what the log calls it. Failure inside the automation ≠ failure inside the product. + The action log is more authoritative than the scenario title. If the actual submitted data, page state, or action sequence does not match the scenario title, classify it as Execution issue and do not list that scenario under What works. Do NOT infer a product Defect or UX issue from behavior caused by incorrect test data or an automation mismatch. + Negative test data is valid when it matches a negative scenario. Do not call intentionally invalid input wrong data when the scenario expects rejection or validation feedback. + A solitary failure where adjacent tests on the same feature passed → Execution, not Defect. ## Severity (defects only) @@ -76,7 +79,7 @@ export class SessionAnalyst implements Agent { ## Brevity rules - - Headline: 2 sentences MAX. About the FEATURE, not the run. No counts, no "N tests", no "this session". Banned words: "exercised", "comprehensive", "notably", "this session", "module", "targeted", "covered creation". + - Headline: 2 sentences MAX. About the FEATURE, not the run. No counts, no "N tests", no "this session". Never use these words: "exercised", "comprehensive", "notably", "this session", "module", "targeted", "covered creation". - What works: feature name + test refs. NO parentheticals, NO caveats. If there's a caveat, the entry doesn't belong here. - Defect title is the BUG ("Search returns non-matching results"), never the scenario name. - Reproduce steps are imperative one-liners drawn from the log. 
diff --git a/src/ai/task-agent.ts b/src/ai/task-agent.ts index efe918a..69c3090 100644 --- a/src/ai/task-agent.ts +++ b/src/ai/task-agent.ts @@ -44,7 +44,7 @@ export abstract class TaskAgent { .filter((k) => !!k) .join('\n\n'); - tag('substep').log(`Found ${knowledgeFiles.length} relevant knowledge ${pluralize(knowledgeFiles.length, 'file')}`); + tag('operation').log(`Found ${knowledgeFiles.length} relevant knowledge ${pluralize(knowledgeFiles.length, 'file')}`); return dedent` Here is relevant knowledge for this page: @@ -61,7 +61,7 @@ export abstract class TaskAgent { const totalSections = toc.reduce((sum, entry) => sum + entry.sections.length, 0); debugLog(`injecting experience TOC (${toc.length} files, ${totalSections} sections)`); - tag('substep').log(`Found ${toc.length} experience ${pluralize(toc.length, 'file')} (${totalSections} sections)`); + tag('operation').log(`Found ${toc.length} experience ${pluralize(toc.length, 'file')} (${totalSections} sections)`); return renderExperienceToc(toc); } diff --git a/src/commands/context-aria-command.ts b/src/commands/context-aria-command.ts index 5f0df85..c6ec255 100644 --- a/src/commands/context-aria-command.ts +++ b/src/commands/context-aria-command.ts @@ -17,6 +17,6 @@ export class ContextAriaCommand extends BaseCommand { throw new Error('No ARIA snapshot available for current page'); } - tag('multiline').log(`ARIA Snapshot:\n\n${ariaSnapshot}`); + tag('multiline').log(`ARIA Snapshot:\n\n${ariaSnapshot}`, { maxLines: 10 }); } } diff --git a/src/commands/explore-command.ts b/src/commands/explore-command.ts index 3dcec70..09c04bc 100644 --- a/src/commands/explore-command.ts +++ b/src/commands/explore-command.ts @@ -7,7 +7,6 @@ import { type Plan, type Test, TestResult } from '../test-plan.js'; import { getCliName } from '../utils/cli-name.ts'; import { ErrorPageError } from '../utils/error-page.ts'; import { tag } from '../utils/logger.js'; -import { jsonToTable } from '../utils/markdown-parser.js'; import { type 
NextStepSection, printNextSteps, relativeToCwd } from '../utils/next-steps.ts'; import { safeFilename } from '../utils/strings.ts'; import { BaseCommand, type Suggestion } from './base-command.js'; @@ -88,14 +87,14 @@ export class ExploreCommand extends BaseCommand { const t = tests[i]; lines.push(` ${String(i + 1).padStart(2)}. [${this.originLabel(t)}] [${t.priority.padEnd(9)}] ${t.scenario}`); } - tag('multiline').log(lines.join('\n')); + tag('multiline').log(lines.join('\n'), { maxLines: 24 }); } private async runFreshMode(mainUrl: string | undefined, feature: string | undefined, styles?: string[]): Promise { await this.runAllStyles(mainUrl, feature, undefined, undefined, styles); + this.rememberCurrentPlan(); const mainPlan = this.explorBot.getCurrentPlan(); if (!mainPlan) return; - this.completedPlans.push(mainPlan); if (feature || this.isLimitReached()) return; @@ -270,6 +269,7 @@ export class ExploreCommand extends BaseCommand { const styleList = styles ?? Object.keys(getStyles()); let fresh = true; for (const style of styleList) { + if (this.isLimitReached()) break; if (!fresh && pageUrl && !this.dryRun) { await this.explorBot.visit(pageUrl); } @@ -278,10 +278,19 @@ export class ExploreCommand extends BaseCommand { if (this.dryRun) opts.noSave = true; await this.planWithRetry(feature, opts, pageUrl); await this.runPendingTests(); + this.rememberCurrentPlan(); fresh = false; } } + private rememberCurrentPlan(): void { + const plan = this.explorBot.getCurrentPlan(); + if (!plan) return; + if (this.completedPlans.includes(plan)) return; + if (plan.tests.every((test) => test.startTime == null)) return; + this.completedPlans.push(plan); + } + private async planWithRetry(feature: string | undefined, opts: { fresh: boolean; style: string; extend?: Plan; completedPlans?: Plan[]; noSave?: boolean }, pageUrl?: string): Promise { const before = new Set(this.explorBot.getCurrentPlan()?.tests ?? 
[]); @@ -401,34 +410,63 @@ export class ExploreCommand extends BaseCommand { if (allTests.length === 0) return; - const hasSubPages = this.completedPlans.length > 1; + const hasSubPages = new Set(this.completedPlans.map((plan) => plan.title)).size > 1; const hasOrigin = this.oldTestRefs.size > 0; - const rows = allTests.map(({ test, planTitle }, index) => { + const completed = allTests.map(({ test, planTitle }, index) => { const durationMs = test.getDurationMs(); const duration = durationMs != null ? `${(durationMs / 1000).toFixed(1)}s` : '-'; let status = 'failed'; if (test.isSuccessful) status = 'passed'; else if (test.isSkipped) status = 'skipped'; - const row: Record = { - '#': String(index + 1), - Status: status, - Title: test.scenario.replace(/\|/g, '-'), - Priority: test.priority, - Time: duration, - Steps: String(Object.keys(test.notes).length), + return { + index: index + 1, + status, + title: test.scenario.replace(/\s+/g, ' ').trim(), + priority: test.priority, + duration, + durationMs: durationMs ?? 0, + steps: Object.keys(test.notes).length, + origin: hasOrigin ? this.originLabel(test) : '', + planTitle: hasSubPages ? 
planTitle : '', }; - if (hasOrigin) { - row.Origin = this.originLabel(test); + }); + const passed = completed.filter((t) => t.status === 'passed').length; + const failed = completed.filter((t) => t.status === 'failed').length; + const skipped = completed.filter((t) => t.status === 'skipped').length; + const totalSeconds = completed.reduce((sum, t) => sum + t.durationMs, 0) / 1000; + const lines = [`Results: ${passed} passed, ${failed} failed, ${skipped} skipped - ${formatDuration(totalSeconds)}`]; + + const failedTests = completed.filter((t) => t.status === 'failed'); + if (failedTests.length > 0) { + lines.push('', 'Failed tests:'); + for (const test of failedTests) { + lines.push(` #${test.index} [${test.priority}] ${test.title} (${test.duration}, ${test.steps} steps)`); } - if (hasSubPages) { - row.Plan = planTitle; + } + + const slowTests = completed + .filter((t) => t.durationMs >= 1000) + .sort((a, b) => b.durationMs - a.durationMs) + .slice(0, 3); + if (slowTests.length > 0) { + lines.push('', 'Slowest tests:'); + for (const test of slowTests) { + lines.push(` #${test.index} ${test.duration} - ${test.title}`); } - return row; - }); - const columns = ['#', 'Status', 'Title', 'Priority', 'Time', 'Steps']; - if (hasOrigin) columns.push('Origin'); - if (hasSubPages) columns.push('Plan'); - tag('multiline').log(jsonToTable(rows, columns)); + } + + const detailLines = completed + .map((test) => { + const details = [test.origin, test.planTitle].filter(Boolean).join(' - '); + return details ? 
` #${test.index} ${details}` : ''; + }) + .filter(Boolean); + if (detailLines.length > 0) { + lines.push('', 'Details:'); + lines.push(...detailLines); + } + + tag('multiline').log(lines.join('\n')); tag('info').log(`${figureSet.tick} ${allTests.length} tests completed`); } @@ -463,8 +501,8 @@ export class ExploreCommand extends BaseCommand { } if (screencasts.length > 0) { - const commands = screencasts.map((f) => ({ label: '', command: relativeToCwd(f) })); const screencastDir = relativeToCwd(outputPath('screencasts')); + const commands = [{ label: 'Folder', command: screencastDir }]; const planSlugs = [...new Set(this.completedPlans.map((p) => safeFilename(p.title)).filter(Boolean))]; for (const slug of planSlugs) { commands.push({ label: 'Browse plan', command: `ls ${screencastDir}/${slug}-*` }); @@ -529,3 +567,10 @@ function parseRatio(s: string): number | null { if (Number.isNaN(n) || n < 0 || n > 1) return null; return n; } + +function formatDuration(seconds: number): string { + if (seconds < 60) return `${seconds.toFixed(1)}s`; + const wholeSeconds = Math.round(seconds); // round the total first so 119.6 renders as "2m 0s", not "1m 60s" + const minutes = Math.floor(wholeSeconds / 60); + return `${minutes}m ${wholeSeconds % 60}s`; +} diff --git a/src/commands/test-command.ts b/src/commands/test-command.ts index de3c301..4eeb866 100644 --- a/src/commands/test-command.ts +++ b/src/commands/test-command.ts @@ -69,7 +69,8 @@ export class TestCommand extends BaseCommand { tag('info').log(`Launching ${toExecute.length} test scenario(s).`); const tester = this.explorBot.agentTester(); - for (const test of toExecute) { + for (const [index, test] of toExecute.entries()) { + tag('info').log(`Starting test ${index + 1}/${toExecute.length}: ${test.scenario}`); await tester.test(test); } tag('success').log('Test execution finished'); diff --git a/src/components/App.tsx b/src/components/App.tsx index 5aca01d..d324ec1 100644 --- a/src/components/App.tsx +++ b/src/components/App.tsx @@ -14,7 +14,6 @@ import InputPane from 
'./InputPane.js'; import InputReadline from './InputReadline.js'; import LogPane from './LogPane.js'; import PlanEditor from './PlanEditor.js'; -import PlanPane, { type PlanSummary } from './PlanPane.js'; import SessionTimer from './SessionTimer.js'; import StateTransitionPane from './StateTransitionPane.js'; import TaskPane, { WINDOW_SIZE } from './TaskPane.js'; @@ -158,42 +157,20 @@ export function App({ explorBot, initialShowInput = false, exitOnEmptyInput = fa const planRef = useRef>(undefined); const unsubscribeRef = useRef<(() => void) | undefined>(undefined); - const [completedPlans, setCompletedPlans] = useState([]); - const [activePlanInfo, setActivePlanInfo] = useState(null); useEffect(() => { - const makeSummary = (plan: NonNullable>): PlanSummary => { - const enabled = plan.tests.filter((t) => t.enabled); - return { - title: plan.title, - testCount: enabled.length, - passed: enabled.filter((t) => t.isSuccessful).length, - failed: enabled.filter((t) => t.hasFailed).length, - }; - }; - const subscribeToPlan = (plan: NonNullable>) => { if (unsubscribeRef.current) unsubscribeRef.current(); - if (planRef.current && planRef.current !== plan && planRef.current.tests.length > 0) { - const summary = makeSummary(planRef.current); - setCompletedPlans((prev) => { - if (prev.some((p) => p.title === summary.title)) return prev; - return [...prev, summary]; - }); - } - planRef.current = plan; tasksRef.current = [...plan.tests]; setTasks(tasksRef.current); setTaskScrollOffset(0); - setActivePlanInfo(makeSummary(plan)); let lastInProgressIdx = -1; unsubscribeRef.current = plan.onTestsChange((updatedTests) => { tasksRef.current = [...updatedTests]; setTasks(tasksRef.current); - setActivePlanInfo(makeSummary(plan)); const inProgressIdx = updatedTests.findIndex((t) => t.status === 'in_progress' && t.enabled); if (inProgressIdx >= 0 && inProgressIdx !== lastInProgressIdx) { lastInProgressIdx = inProgressIdx; @@ -211,17 +188,9 @@ export function App({ explorBot, 
initialShowInput = false, exitOnEmptyInput = fa subscribeToPlan(currentPlan); } else if (!currentPlan && planRef.current) { if (unsubscribeRef.current) unsubscribeRef.current(); - if (planRef.current.tests.length > 0) { - const summary = makeSummary(planRef.current); - setCompletedPlans((prev) => { - if (prev.some((p) => p.title === summary.title)) return prev; - return [...prev, summary]; - }); - } planRef.current = undefined; tasksRef.current = []; setTasks([]); - setActivePlanInfo(null); } }, 2000); @@ -389,8 +358,6 @@ export function App({ explorBot, initialShowInput = false, exitOnEmptyInput = fa )} - - ); } diff --git a/src/components/LogPane.tsx b/src/components/LogPane.tsx index b115453..d22fb7b 100644 --- a/src/components/LogPane.tsx +++ b/src/components/LogPane.tsx @@ -22,7 +22,6 @@ const LogPane: React.FC = React.memo(({ verboseMode }) => { const pendingLogsRef = React.useRef([]); const flushTimeoutRef = React.useRef | null>(null); - const MAX_MULTILINE_LINES = 16; const MAX_STEP_LINES = 8; const MAX_SUBSTEP_LINES = 6; @@ -115,6 +114,8 @@ const LogPane: React.FC = React.memo(({ verboseMode }) => { return { color: 'yellow' as const }; case 'debug': return { color: 'gray' as const, dimColor: true }; + case 'operation': + return { color: 'gray' as const, dimColor: true }; case 'substep': return { color: 'gray' as const, dimColor: true }; case 'step': @@ -146,7 +147,8 @@ const LogPane: React.FC = React.memo(({ verboseMode }) => { const cleaned = stripAnsi(dedent(log.content)); const parsed = parseMarkdownToTerminal(cleaned); const lines = parsed.split('\n'); - const truncated = lines.length > MAX_MULTILINE_LINES ? `${lines.slice(0, MAX_MULTILINE_LINES).join('\n')}\n... (${lines.length - MAX_MULTILINE_LINES} more lines)` : parsed; + const maxLines = log.maxLines || 16; + const truncated = lines.length > maxLines ? `${lines.slice(0, maxLines).join('\n')}\n... 
(${lines.length - maxLines} more lines)` : parsed; return ( @@ -163,6 +165,7 @@ const LogPane: React.FC = React.memo(({ verboseMode }) => { type: 'multiline', content: `HTML Content:\n\n${markdown}`, timestamp: log.timestamp, + maxLines: 10, }; return renderLogEntry(multilineLog, index); @@ -182,6 +185,18 @@ const LogPane: React.FC = React.memo(({ verboseMode }) => { ); } + if (log.type === 'operation') { + return ( + + {lines.map((line, lineIndex) => ( + + {lineIndex === 0 ? `· ${line}` : ` ${line}`} + + ))} + + ); + } + if (log.type === 'step') { return ( @@ -212,7 +227,7 @@ const LogPane: React.FC = React.memo(({ verboseMode }) => { ); }; - const maxLogs = 100; + const maxLogs = 80; const visibleLogs = logs.length > maxLogs ? logs.slice(-maxLogs) : logs; return {visibleLogs.map((log, index) => renderLogEntry(log, index)).filter(Boolean)}; }); diff --git a/src/experience-tracker.ts b/src/experience-tracker.ts index 36744be..82ccb89 100644 --- a/src/experience-tracker.ts +++ b/src/experience-tracker.ts @@ -188,7 +188,7 @@ export class ExperienceTracker { const updatedContent = `${newEntry}\n\n${content}`; this.writeExperienceFile(stateHash, updatedContent, data); - tag('substep').log(` Added ACTION to: ${stateHash}.md`); + tag('operation').log(`Added ACTION to: ${stateHash}.md`); } writeFlow(state: ActionResult, body: string, relatedUrls?: string[]): void { @@ -218,7 +218,7 @@ export class ExperienceTracker { const updatedContent = `${body}\n${content}`; this.writeExperienceFile(stateHash, updatedContent, data); - tag('substep').log(`Added FLOW to: ${stateHash}.md`); + tag('operation').log(`Added FLOW to: ${stateHash}.md`); } getAllExperience(): ExperienceFile[] { diff --git a/src/explorbot.ts b/src/explorbot.ts index 2642fab..7a6b75e 100644 --- a/src/explorbot.ts +++ b/src/explorbot.ts @@ -487,7 +487,7 @@ export class ExplorBot { return; } - tag('multiline').log(markdown); + tag('multiline').log(markdown, { maxLines: 22 }); const filePath = 
this.agentSessionAnalyst().writeReport(markdown); tag('info').log(`Session report saved: ${relativeToCwd(filePath)}`); diff --git a/src/reporter.ts b/src/reporter.ts index 7d8900a..8b780f3 100644 --- a/src/reporter.ts +++ b/src/reporter.ts @@ -1,7 +1,7 @@ import { join } from 'node:path'; import { Client } from '@testomatio/reporter'; import type { Step } from '@testomatio/reporter/types/types.js'; -import { ConfigParser, outputPath } from './config.js'; +import { outputPath } from './config.js'; import type { ReporterConfig } from './config.js'; import type { StateManager } from './state-manager.js'; import { Stats } from './stats.js'; @@ -103,11 +103,13 @@ export class Reporter { } try { - this.client = new Client({ apiKey: process.env.TESTOMATIO || '', title: this.buildTitle() }); - const timeoutMs = Number(process.env.TESTOMATIO_TIMEOUT_MS || '15000'); - const timeoutPromise = new Promise<'timeout'>((resolve) => setTimeout(() => resolve('timeout'), timeoutMs)); + const result = await withQuietReporterLogs(async () => { + this.client = new Client({ apiKey: process.env.TESTOMATIO || '', title: this.buildTitle() }); + const timeoutMs = Number(process.env.TESTOMATIO_TIMEOUT_MS || '15000'); + const timeoutPromise = new Promise<'timeout'>((resolve) => setTimeout(() => resolve('timeout'), timeoutMs)); - const result = await Promise.race([this.client.createRun({ configuration: { exploratory: true } }).then(() => 'success' as const), timeoutPromise]); + return await Promise.race([this.client.createRun({ configuration: { exploratory: true } }).then(() => 'success' as const), timeoutPromise]); + }); if (result === 'timeout') { debugLog('Reporter run creation timed out'); @@ -294,7 +296,9 @@ export class Reporter { } try { - await this.client.updateRunStatus('finished'); + await withQuietReporterLogs(async () => { + await this.client.updateRunStatus('finished'); + }); this.isRunStarted = false; debugLog('Testomat.io run finished'); } catch (error) { @@ -340,3 +344,17 @@ 
export class Reporter { return; } } + +async function withQuietReporterLogs<T>(fn: () => Promise<T>): Promise<T> { /* Runs fn with TESTOMATIO_LOG_LEVEL forced to 'ERROR' and restores the previous value afterwards, even if fn throws. */ + const previousLevel = process.env.TESTOMATIO_LOG_LEVEL; + process.env.TESTOMATIO_LOG_LEVEL = 'ERROR'; + try { + return await fn(); + } finally { + if (previousLevel === undefined) { + Reflect.deleteProperty(process.env, 'TESTOMATIO_LOG_LEVEL'); /* remove the key: process.env coerces assigned values to strings, so `= undefined` would leave the string 'undefined' set */ + } else { + process.env.TESTOMATIO_LOG_LEVEL = previousLevel; + } + } +} diff --git a/src/utils/log-filters.ts b/src/utils/log-filters.ts new file mode 100644 index 0000000..41206b4 --- /dev/null +++ b/src/utils/log-filters.ts @@ -0,0 +1,26 @@ +export class RecentStepFilter { + private recentStepKeys = new Map(); + + constructor(private ttlMs = 15000) {} + + shouldSuppress(content: string, now = Date.now()): boolean { + const key = normalizeStepCommand(content); + if (!key) return false; + + for (const [existingKey, timestamp] of this.recentStepKeys) { + if (now - timestamp > this.ttlMs) { + this.recentStepKeys.delete(existingKey); + } + } + + if (this.recentStepKeys.has(key)) return true; + this.recentStepKeys.set(key, now); + return false; + } +} + +function normalizeStepCommand(content: string): string | null { + const normalized = content.replace(/\s+/g, ' ').trim(); + if (!normalized.startsWith('I.')) return null; + return normalized.toLowerCase(); +} diff --git a/src/utils/logger.ts b/src/utils/logger.ts index 9765b98..503bee7 100644 --- a/src/utils/logger.ts +++ b/src/utils/logger.ts @@ -8,9 +8,10 @@ import { marked } from 'marked'; import stripAnsi from 'strip-ansi'; import { ConfigParser } from '../config.js'; import { Observability } from '../observability.ts'; +import { RecentStepFilter } from './log-filters.ts'; import { parseMarkdownToTerminal } from './markdown-terminal.ts'; -export type LogType = 'info' | 'success' | 'error' | 'warning' | 'debug' | 'substep' | 'step' | 'multiline' | 'html' | 'input'; +export type LogType = 'info' | 'success' | 'error' | 'warning' | 'debug' | 'substep' | 'operation' | 'step' |
'multiline' | 'html' | 'input'; export interface TaggedLogEntry { type: LogType; @@ -18,6 +19,7 @@ export interface TaggedLogEntry { timestamp?: Date; originalArgs?: any[]; namespace?: string; + maxLines?: number; } type LogEntry = TaggedLogEntry; @@ -77,6 +79,7 @@ const debugFilter = new DebugFilter(); class ConsoleDestination implements LogDestination { private verboseMode = false; private forceEnabled = false; + private recentSteps = new RecentStepFilter(); isEnabled(): boolean { return this.forceEnabled || !process.env.INK_RUNNING; @@ -93,6 +96,8 @@ class ConsoleDestination implements LogDestination { write(entry: TaggedLogEntry): void { if (entry.type === 'debug') return; if (entry.type === 'html') return; + if (entry.type === 'operation' && !this.verboseMode) return; + if (entry.type === 'step' && !this.verboseMode && this.recentSteps.shouldSuppress(entry.content)) return; let content = entry.content; if (entry.type === 'multiline') { const cleaned = stripAnsi(dedent(entry.content)); @@ -107,6 +112,8 @@ class ConsoleDestination implements LogDestination { content = chalk.yellow(content); } else if (entry.type === 'step') { content = chalk.gray(` ${content}`); + } else if (entry.type === 'operation') { + content = chalk.gray(` · ${content}`); } else if (entry.type === 'substep') { content = chalk.gray(` > ${content}`); } @@ -247,6 +254,7 @@ class ReactDestination implements LogDestination { } private shouldWrite(entry: TaggedLogEntry): boolean { + if (entry.type === 'operation' && !this.debugMode) return false; if (entry.type !== 'debug') return true; if (this.debugMode) return true; if (!entry.namespace) return true; @@ -306,7 +314,7 @@ class CaptainDestination implements LogDestination { stopCapture(): string[] { this.capturing = false; - const logs = this.entries.filter((e) => e.type !== 'debug' && e.type !== 'html' && e.type !== 'multiline').map((e) => `[${e.type}] ${e.content}`); + const logs = this.entries.filter((e) => e.type !== 'debug' && e.type !== 
'html' && e.type !== 'multiline' && e.type !== 'operation').map((e) => `[${e.type}] ${e.content}`); this.entries = []; return logs; } @@ -432,6 +440,7 @@ class Logger { return; } + const options = this.extractLogOptions(type, args); let content = this.processArgs(args); if (type === 'step' && args[0]?.toCode) { content = args[0].toCode(); @@ -441,6 +450,7 @@ class Logger { content, timestamp: new Date(), originalArgs: args, + maxLines: options?.maxLines, }; if (this.file.isEnabled()) this.file.write(entry); @@ -480,6 +490,18 @@ class Logger { multiline(...args: any[]): void { this.log('multiline', ...args); } + + private extractLogOptions(type: LogType, args: any[]): { maxLines?: number } | null { /* Pops a trailing { maxLines } options object off args for multiline logs; returns null when there is none or when maxLines is not a positive finite number (the options object is still removed in that case). */ + if (type !== 'multiline' || args.length < 2) return null; /* a lone argument is the log content itself, never options, so it must not be popped */ + const last = args[args.length - 1]; + if (!last || typeof last !== 'object' || Array.isArray(last)) return null; + if (!('maxLines' in last)) return null; + + args.pop(); + const maxLines = Number(last.maxLines); + if (!Number.isFinite(maxLines) || maxLines <= 0) return null; + return { maxLines }; + } } const logger = Logger.getInstance(); diff --git a/src/utils/next-steps.ts b/src/utils/next-steps.ts index 6f0ff26..9882a16 100644 --- a/src/utils/next-steps.ts +++ b/src/utils/next-steps.ts @@ -42,10 +42,5 @@ export function printNextSteps(sections: NextStepSection[]): void { blocks.push(lines.join('\n')); } - for (let i = 0; i < blocks.length; i++) { - if (i > 0) tag('info').log(''); - for (const line of blocks[i].split('\n')) { - tag('info').log(line); - } - } + tag('multiline').log(blocks.join('\n\n'), { maxLines: 18 }); } diff --git a/tests/unit/log-filters.test.ts b/tests/unit/log-filters.test.ts new file mode 100644 index 0000000..396b3d1 --- /dev/null +++ b/tests/unit/log-filters.test.ts @@ -0,0 +1,26 @@ +import { describe, expect, test } from 'bun:test'; +import { RecentStepFilter } from '../../src/utils/log-filters.ts'; + +describe('log filters', () => { + test('deduplicates repeated identical step commands within ttl',
() => { + const filter = new RecentStepFilter(15000); + expect(filter.shouldSuppress('I.fillField("Search", "query")', 1000)).toBe(false); + expect(filter.shouldSuppress('I.fillField("Search", "query")', 2000)).toBe(true); + expect(filter.shouldSuppress('I.click("Save", "toolbar")', 3000)).toBe(false); + expect(filter.shouldSuppress('I.click("Save", "toolbar")', 4000)).toBe(true); + }); + + test('keeps different locator variants visible', () => { + const filter = new RecentStepFilter(15000); + expect(filter.shouldSuppress('I.fillField("Search", "query")', 1000)).toBe(false); + expect(filter.shouldSuppress('I.fillField("role":"textbox","text":"Search", "query", "toolbar")', 2000)).toBe(false); + expect(filter.shouldSuppress('I.click("Save", "toolbar")', 3000)).toBe(false); + expect(filter.shouldSuppress('I.click("button.primary type="submit"", "toolbar")', 4000)).toBe(false); + }); + + test('allows repeated actions after ttl expires', () => { + const filter = new RecentStepFilter(15000); + expect(filter.shouldSuppress('I.click("Save")', 1000)).toBe(false); + expect(filter.shouldSuppress('I.click("Save")', 17000)).toBe(false); + }); +}); diff --git a/tests/unit/logger.test.ts b/tests/unit/logger.test.ts index be06d50..79a69f2 100644 --- a/tests/unit/logger.test.ts +++ b/tests/unit/logger.test.ts @@ -23,8 +23,8 @@ describe('Logger', () => { } process.env.INITIAL_CWD = '/tmp'; - process.env.INK_RUNNING = undefined; - process.env.DEBUG = undefined; + Reflect.deleteProperty(process.env, 'INK_RUNNING'); + Reflect.deleteProperty(process.env, 'DEBUG'); setVerboseMode(false); setPreserveConsoleLogs(false); @@ -69,6 +69,17 @@ describe('Logger', () => { logSubstep('Test substep message'); expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Test substep message')); }); + + it('should hide operation messages by default', () => { + tag('operation').log('Saved generated artifact'); + expect(consoleSpy).not.toHaveBeenCalled(); + }); + + it('should log operation 
messages in verbose mode', () => { + setVerboseMode(true); + tag('operation').log('Saved generated artifact'); + expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Saved generated artifact')); + }); }); describe('tagged logging', () => { @@ -286,6 +297,20 @@ describe('Logger', () => { multilineLogger.log('# Heading\n\nSome **bold** text'); expect(consoleSpy).toHaveBeenCalled(); }); + + it('should preserve multiline maxLines option for TUI rendering', () => { + process.env.INK_RUNNING = 'true'; + const entries: TaggedLogEntry[] = []; + const mockLogPane = (entry: TaggedLogEntry) => { + entries.push(entry); + }; + registerLogPane(mockLogPane); + + tag('multiline').log('Line 1\nLine 2', { maxLines: 1 }); + + unregisterLogPane(mockLogPane); + expect(entries.find((entry) => entry.type === 'multiline')?.maxLines).toBe(1); + }); }); describe('error handling', () => {