Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ai/historian.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,6 @@ export class Historian extends HistorianBase {

writeFileSync(filePath, content);
this.savedFiles.add(filePath);
- tag('substep').log(`Updated test file with healed steps: ${relativeToCwd(filePath)}`);
+ tag('operation').log(`Updated test file with healed steps: ${relativeToCwd(filePath)}`);
}
}
2 changes: 1 addition & 1 deletion src/ai/historian/codeceptjs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ export function WithCodeceptJS<T extends Constructor>(Base: T) {
writeFileSync(filePath, lines.join('\n'));
this.savedFiles.add(filePath);

- tag('substep').log(`Saved plan tests to: ${relativeToCwd(filePath)}`);
+ tag('operation').log(`Saved plan tests to: ${relativeToCwd(filePath)}`);
return filePath;
}

Expand Down
2 changes: 1 addition & 1 deletion src/ai/historian/experience.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ export function WithExperience<T extends Constructor>(Base: T) {

await this.stopScreencast();

- tag('substep').log(`Historian saved session for: ${task.description}`);
+ tag('operation').log(`Historian saved session for: ${task.description}`);
}

private async reportSession(test: Test, steps: SessionStep[]): Promise<void> {
Expand Down
2 changes: 1 addition & 1 deletion src/ai/historian/playwright.ts
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ export function WithPlaywright<T extends Constructor>(Base: T) {
writeFileSync(filePath, lines.join('\n'));
this.savedFiles.add(filePath);

- tag('substep').log(`Saved plan tests to: ${relativeToCwd(filePath)}`);
+ tag('operation').log(`Saved plan tests to: ${relativeToCwd(filePath)}`);
return filePath;
}

Expand Down
6 changes: 3 additions & 3 deletions src/ai/historian/screencast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ export function WithScreencast<T extends Constructor>(Base: T) {
this.screencastTask = test?._explorbotTest || null;
this.screencastLastChapter = null;
} catch (err) {
- tag('substep').log(`Screencast start failed: ${(err as Error).message}`);
+ tag('operation').log(`Screencast start failed: ${(err as Error).message}`);
}
}

Expand All @@ -116,7 +116,7 @@ export function WithScreencast<T extends Constructor>(Base: T) {
try {
await this.screencastPage.screencast.stop();
} catch (err) {
- tag('substep').log(`Screencast stop failed: ${(err as Error).message}`);
+ tag('operation').log(`Screencast stop failed: ${(err as Error).message}`);
}
this.screencastActive = false;
this.screencastPage = null;
Expand All @@ -126,7 +126,7 @@ export function WithScreencast<T extends Constructor>(Base: T) {
if (path) {
this.savedFiles.add(path);
task?.addArtifact?.(path);
- tag('substep').log(`Saved screencast: ${relativeToCwd(path)}`);
+ tag('operation').log(`Saved screencast: ${relativeToCwd(path)}`);
}
}
};
Expand Down
8 changes: 4 additions & 4 deletions src/ai/navigator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ class Navigator implements Agent {
if (!actionResult.isInsideIframe) {
const successful = this.experienceTracker.getSuccessfulExperience(actionResult);
if (successful.length > 0) {
- tag('substep').log(`Found ${successful.length} experience ${pluralize(successful.length, 'file')} for: ${actionResult.url}`);
+ tag('operation').log(`Found ${successful.length} experience ${pluralize(successful.length, 'file')} for: ${actionResult.url}`);
experience = `<experience>\nPast successful recipes recorded from prior runs for this page. Prefer these solutions first if they match the goal.\n\n${successful.join('\n\n')}\n</experience>`;
}
}
Expand Down Expand Up @@ -307,7 +307,7 @@ class Navigator implements Agent {
stop();
return;
}
- tag('substep').log('Feeding failures back to AI for a new batch...');
+ tag('operation').log('Feeding failures back to AI for a new batch...');
let contextMsg = 'Previous solutions did not work. Analyze the failures and try DIFFERENT strategies (not syntactic variants of the same locator).\n\n';
if (batchFailures.length > 0) {
const lines = batchFailures
Expand Down Expand Up @@ -633,7 +633,7 @@ class Navigator implements Agent {

const cachedVerification = actionResult.getVerification(message);
if (cachedVerification !== null) {
- tag('substep').log(`Reusing cached verification: ${cachedVerification ? 'PASS' : 'FAIL'}`);
+ tag('operation').log(`Reusing cached verification: ${cachedVerification ? 'PASS' : 'FAIL'}`);
return { verified: cachedVerification, successfulCodes: [], assertionSteps: [], totalAttempted: 0 };
}

Expand All @@ -654,7 +654,7 @@ class Navigator implements Agent {
const toc = this.experienceTracker.getExperienceTableOfContents(actionResult);
if (toc.length > 0) {
const totalSections = toc.reduce((sum, entry) => sum + entry.sections.length, 0);
- tag('substep').log(`Found ${toc.length} experience ${pluralize(toc.length, 'file')} (${totalSections} sections) for: ${actionResult.url}`);
+ tag('operation').log(`Found ${toc.length} experience ${pluralize(toc.length, 'file')} (${totalSections} sections) for: ${actionResult.url}`);
experience = renderExperienceToc(toc);
}
}
Expand Down
13 changes: 8 additions & 5 deletions src/ai/pilot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ export class Pilot implements Agent {

const schema = z.object({
decision: z.enum(['pass', 'fail', 'continue', 'skipped']).describe('pass = test succeeded, fail = test failed, continue = tester should keep going, skipped = scenario is irrelevant OR systematic execution failures prevented testing'),
- reason: z.string().describe('What happened and why (1-2 sentences). Do NOT repeat the decision status (e.g. "scenario goal achieved/not achieved") — just explain the evidence. For continue: explain why rejected and suggest alternatives.'),
+ reason: z.string().describe('Concise user-facing reason, maximum 1 short sentence and 120 characters. Do NOT repeat the decision status; explain only the evidence. For continue: explain why rejected and suggest alternatives.'),
guidance: z.string().nullable().describe('Required for "continue": specific actionable instruction for the tester — what exactly to verify, retry differently, or complete next. Be concrete.'),
requestVerification: z
.string()
Expand Down Expand Up @@ -177,7 +177,7 @@ export class Pilot implements Agent {
}
}

- tag('info').log(`Pilot: ${result.decision} ${result.reason}`);
+ tag('info').log(`Pilot: ${result.decision} - ${result.reason}`);
task.summary = result.reason;

const verdictState = screenshotState || currentState;
Expand Down Expand Up @@ -221,7 +221,7 @@ export class Pilot implements Agent {

const schema = z.object({
decision: z.enum(['allow', 'fail', 'continue', 'skipped']).describe('allow = reset proceeds, fail = test failed (stop looping), continue = veto reset, tester should act on current page instead, skipped = scenario is irrelevant or cannot be executed'),
- reason: z.string().describe('What evidence justifies this decision (1-2 sentences). Do not restate the decision.'),
+ reason: z.string().describe('Concise evidence-only reason, maximum 1 short sentence and 120 characters. Do not restate the decision.'),
guidance: z.string().nullable().describe('Required for "continue": concrete instruction for what the tester should do instead of resetting (e.g. which tool to call, what to verify).'),
});

Expand Down Expand Up @@ -388,8 +388,9 @@ export class Pilot implements Agent {
- "continue": tester hasn't completed the goal; provide concrete guidance (which tool, what to check).
If a verify() asserted a state that was ALREADY TRUE before the test, it proves nothing — reject.

- reason field: do NOT restate the decision ("scenario goal achieved/not achieved"). State what happened —
- what was verified, what failed, what evidence was found.
+ reason field: one short sentence, maximum 120 characters. Do NOT restate the decision
+ ("scenario goal achieved/not achieved"). State what happened: what was verified, what failed,
+ or what evidence was found.
`;
}

Expand Down Expand Up @@ -1017,6 +1018,8 @@ export class Pilot implements Agent {
Response format:
PROGRESS: <1 sentence assessment>
NEXT: <specific actionable instruction for Tester>
+
+ Keep user-facing reasons concise: one short sentence, maximum 120 characters, evidence only, no repeated verdict wording.
`;
}
}
4 changes: 1 addition & 3 deletions src/ai/planner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ const TasksSchema = z.object({
scenario: z.string().describe('A single sentence describing what to test'),
priority: z.enum(['critical', 'important', 'high', 'normal', 'low']).describe('Priority of the task based on business importance'),
startUrl: z.string().nullable().describe('Start URL for the test if different from plan URL (only for tests on visited subpages)'),
- steps: z.array(z.string()).describe('List of steps to perform for this scenario. Each step should be a specific action (e.g., "Click on Login button", "Enter username in email field", "Submit the form"). Keep steps atomic and actionable.'),
+ steps: z.array(z.string()).describe('List of steps to perform for this scenario. Each step should be a specific action (e.g., "Open the form", "Enter required data", "Submit the form"). Keep steps atomic and actionable.'),
expectedOutcomes: z
.array(z.string())
.describe('List of expected outcomes that can be verified. Each outcome should be simple, specific, and easy to check (e.g., "Success message appears", "URL changes to /dashboard", "Form field shows error"). Keep outcomes atomic - do not combine multiple checks into one.'),
Expand Down Expand Up @@ -226,9 +226,7 @@ export class Planner extends PlannerBase implements Agent {
}
}

- const availableStyles = Object.keys(getStyles()).join(', ');
tag('success').log(`Planning complete! ${this.currentPlan.tests.length} tests in plan: ${this.currentPlan.title}`);
- tag('info').log(`Planning style: ${this.lastStyleName} (available: ${availableStyles})`);

if (state.url) registerPlan(state.url, this.currentPlan, feature, state.hash);

Expand Down
8 changes: 5 additions & 3 deletions src/ai/provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,11 @@ export class Provider {
}
throw new ContextLengthError(error.message || error.toString());
}
- tag('error').log(error.message || error.toString());
- throw new AiError(error.message || error.toString());
+ const message = error.message || error.toString();
+ if (message !== 'No response text from AI') {
+ tag('error').log(message);
+ }
+ throw new AiError(message);
}
}

Expand Down Expand Up @@ -376,7 +379,6 @@ export class Provider {
} catch (error: any) {
clearActivity();
if (error?.message?.includes('Tool choice is required')) {
- tag('warning').log('Model completed without calling a tool, returning empty result');
return { text: '', toolCalls: [], toolResults: [], response: { messages: [] }, usage: null };
}
if (error?.name === 'AbortError') throw error;
Expand Down
8 changes: 4 additions & 4 deletions src/ai/researcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ export class Researcher extends ResearcherBase implements Agent {
if (!deep && !force) {
const similar = await findSimilarResearch(combinedHtml);
if (similar) {
- tag('substep').log('Similar research found, reusing cached result');
+ tag('operation').log('Similar research found, reusing cached result');
if (stateHash) saveResearch(stateHash, similar, combinedHtml);
tag('multiline').log(formatResearchSummary(similar));
tag('success').log('Research complete (reused)');
Expand Down Expand Up @@ -316,10 +316,10 @@ export class Researcher extends ResearcherBase implements Agent {

tag('multiline').log(formatResearchSummary(result.text, { visionUsed: this.hasScreenshotToAnalyze }));
tag('success').log('Research complete');
- if (researchFile) tag('substep').log(`Research file saved to: ${researchFile}`);
+ if (researchFile) tag('operation').log(`Research file saved to: ${researchFile}`);
if (this.actionResult?.screenshotFile) {
const screenshotPath = outputPath('states', this.actionResult.screenshotFile);
- tag('substep').log(`UI screenshot: file://${screenshotPath}`);
+ tag('operation').log(`UI screenshot: file://${screenshotPath}`);
}

await this.hooksRunner.runAfterHook('researcher', state.url);
Expand Down Expand Up @@ -467,7 +467,7 @@ export class Researcher extends ResearcherBase implements Agent {
.filter((k) => !!k)
.join('\n\n');

- tag('substep').log(`Found ${knowledgeFiles.length} relevant knowledge ${pluralize(knowledgeFiles.length, 'file')} for: ${this.actionResult.url}`);
+ tag('operation').log(`Found ${knowledgeFiles.length} relevant knowledge ${pluralize(knowledgeFiles.length, 'file')} for: ${this.actionResult.url}`);
knowledge = `
<hint>
Here is relevant knowledge for this page:
Expand Down
2 changes: 1 addition & 1 deletion src/ai/researcher/locators.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ export function WithLocators<T extends Constructor>(Base: T) {
}
}

- tag('substep').log(`Validated ${locators.length} locators: ${locators.length - broken} valid, ${broken} broken`);
+ tag('operation').log(`Validated ${locators.length} locators: ${locators.length - broken} valid, ${broken} broken`);
}

async fixBrokenSections(result: ResearchResult, conversation: Conversation): Promise<void> {
Expand Down
5 changes: 4 additions & 1 deletion src/ai/session-analyst.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ export class SessionAnalyst implements Agent {

Crucial distinction: "the app misbehaved" vs "the automation could not interact with the app". ONLY the first is a Defect. If the automation gives up before the app responds — timeout, retries exhausted, dead loop / loop detected, could not click or find an element — that is an Execution issue regardless of what the log calls it. Failure inside the automation ≠ failure inside the product.

+ The action log is more authoritative than the scenario title. If the actual submitted data, page state, or action sequence does not match the scenario title, classify it as Execution issue and do not list that scenario under What works. Do NOT infer a product Defect or UX issue from behavior caused by incorrect test data or an automation mismatch.
+ Negative test data is valid when it matches a negative scenario. Do not call intentionally invalid input wrong data when the scenario expects rejection or validation feedback.
+
A solitary failure where adjacent tests on the same feature passed → Execution, not Defect.

## Severity (defects only)
Expand Down Expand Up @@ -76,7 +79,7 @@ export class SessionAnalyst implements Agent {

## Brevity rules

- - Headline: 2 sentences MAX. About the FEATURE, not the run. No counts, no "N tests", no "this session". Banned words: "exercised", "comprehensive", "notably", "this session", "module", "targeted", "covered creation".
+ - Headline: 2 sentences MAX. About the FEATURE, not the run. No counts, no "N tests", no "this session". Never use these words: "exercised", "comprehensive", "notably", "this session", "module", "targeted", "covered creation".
- What works: feature name + test refs. NO parentheticals, NO caveats. If there's a caveat, the entry doesn't belong here.
- Defect title is the BUG ("Search returns non-matching results"), never the scenario name.
- Reproduce steps are imperative one-liners drawn from the log.
Expand Down
4 changes: 2 additions & 2 deletions src/ai/task-agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ export abstract class TaskAgent {
.filter((k) => !!k)
.join('\n\n');

- tag('substep').log(`Found ${knowledgeFiles.length} relevant knowledge ${pluralize(knowledgeFiles.length, 'file')}`);
+ tag('operation').log(`Found ${knowledgeFiles.length} relevant knowledge ${pluralize(knowledgeFiles.length, 'file')}`);
return dedent`
<knowledge>
Here is relevant knowledge for this page:
Expand All @@ -61,7 +61,7 @@ export abstract class TaskAgent {

const totalSections = toc.reduce((sum, entry) => sum + entry.sections.length, 0);
debugLog(`injecting experience TOC (${toc.length} files, ${totalSections} sections)`);
- tag('substep').log(`Found ${toc.length} experience ${pluralize(toc.length, 'file')} (${totalSections} sections)`);
+ tag('operation').log(`Found ${toc.length} experience ${pluralize(toc.length, 'file')} (${totalSections} sections)`);
return renderExperienceToc(toc);
}

Expand Down
2 changes: 1 addition & 1 deletion src/commands/context-aria-command.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@ export class ContextAriaCommand extends BaseCommand {
throw new Error('No ARIA snapshot available for current page');
}

- tag('multiline').log(`ARIA Snapshot:\n\n${ariaSnapshot}`);
+ tag('multiline').log(`ARIA Snapshot:\n\n${ariaSnapshot}`, { maxLines: 10 });
}
}
Loading
Loading