testomatio · DavertMik · Jun 8, 2026 · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/src/ai/historian.ts b/src/ai/historian.ts
@@ -62,6 +62,6 @@ export class Historian extends HistorianBase {
 
     writeFileSync(filePath, content);
     this.savedFiles.add(filePath);
-    tag('substep').log(`Updated test file with healed steps: ${relativeToCwd(filePath)}`);
+    tag('operation').log(`Updated test file with healed steps: ${relativeToCwd(filePath)}`);
   }
 }
diff --git a/src/ai/historian/codeceptjs.ts b/src/ai/historian/codeceptjs.ts
@@ -102,7 +102,7 @@ export function WithCodeceptJS<T extends Constructor>(Base: T) {
       writeFileSync(filePath, lines.join('\n'));
       this.savedFiles.add(filePath);
 
-      tag('substep').log(`Saved plan tests to: ${relativeToCwd(filePath)}`);
+      tag('operation').log(`Saved plan tests to: ${relativeToCwd(filePath)}`);
       return filePath;
     }
 

diff --git a/src/ai/historian/experience.ts b/src/ai/historian/experience.ts
@@ -56,7 +56,7 @@ export function WithExperience<T extends Constructor>(Base: T) {
 
       await this.stopScreencast();
 
-      tag('substep').log(`Historian saved session for: ${task.description}`);
+      tag('operation').log(`Historian saved session for: ${task.description}`);
     }
 
     private async reportSession(test: Test, steps: SessionStep[]): Promise<void> {

diff --git a/src/ai/historian/playwright.ts b/src/ai/historian/playwright.ts
@@ -140,7 +140,7 @@ export function WithPlaywright<T extends Constructor>(Base: T) {
       writeFileSync(filePath, lines.join('\n'));
       this.savedFiles.add(filePath);
 
-      tag('substep').log(`Saved plan tests to: ${relativeToCwd(filePath)}`);
+      tag('operation').log(`Saved plan tests to: ${relativeToCwd(filePath)}`);
       return filePath;
     }
 

diff --git a/src/ai/historian/screencast.ts b/src/ai/historian/screencast.ts
@@ -92,7 +92,7 @@ export function WithScreencast<T extends Constructor>(Base: T) {
         this.screencastTask = test?._explorbotTest || null;
         this.screencastLastChapter = null;
       } catch (err) {
-        tag('substep').log(`Screencast start failed: ${(err as Error).message}`);
+        tag('operation').log(`Screencast start failed: ${(err as Error).message}`);
       }
     }
 
@@ -116,7 +116,7 @@ export function WithScreencast<T extends Constructor>(Base: T) {
       try {
         await this.screencastPage.screencast.stop();
       } catch (err) {
-        tag('substep').log(`Screencast stop failed: ${(err as Error).message}`);
+        tag('operation').log(`Screencast stop failed: ${(err as Error).message}`);
       }
       this.screencastActive = false;
       this.screencastPage = null;
@@ -126,7 +126,7 @@ export function WithScreencast<T extends Constructor>(Base: T) {
       if (path) {
         this.savedFiles.add(path);
         task?.addArtifact?.(path);
-        tag('substep').log(`Saved screencast: ${relativeToCwd(path)}`);
+        tag('operation').log(`Saved screencast: ${relativeToCwd(path)}`);
       }
     }
   };

diff --git a/src/ai/navigator.ts b/src/ai/navigator.ts
@@ -206,7 +206,7 @@ class Navigator implements Agent {
     if (!actionResult.isInsideIframe) {
       const successful = this.experienceTracker.getSuccessfulExperience(actionResult);
       if (successful.length > 0) {
-        tag('substep').log(`Found ${successful.length} experience ${pluralize(successful.length, 'file')} for: ${actionResult.url}`);
+        tag('operation').log(`Found ${successful.length} experience ${pluralize(successful.length, 'file')} for: ${actionResult.url}`);
         experience = `<experience>\nPast successful recipes recorded from prior runs for this page. Prefer these solutions first if they match the goal.\n\n${successful.join('\n\n')}\n</experience>`;
       }
     }
@@ -307,7 +307,7 @@ class Navigator implements Agent {
             stop();
             return;
           }
-          tag('substep').log('Feeding failures back to AI for a new batch...');
+          tag('operation').log('Feeding failures back to AI for a new batch...');
           let contextMsg = 'Previous solutions did not work. Analyze the failures and try DIFFERENT strategies (not syntactic variants of the same locator).\n\n';
           if (batchFailures.length > 0) {
             const lines = batchFailures
@@ -633,7 +633,7 @@ class Navigator implements Agent {
 
     const cachedVerification = actionResult.getVerification(message);
     if (cachedVerification !== null) {
-      tag('substep').log(`Reusing cached verification: ${cachedVerification ? 'PASS' : 'FAIL'}`);
+      tag('operation').log(`Reusing cached verification: ${cachedVerification ? 'PASS' : 'FAIL'}`);
       return { verified: cachedVerification, successfulCodes: [], assertionSteps: [], totalAttempted: 0 };
     }
 
@@ -654,7 +654,7 @@ class Navigator implements Agent {
       const toc = this.experienceTracker.getExperienceTableOfContents(actionResult);
       if (toc.length > 0) {
         const totalSections = toc.reduce((sum, entry) => sum + entry.sections.length, 0);
-        tag('substep').log(`Found ${toc.length} experience ${pluralize(toc.length, 'file')} (${totalSections} sections) for: ${actionResult.url}`);
+        tag('operation').log(`Found ${toc.length} experience ${pluralize(toc.length, 'file')} (${totalSections} sections) for: ${actionResult.url}`);
         experience = renderExperienceToc(toc);
       }
     }

diff --git a/src/ai/pilot.ts b/src/ai/pilot.ts
@@ -104,7 +104,7 @@ export class Pilot implements Agent {
 
     const schema = z.object({
       decision: z.enum(['pass', 'fail', 'continue', 'skipped']).describe('pass = test succeeded, fail = test failed, continue = tester should keep going, skipped = scenario is irrelevant OR systematic execution failures prevented testing'),
-      reason: z.string().describe('What happened and why (1-2 sentences). Do NOT repeat the decision status (e.g. "scenario goal achieved/not achieved") — just explain the evidence. For continue: explain why rejected and suggest alternatives.'),
+      reason: z.string().describe('Concise user-facing reason, maximum 1 short sentence and 120 characters. Do NOT repeat the decision status; explain only the evidence. For continue: explain why rejected and suggest alternatives.'),
       guidance: z.string().nullable().describe('Required for "continue": specific actionable instruction for the tester — what exactly to verify, retry differently, or complete next. Be concrete.'),
       requestVerification: z
         .string()
@@ -177,7 +177,7 @@ export class Pilot implements Agent {
         }
       }
 
-      tag('info').log(`Pilot: ${result.decision} — ${result.reason}`);
+      tag('info').log(`Pilot: ${result.decision} - ${result.reason}`);
       task.summary = result.reason;
 
       const verdictState = screenshotState || currentState;
@@ -221,7 +221,7 @@ export class Pilot implements Agent {
 
     const schema = z.object({
       decision: z.enum(['allow', 'fail', 'continue', 'skipped']).describe('allow = reset proceeds, fail = test failed (stop looping), continue = veto reset, tester should act on current page instead, skipped = scenario is irrelevant or cannot be executed'),
-      reason: z.string().describe('What evidence justifies this decision (1-2 sentences). Do not restate the decision.'),
+      reason: z.string().describe('Concise evidence-only reason, maximum 1 short sentence and 120 characters. Do not restate the decision.'),
       guidance: z.string().nullable().describe('Required for "continue": concrete instruction for what the tester should do instead of resetting (e.g. which tool to call, what to verify).'),
     });
 
@@ -388,8 +388,9 @@ export class Pilot implements Agent {
       - "continue": tester hasn't completed the goal; provide concrete guidance (which tool, what to check).
         If a verify() asserted a state that was ALREADY TRUE before the test, it proves nothing — reject.
 
-      reason field: do NOT restate the decision ("scenario goal achieved/not achieved"). State what happened —
-      what was verified, what failed, what evidence was found.
+      reason field: one short sentence, maximum 120 characters. Do NOT restate the decision
+      ("scenario goal achieved/not achieved"). State what happened: what was verified, what failed,
+      or what evidence was found.
     `;
   }
 
@@ -1017,6 +1018,8 @@ export class Pilot implements Agent {
       Response format:
       PROGRESS: <1 sentence assessment>
       NEXT: <specific actionable instruction for Tester>
+
+      Keep user-facing reasons concise: one short sentence, maximum 120 characters, evidence only, no repeated verdict wording.
     `;
   }
 }
diff --git a/src/ai/planner.ts b/src/ai/planner.ts
@@ -36,7 +36,7 @@ const TasksSchema = z.object({
         scenario: z.string().describe('A single sentence describing what to test'),
         priority: z.enum(['critical', 'important', 'high', 'normal', 'low']).describe('Priority of the task based on business importance'),
         startUrl: z.string().nullable().describe('Start URL for the test if different from plan URL (only for tests on visited subpages)'),
-        steps: z.array(z.string()).describe('List of steps to perform for this scenario. Each step should be a specific action (e.g., "Click on Login button", "Enter username in email field", "Submit the form"). Keep steps atomic and actionable.'),
+        steps: z.array(z.string()).describe('List of steps to perform for this scenario. Each step should be a specific action (e.g., "Open the form", "Enter required data", "Submit the form"). Keep steps atomic and actionable.'),
         expectedOutcomes: z
           .array(z.string())
           .describe('List of expected outcomes that can be verified. Each outcome should be simple, specific, and easy to check (e.g., "Success message appears", "URL changes to /dashboard", "Form field shows error"). Keep outcomes atomic - do not combine multiple checks into one.'),
@@ -226,9 +226,7 @@ export class Planner extends PlannerBase implements Agent {
       }
     }
 
-    const availableStyles = Object.keys(getStyles()).join(', ');
     tag('success').log(`Planning complete! ${this.currentPlan.tests.length} tests in plan: ${this.currentPlan.title}`);
-    tag('info').log(`Planning style: ${this.lastStyleName} (available: ${availableStyles})`);
 
     if (state.url) registerPlan(state.url, this.currentPlan, feature, state.hash);
 

diff --git a/src/ai/provider.ts b/src/ai/provider.ts
@@ -294,8 +294,11 @@ export class Provider {
         }
         throw new ContextLengthError(error.message || error.toString());
       }
-      tag('error').log(error.message || error.toString());
-      throw new AiError(error.message || error.toString());
+      const message = error.message || error.toString();
+      if (message !== 'No response text from AI') {
+        tag('error').log(message);
+      }
+      throw new AiError(message);
     }
   }
 
@@ -376,7 +379,6 @@ export class Provider {
     } catch (error: any) {
       clearActivity();
       if (error?.message?.includes('Tool choice is required')) {
-        tag('warning').log('Model completed without calling a tool, returning empty result');
         return { text: '', toolCalls: [], toolResults: [], response: { messages: [] }, usage: null };
       }
       if (error?.name === 'AbortError') throw error;

diff --git a/src/ai/researcher.ts b/src/ai/researcher.ts
@@ -151,7 +151,7 @@ export class Researcher extends ResearcherBase implements Agent {
       if (!deep && !force) {
         const similar = await findSimilarResearch(combinedHtml);
         if (similar) {
-          tag('substep').log('Similar research found, reusing cached result');
+          tag('operation').log('Similar research found, reusing cached result');
           if (stateHash) saveResearch(stateHash, similar, combinedHtml);
           tag('multiline').log(formatResearchSummary(similar));
           tag('success').log('Research complete (reused)');
@@ -316,10 +316,10 @@ export class Researcher extends ResearcherBase implements Agent {
 
       tag('multiline').log(formatResearchSummary(result.text, { visionUsed: this.hasScreenshotToAnalyze }));
       tag('success').log('Research complete');
-      if (researchFile) tag('substep').log(`Research file saved to: ${researchFile}`);
+      if (researchFile) tag('operation').log(`Research file saved to: ${researchFile}`);
       if (this.actionResult?.screenshotFile) {
         const screenshotPath = outputPath('states', this.actionResult.screenshotFile);
-        tag('substep').log(`UI screenshot: file://${screenshotPath}`);
+        tag('operation').log(`UI screenshot: file://${screenshotPath}`);
       }
 
       await this.hooksRunner.runAfterHook('researcher', state.url);
@@ -467,7 +467,7 @@ export class Researcher extends ResearcherBase implements Agent {
         .filter((k) => !!k)
         .join('\n\n');
 
-      tag('substep').log(`Found ${knowledgeFiles.length} relevant knowledge ${pluralize(knowledgeFiles.length, 'file')} for: ${this.actionResult.url}`);
+      tag('operation').log(`Found ${knowledgeFiles.length} relevant knowledge ${pluralize(knowledgeFiles.length, 'file')} for: ${this.actionResult.url}`);
       knowledge = `
         <hint>
         Here is relevant knowledge for this page:

diff --git a/src/ai/researcher/locators.ts b/src/ai/researcher/locators.ts
@@ -80,7 +80,7 @@ export function WithLocators<T extends Constructor>(Base: T) {
         }
       }
 
-      tag('substep').log(`Validated ${locators.length} locators: ${locators.length - broken} valid, ${broken} broken`);
+      tag('operation').log(`Validated ${locators.length} locators: ${locators.length - broken} valid, ${broken} broken`);
     }
 
     async fixBrokenSections(result: ResearchResult, conversation: Conversation): Promise<void> {

diff --git a/src/ai/session-analyst.ts b/src/ai/session-analyst.ts
@@ -41,6 +41,9 @@ export class SessionAnalyst implements Agent {
 
       Crucial distinction: "the app misbehaved" vs "the automation could not interact with the app". ONLY the first is a Defect. If the automation gives up before the app responds — timeout, retries exhausted, dead loop / loop detected, could not click or find an element — that is an Execution issue regardless of what the log calls it. Failure inside the automation ≠ failure inside the product.
 
+      The action log is more authoritative than the scenario title. If the actual submitted data, page state, or action sequence does not match the scenario title, classify it as Execution issue and do not list that scenario under What works. Do NOT infer a product Defect or UX issue from behavior caused by incorrect test data or an automation mismatch.
+      Negative test data is valid when it matches a negative scenario. Do not call intentionally invalid input wrong data when the scenario expects rejection or validation feedback.
+
       A solitary failure where adjacent tests on the same feature passed → Execution, not Defect.
 
       ## Severity (defects only)
@@ -76,7 +79,7 @@ export class SessionAnalyst implements Agent {
 
       ## Brevity rules
 
-      - Headline: 2 sentences MAX. About the FEATURE, not the run. No counts, no "N tests", no "this session". Banned words: "exercised", "comprehensive", "notably", "this session", "module", "targeted", "covered creation".
+      - Headline: 2 sentences MAX. About the FEATURE, not the run. No counts, no "N tests", no "this session". Never use these words: "exercised", "comprehensive", "notably", "this session", "module", "targeted", "covered creation".
       - What works: feature name + test refs. NO parentheticals, NO caveats. If there's a caveat, the entry doesn't belong here.
       - Defect title is the BUG ("Search returns non-matching results"), never the scenario name.
       - Reproduce steps are imperative one-liners drawn from the log.

diff --git a/src/ai/task-agent.ts b/src/ai/task-agent.ts
@@ -44,7 +44,7 @@ export abstract class TaskAgent {
       .filter((k) => !!k)
       .join('\n\n');
 
-    tag('substep').log(`Found ${knowledgeFiles.length} relevant knowledge ${pluralize(knowledgeFiles.length, 'file')}`);
+    tag('operation').log(`Found ${knowledgeFiles.length} relevant knowledge ${pluralize(knowledgeFiles.length, 'file')}`);
     return dedent`
       <knowledge>
       Here is relevant knowledge for this page:
@@ -61,7 +61,7 @@ export abstract class TaskAgent {
 
     const totalSections = toc.reduce((sum, entry) => sum + entry.sections.length, 0);
     debugLog(`injecting experience TOC (${toc.length} files, ${totalSections} sections)`);
-    tag('substep').log(`Found ${toc.length} experience ${pluralize(toc.length, 'file')} (${totalSections} sections)`);
+    tag('operation').log(`Found ${toc.length} experience ${pluralize(toc.length, 'file')} (${totalSections} sections)`);
     return renderExperienceToc(toc);
   }
 

diff --git a/src/commands/context-aria-command.ts b/src/commands/context-aria-command.ts
@@ -17,6 +17,6 @@ export class ContextAriaCommand extends BaseCommand {
       throw new Error('No ARIA snapshot available for current page');
     }
 
-    tag('multiline').log(`ARIA Snapshot:\n\n${ariaSnapshot}`);
+    tag('multiline').log(`ARIA Snapshot:\n\n${ariaSnapshot}`, { maxLines: 10 });
   }
 }
-Original file line number
+Diff line change
@@ Expand Up @@
             }
           }
-          tag('substep').log(`Validated ${locators.length} locators: ${locators.length - broken} valid, ${broken} broken`);
+          tag('operation').log(`Validated ${locators.length} locators: ${locators.length - broken} valid, ${broken} broken`);
         }
         async fixBrokenSections(result: ResearchResult, conversation: Conversation): Promise<void> {
@@ Expand Down @@