Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions src/commands/analyze.ts
Original file line number Diff line number Diff line change
Expand Up @@ -160,13 +160,17 @@ export const analyzeCommand = new Command('analyze')
spinner.succeed(`Analysis complete for ${id}`);
}

// Print summary for single runs
if (runIds.length === 1) {
printAnalysisSummary(analysis);
} else {
const methodology = analysis.rubricScore?.percentage ?? analysis.strategy.overallScore;
const score = calculateKSM(methodology, calculateEfficacyFromResults(result.challenge, result.modelVersion, allResults), getTokenEfficiency(result));
console.log(colors.gray(` Score: ${score}/100 | Approach: ${analysis.behavior.approach}`));
if (!analysis.parseFailed) {
// Print summary for single runs
if (runIds.length === 1) {
printAnalysisSummary(analysis);
} else {
const methodology = analysis.rubricScore?.percentage ?? analysis.strategy.overallScore;
const score = calculateKSM(methodology, calculateEfficacyFromResults(result.challenge, result.modelVersion, allResults), getTokenEfficiency(result));
console.log(colors.gray(` Score: ${score}/100 | Approach: ${analysis.behavior.approach}`));
}
} else if (runIds.length > 1) {
console.log(colors.gray(` Score: N/A (parse failed)`));
}
} catch (error) {
if (error instanceof QuotaExceededError) {
Expand Down
26 changes: 14 additions & 12 deletions src/commands/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -356,18 +356,20 @@ export const runCommand = new Command('run')
spinnerAnalysis.succeed('Analysis complete');
}

// Print analysis summary
printAnalysisSummary(analysis);

// Print score summary
const methodology = analysis.rubricScore?.percentage ?? analysis.strategy.overallScore;
const efficacy = calculateEfficacy(result.challenge, result.modelVersion, getResultsDir());
printScoreSummary({
ksm: calculateKSM(methodology, efficacy, getTokenEfficiency(result)),
efficacy,
efficiency: analysis.rubricScore?.percentage ?? analysis.strategy.exploitEfficiency ?? 0,
time: result.totalTime,
});
if (!analysis.parseFailed) {
// Print analysis summary
printAnalysisSummary(analysis);

// Print score summary
const methodology = analysis.rubricScore?.percentage ?? analysis.strategy.overallScore;
const efficacy = calculateEfficacy(result.challenge, result.modelVersion, getResultsDir());
printScoreSummary({
ksm: calculateKSM(methodology, efficacy, getTokenEfficiency(result)),
efficacy,
efficiency: analysis.rubricScore?.percentage ?? analysis.strategy.exploitEfficiency ?? 0,
time: result.totalTime,
});
}

console.log(colors.gray(`Analysis saved to: ${analysisPath}`));
} catch (analysisError) {
Expand Down
4 changes: 3 additions & 1 deletion src/interactive/results-flow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,10 @@ async function showRunDetail(entry: LoadedResult): Promise<void> {

switch (action) {
case 'analysis':
if (analysis) {
if (analysis && !analysis.parseFailed) {
printAnalysisSummary(analysis);
} else if (analysis?.parseFailed) {
console.log(colors.gray(' Analysis could not be parsed. Retry with: oasis analyze ' + result.id));
}
break;
case 'report':
Expand Down
24 changes: 13 additions & 11 deletions src/interactive/run-flow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -559,17 +559,19 @@ export async function runBenchmarkFlow(): Promise<void> {
spinnerAnalysis.succeed('Analysis complete');
}

printAnalysisSummary(analysis);

const methodology = analysis.rubricScore?.percentage ?? analysis.strategy.overallScore;
const efficacy = calculateEfficacy(result.challenge, result.modelVersion, getResultsDir());
runKsmScore = calculateKSM(methodology, efficacy, getTokenEfficiency(result));
printScoreSummary({
ksm: runKsmScore,
efficacy,
efficiency: analysis.rubricScore?.percentage ?? analysis.strategy.exploitEfficiency ?? 0,
time: result.totalTime,
});
if (!analysis.parseFailed) {
printAnalysisSummary(analysis);

const methodology = analysis.rubricScore?.percentage ?? analysis.strategy.overallScore;
const efficacy = calculateEfficacy(result.challenge, result.modelVersion, getResultsDir());
runKsmScore = calculateKSM(methodology, efficacy, getTokenEfficiency(result));
printScoreSummary({
ksm: runKsmScore,
efficacy,
efficiency: analysis.rubricScore?.percentage ?? analysis.strategy.exploitEfficiency ?? 0,
time: result.totalTime,
});
}

console.log(colors.gray(` Analysis saved to: ${analysisPath}`));
} catch (analysisError) {
Expand Down
90 changes: 90 additions & 0 deletions tests/unit/export.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import type { RunResult, AnalysisResult } from '../../src/lib/types.js';

// Mock @inquirer/prompts before importing the module under test.
// The factory replaces `select` and `input` with bare vi.fn() stubs; the tests
// below script `select` per-case via mockResolvedValueOnce.
vi.mock('@inquirer/prompts', () => ({
select: vi.fn(),
input: vi.fn(),
}));

// Load the module under test with a dynamic import (top-level await) placed after
// the mock registration above, rather than a static import at the top of the file.
const { promptExport } = await import('../../src/lib/export.js');

/**
 * Builds a minimal `RunResult` fixture for the export tests.
 *
 * The fixture describes an unsuccessful run (`success: false`, `flag: null`)
 * with id `test-run-001`, zeroed token counts and empty collections. A new
 * object (with new `Date` instances and new arrays) is produced on every call.
 *
 * @returns A freshly constructed run-result fixture.
 */
function makeRunResult(): RunResult {
  // Token usage is all zeros; only the shape matters to the tests.
  const zeroTokens = { input: 0, output: 0, total: 0 };

  const fixture: RunResult = {
    id: 'test-run-001',
    model: 'anthropic',
    modelVersion: 'claude-3',
    challenge: 'gatekeeper',
    startTime: new Date(),
    endTime: new Date(),
    success: false,
    flag: null,
    totalTime: 10,
    iterations: 1,
    tokens: zeroTokens,
    steps: [],
    techniquesUsed: [],
    tacticBreakdown: {},
    methodologies: [],
    toolsUsed: [],
    methodologyBreakdown: {},
  };

  return fixture;
}

/**
 * Builds an `AnalysisResult` fixture for the export tests.
 *
 * Starts from a fixed set of default values (run id `test-run-001`, a
 * `methodical` approach, every numeric score set to 50) and then applies
 * `overrides` on top, so any field supplied by the caller wins.
 *
 * @param overrides - Fields to replace on the default fixture; defaults to none.
 * @returns The default fixture merged with `overrides`.
 */
function makeAnalysis(overrides: Partial<AnalysisResult> = {}): AnalysisResult {
  const defaults: AnalysisResult = {
    runId: 'test-run-001',
    analyzedAt: new Date(),
    analyzerModel: 'claude-3',
    attackChain: {
      phases: [],
      techniques: [],
      killChainCoverage: [],
    },
    narrative: {
      summary: 'Test',
      detailed: 'Test',
      keyFindings: [],
    },
    behavior: {
      approach: 'methodical',
      approachDescription: '',
      strengths: [],
      inefficiencies: [],
      decisionQuality: 50,
    },
    strategy: {
      reconQuality: 50,
      exploitEfficiency: 50,
      adaptability: 50,
      overallScore: 50,
      scoreBreakdown: '',
    },
  };

  // Overrides are spread last so caller-supplied fields take precedence.
  return { ...defaults, ...overrides };
}

/**
 * Exercises how `promptExport` reacts to the analysis it is given.
 *
 * The cases below expect that a missing analysis, or one flagged
 * `parseFailed: true`, produces a "No analysis available" message together with
 * an `oasis analyze <id>` hint, while a usable analysis (flag absent or `false`)
 * reaches the export menu and produces an `oasis report <id>` hint instead.
 */
describe('promptExport', () => {
  let consoleSpy: ReturnType<typeof vi.spyOn>;

  /** Joins the first argument of every captured `console.log` call, one per line. */
  const loggedOutput = (): string => consoleSpy.mock.calls.map(c => c[0]).join('\n');

  beforeEach(() => {
    // Undo the spy installed for the previous test before creating a new one.
    // Without this, Vitest versions that reuse an existing spy keep earlier
    // tests' calls in `mock.calls`, which would leak "No analysis available"
    // into the negative assertions further down.
    vi.restoreAllMocks();
    consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
  });

  it('skips export when analysis is undefined', async () => {
    await promptExport(makeRunResult(), undefined);
    const output = loggedOutput();
    expect(output).toContain('No analysis available');
    expect(output).toContain('oasis analyze test-run-001');
  });

  it('skips export when analysis.parseFailed is true', async () => {
    const analysis = makeAnalysis({ parseFailed: true });
    await promptExport(makeRunResult(), analysis);
    const output = loggedOutput();
    expect(output).toContain('No analysis available');
    expect(output).toContain('oasis analyze test-run-001');
  });

  it('does not skip export for valid analysis (parseFailed undefined)', async () => {
    // Answer the mocked `select` prompt with 'done' for this one call.
    const { select } = await import('@inquirer/prompts');
    (select as ReturnType<typeof vi.fn>).mockResolvedValueOnce('done');

    const analysis = makeAnalysis();
    await promptExport(makeRunResult(), analysis);
    const output = loggedOutput();
    expect(output).not.toContain('No analysis available');
    expect(output).toContain('oasis report test-run-001');
  });

  it('does not skip export for explicit parseFailed: false', async () => {
    // Answer the mocked `select` prompt with 'done' for this one call.
    const { select } = await import('@inquirer/prompts');
    (select as ReturnType<typeof vi.fn>).mockResolvedValueOnce('done');

    const analysis = makeAnalysis({ parseFailed: false });
    await promptExport(makeRunResult(), analysis);
    const output = loggedOutput();
    expect(output).not.toContain('No analysis available');
    expect(output).toContain('oasis report test-run-001');
  });
});