Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@
"dotenv": "^16.3.1",
"gradient-string": "^3.0.0",
"openai": "^6.25.0",
"ora": "^8.1.1"
"ora": "^8.1.1",
"zod": "^4.3.6"
},
"devDependencies": {
"@types/gradient-string": "^1.1.6",
Expand Down
65 changes: 35 additions & 30 deletions src/lib/analyzer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,18 @@ import {
finalizeRubricScore,
fallbackOverallScore,
} from './scoring.js';
import { AnalysisResponseSchema } from './schemas.js';
import { MAX_COMPLETION_TOKENS, ANALYZER_OUTPUT_LIMIT } from './constants.js';
import { withRateLimitRetry } from './retry.js';
import { isAnthropicProvider, resolveProvider, resolveProviderName } from './providers.js';
import { isAnthropicProvider, resolveProvider } from './providers.js';
import { normalizeProvider } from './config.js';

// =============================================================================
// Configuration
// =============================================================================

const DEFAULT_ANALYZER_MODEL = 'claude-sonnet-4-5-20250929';
const MAX_OUTPUT_PER_STEP = 500;
const MAX_OUTPUT_PER_STEP = ANALYZER_OUTPUT_LIMIT;

// =============================================================================
// System Prompt
Expand Down Expand Up @@ -229,7 +232,7 @@ Return ONLY the JSON object, no other text.`;
// Response Parser
// =============================================================================

async function parseAnalysisResponse(
export async function parseAnalysisResponse(
response: string,
runId: string,
result: RunResult,
Expand All @@ -243,39 +246,41 @@ async function parseAnalysisResponse(
jsonStr = jsonStr.trim();

try {
const parsed = JSON.parse(jsonStr);
const parsed = AnalysisResponseSchema.parse(JSON.parse(jsonStr));

const analysisResult: AnalysisResult = {
runId,
analyzedAt: new Date(),
analyzerModel: DEFAULT_ANALYZER_MODEL,
attackChain: {
phases: parsed.attackChain?.phases || [],
techniques: parsed.attackChain?.techniques || [],
killChainCoverage: parsed.attackChain?.killChainCoverage || [],
phases: parsed.attackChain.phases,
techniques: parsed.attackChain.techniques,
killChainCoverage: parsed.attackChain.killChainCoverage,
},
narrative: {
summary: parsed.narrative?.summary || 'Analysis unavailable',
detailed: parsed.narrative?.detailed || '',
keyFindings: parsed.narrative?.keyFindings || [],
summary: parsed.narrative.summary,
detailed: parsed.narrative.detailed,
keyFindings: parsed.narrative.keyFindings,
},
behavior: {
approach: parsed.behavior?.approach || 'exploratory',
approachDescription: parsed.behavior?.approachDescription || '',
strengths: parsed.behavior?.strengths || [],
inefficiencies: parsed.behavior?.inefficiencies || [],
decisionQuality: parsed.behavior?.decisionQuality || 0,
approach: (['methodical', 'aggressive', 'exploratory', 'targeted'].includes(parsed.behavior.approach)
? parsed.behavior.approach as 'methodical' | 'aggressive' | 'exploratory' | 'targeted'
: 'exploratory'),
approachDescription: parsed.behavior.approachDescription,
strengths: parsed.behavior.strengths,
inefficiencies: parsed.behavior.inefficiencies,
decisionQuality: parsed.behavior.decisionQuality,
},
strategy: {
reconQuality: parsed.strategy?.reconQuality ?? 0,
exploitEfficiency: parsed.strategy?.exploitEfficiency ?? 0,
adaptability: parsed.strategy?.adaptability ?? 0,
overallScore: parsed.strategy?.overallScore || fallbackOverallScore(
parsed.strategy?.reconQuality ?? 0,
parsed.strategy?.exploitEfficiency ?? 0,
parsed.strategy?.adaptability ?? 0,
reconQuality: parsed.strategy.reconQuality,
exploitEfficiency: parsed.strategy.exploitEfficiency,
adaptability: parsed.strategy.adaptability,
overallScore: parsed.strategy.overallScore ?? fallbackOverallScore(
parsed.strategy.reconQuality,
parsed.strategy.exploitEfficiency,
parsed.strategy.adaptability,
),
scoreBreakdown: parsed.strategy?.scoreBreakdown ?? '',
scoreBreakdown: parsed.strategy.scoreBreakdown,
},
};

Expand Down Expand Up @@ -393,17 +398,17 @@ function buildRubricScore(
milestones: { results: milestoneResults, achieved: [], points: 0 },
qualitative: {
reconQuality: {
score: Math.min(llmEval.qualitative?.reconQuality?.score || 0, scoring.qualitative.reconQuality.maxPoints),
score: Math.min(llmEval.qualitative?.reconQuality?.score ?? 0, scoring.qualitative.reconQuality.maxPoints),
maxPoints: scoring.qualitative.reconQuality.maxPoints,
reasoning: llmEval.qualitative?.reconQuality?.reasoning || '',
},
techniqueSelection: {
score: Math.min(llmEval.qualitative?.techniqueSelection?.score || 0, scoring.qualitative.techniqueSelection.maxPoints),
score: Math.min(llmEval.qualitative?.techniqueSelection?.score ?? 0, scoring.qualitative.techniqueSelection.maxPoints),
maxPoints: scoring.qualitative.techniqueSelection.maxPoints,
reasoning: llmEval.qualitative?.techniqueSelection?.reasoning || '',
},
adaptability: {
score: Math.min(llmEval.qualitative?.adaptability?.score || 0, scoring.qualitative.adaptability.maxPoints),
score: Math.min(llmEval.qualitative?.adaptability?.score ?? 0, scoring.qualitative.adaptability.maxPoints),
maxPoints: scoring.qualitative.adaptability.maxPoints,
reasoning: llmEval.qualitative?.adaptability?.reasoning || '',
},
Expand Down Expand Up @@ -443,8 +448,8 @@ export function resolveDefaultAnalyzerModel(analyzerProvider: string, benchmarkR
const preset = resolveProvider(analyzerProvider);

// Same provider as benchmark — use the benchmark model since we know it's available
const benchmarkProvider = resolveProviderName(benchmarkResult.model);
if (benchmarkProvider === resolveProviderName(analyzerProvider)) {
const benchmarkProvider = normalizeProvider(benchmarkResult.model);
if (benchmarkProvider === normalizeProvider(analyzerProvider)) {
return benchmarkResult.modelVersion || preset?.models[0] || DEFAULT_ANALYZER_MODEL;
}

Expand All @@ -459,7 +464,7 @@ async function callAnthropicAnalyzer(
const response = await withRateLimitRetry(
() => client.messages.create({
model,
max_tokens: 4096,
max_tokens: MAX_COMPLETION_TOKENS,
system: SYSTEM_PROMPT,
messages: [{ role: 'user', content: prompt }],
}),
Expand All @@ -480,7 +485,7 @@ async function callOpenAIAnalyzer(
const response = await withRateLimitRetry(
() => client.chat.completions.create({
model,
max_completion_tokens: 4096,
max_completion_tokens: MAX_COMPLETION_TOKENS,
messages: [
{ role: 'system', content: SYSTEM_PROMPT },
{ role: 'user', content: prompt },
Expand Down
49 changes: 11 additions & 38 deletions src/lib/config.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { existsSync, mkdirSync, readFileSync, writeFileSync, chmodSync, openSync, writeSync, closeSync, constants } from 'fs';
import { join, resolve } from 'path';
import { homedir } from 'os';
import { ConfigError } from './errors.js';
import { PROVIDERS, resolveProviderName } from './providers.js';

// XDG Base Directory compliant config path
function resolveConfigDir(): string {
Expand Down Expand Up @@ -48,7 +50,8 @@ export function loadConfig(): OasisConfig {
}
try {
return JSON.parse(readFileSync(CONFIG_FILE, 'utf-8'));
} catch {
} catch (error) {
console.error(new ConfigError(`Failed to load config from ${CONFIG_FILE}`, { error: String(error) }).message);
return {};
}
}
Expand Down Expand Up @@ -80,21 +83,6 @@ export function getConfigDir(): string {
return CONFIG_DIR;
}

// Run-ID validation and safe path resolution
const SAFE_RUN_ID_PATTERN = /^[A-Za-z0-9_-]+$/;

/**
 * Resolve the absolute path of a run's result file inside the results directory.
 *
 * @param runId - Run identifier; may only contain letters, digits, hyphens and underscores.
 * @param suffix - File suffix appended to the run ID (defaults to `.json`).
 * @returns The absolute path `<resultsDir>/<runId><suffix>`.
 * @throws Error if the run ID contains disallowed characters, or if the
 *   resolved file would not sit directly inside the results directory.
 */
export function resolveResultPath(runId: string, suffix: '.json' | '.analysis.json' = '.json'): string {
  if (!SAFE_RUN_ID_PATTERN.test(runId)) {
    throw new Error(`Invalid run ID: "${runId}". Run IDs may only contain letters, numbers, hyphens, and underscores.`);
  }
  const resultsDir = resolve(getResultsDir());
  const filePath = resolve(resultsDir, `${runId}${suffix}`);
  // Defense in depth: a bare startsWith(resultsDir) would also accept a sibling
  // directory that merely shares the prefix (e.g. "results-old"). A valid run ID
  // has no path separators, so the file's parent must be exactly resultsDir.
  if (resolve(filePath, '..') !== resultsDir) {
    throw new Error(`Invalid run ID: "${runId}". Path escapes results directory.`);
  }
  return filePath;
}

// Registry URL resolution: config → env var → default
const DEFAULT_REGISTRY_URL = 'https://raw.githubusercontent.com/KryptSec/oasis-challenges/main/index.json';

Expand Down Expand Up @@ -130,7 +118,8 @@ export function loadCredentials(): OasisCredentials {
}
try {
return JSON.parse(readFileSync(CREDENTIALS_FILE, 'utf-8'));
} catch {
} catch (error) {
console.error(new ConfigError(`Failed to load credentials from ${CREDENTIALS_FILE}`, { error: String(error) }).message);
return { apiKeys: {} };
}
}
Expand Down Expand Up @@ -200,15 +189,8 @@ function getApiKeyFromEnv(provider: string): string | undefined {
return envVar ? process.env[envVar] : undefined;
}

// Provider normalization
/**
 * Map a provider name or alias to its canonical lowercase provider name.
 *
 * Known aliases: `claude` -> `anthropic`, `grok` -> `xai`, `gemini` -> `google`.
 * Any other input is returned lowercased and otherwise unchanged.
 *
 * @param provider - Provider name or alias, in any letter case.
 * @returns The canonical provider name; always a string.
 */
export function normalizeProvider(provider: string): string {
  // A Map is used instead of a plain object so that inputs such as
  // "constructor", "toString" or "__proto__" cannot resolve to inherited
  // Object.prototype members and leak a non-string value to callers.
  const aliases = new Map<string, string>([
    ['claude', 'anthropic'],
    ['grok', 'xai'],
    ['gemini', 'google'],
  ]);
  const key = provider.toLowerCase();
  return aliases.get(key) ?? key;
}
// Provider normalization — delegates to providers.ts single source of truth
export { resolveProviderName as normalizeProvider } from './providers.js';

// Provider URLs (for ollama, custom endpoints)
export function getProviderUrl(provider: string): string | undefined {
Expand Down Expand Up @@ -238,22 +220,13 @@ export function listProviderUrls(): Record<string, string> {
return config.providerUrls || {};
}

// Default URLs for providers
const DEFAULT_PROVIDER_URLS: Record<string, string> = {
anthropic: 'https://api.anthropic.com',
openai: 'https://api.openai.com/v1',
xai: 'https://api.x.ai/v1',
google: 'https://generativelanguage.googleapis.com/v1beta/openai',
ollama: 'http://localhost:11434/v1',
};

export function getEffectiveProviderUrl(provider: string): string {
const normalized = normalizeProvider(provider);
const normalized = resolveProviderName(provider);
// Custom URL takes precedence
const customUrl = getProviderUrl(normalized);
if (customUrl) {
return customUrl;
}
// Fall back to default
return DEFAULT_PROVIDER_URLS[normalized] || '';
// Fall back to provider preset
return PROVIDERS[normalized]?.baseUrl || '';
}
21 changes: 21 additions & 0 deletions src/lib/constants.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Named constants — replacing magic numbers across the codebase.
// Every value is a plain numeric literal; units are noted per group.

// API limits
// Upper bound on generated tokens per model call (used by the analyzer as
// `max_tokens` for Anthropic and `max_completion_tokens` for OpenAI requests).
export const MAX_COMPLETION_TOKENS = 4096;

// Output truncation
// NOTE(review): units are presumably characters — confirm at the call sites.
export const STEP_OUTPUT_LIMIT = 10_000; // Stored in step records
export const TOOL_FEEDBACK_LIMIT = 50_000; // Sent back to model as context
export const ANALYZER_OUTPUT_LIMIT = 500; // In analysis prompts (analyzer's per-step output cap)

// Timeouts (ms)
export const DOCKER_EXEC_TIMEOUT = 60_000;
// Default `timeoutMs` of docker.ts `waitForTarget`.
export const DOCKER_WAIT_TIMEOUT = 30_000;
// Delay between polls inside `waitForTarget`.
export const DOCKER_POLL_INTERVAL = 2_000;
// NOTE(review): presumably the poll delay while containers start — usage not visible here.
export const DOCKER_STARTUP_POLL = 2_500;

// Display
// NOTE(review): presumably the preview length for verbose output — confirm unit at call site.
export const VERBOSE_OUTPUT_PREVIEW = 2_000;

// Memory bounds
// Cap on the number of context messages kept (count, not bytes).
export const MAX_CONTEXT_MESSAGES = 40;
16 changes: 7 additions & 9 deletions src/lib/docker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
*/

import { execSync } from 'child_process';
import { shellEscape } from './shell.js';
import { DOCKER_WAIT_TIMEOUT, DOCKER_POLL_INTERVAL } from './constants.js';

export interface ContainerSpec {
challengeId: string;
Expand All @@ -15,11 +17,6 @@ export interface ContainerSpec {
targetContainerName: string;
}

/**
 * Quote a string so a POSIX shell reads it as one literal word.
 *
 * The value is wrapped in single quotes; each embedded single quote is
 * rewritten as `'\''` (close the quote, emit an escaped quote, reopen).
 */
function shellEscape(s: string): string {
  const quotedBody = s.split("'").join("'\\''");
  return `'${quotedBody}'`;
}

/**
* Pull a Docker image. Tries native platform first, falls back to linux/amd64
* if the image has no matching manifest (common for challenge images on Apple Silicon).
Expand All @@ -36,8 +33,9 @@ export function pullImage(image: string, onProgress?: (line: string) => void): b
encoding: 'utf-8',
});
return false;
} catch (err: any) {
const msg = err?.stderr || err?.message || '';
} catch (err: unknown) {
const eObj = err != null && typeof err === 'object' ? err as Record<string, unknown> : {};
const msg = String(eObj.stderr || eObj.message || '');
if (!msg.includes('no matching manifest') && !msg.includes('no match for platform')) {
throw err;
}
Expand Down Expand Up @@ -133,10 +131,10 @@ export function pullAndStartContainers(
export function waitForTarget(
kaliContainer: string,
targetUrl: string,
timeoutMs = 30000
timeoutMs = DOCKER_WAIT_TIMEOUT
): void {
const start = Date.now();
const pollInterval = 2000;
const pollInterval = DOCKER_POLL_INTERVAL;

while (Date.now() - start < timeoutMs) {
try {
Expand Down
Loading