Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/main/java/dev/talos/cli/modes/ExecutionOutcome.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import dev.talos.runtime.outcome.MutationFailureAnswerRenderer;
import dev.talos.runtime.outcome.MutationOutcome;
import dev.talos.runtime.outcome.NoToolAnswerTruthfulnessGuard;
import dev.talos.runtime.outcome.PathExistenceAnswerRenderer;
import dev.talos.runtime.outcome.ProtectedReadAnswerGuard;
import dev.talos.runtime.outcome.ReadOnlyToolLimitOutcome;
import dev.talos.runtime.outcome.StaticVerificationAnswerRenderer;
Expand Down Expand Up @@ -286,6 +287,12 @@ static ExecutionOutcome fromToolLoop(
messages,
loopResult,
workspace);
current = PathExistenceAnswerRenderer.prependVerifiedStatusIfNeeded(
current,
safePlan,
evidenceObligation,
evidenceResult,
workspace);
}
ReadOnlyToolLimitOutcome readOnlyToolLimit = ReadOnlyToolLimitOutcome.assess(
contract,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package dev.talos.runtime.outcome;

import dev.talos.runtime.policy.EvidenceObligation;
import dev.talos.runtime.policy.EvidenceObligationVerifier;
import dev.talos.runtime.task.TaskContract;
import dev.talos.runtime.toolcall.ToolCallSupport;
import dev.talos.runtime.turn.CurrentTurnPlan;

import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Set;

/** Renders deterministic file-existence facts once path-existence evidence is satisfied. */
public final class PathExistenceAnswerRenderer {
private static final String PREFIX = "[Path existence verified]";

private PathExistenceAnswerRenderer() {}

/**
 * Puts a deterministic path-existence summary in front of the answer when the turn required
 * path-existence evidence and that evidence was satisfied.
 *
 * <p>The answer comes back unchanged (with {@code null} treated as the empty string) when it
 * already starts with the summary prefix, when the obligation is anything other than
 * {@code PATH_EXISTENCE_EVIDENCE_REQUIRED}, when the evidence result is missing or not
 * {@code SATISFIED}, when no workspace is given or it cannot be made absolute, or when the
 * plan's task contract yields no targets.
 *
 * @param answer the answer text produced so far; may be {@code null}
 * @param plan the current turn plan supplying the task contract; may be {@code null}
 * @param obligation the evidence obligation for this turn
 * @param evidenceResult the verifier result for that obligation; may be {@code null}
 * @param workspace the workspace root that targets are resolved against; may be {@code null}
 * @return the summary followed by a blank line and the answer, the summary alone when the
 *     answer is blank, or the unchanged answer when no summary applies
 */
public static String prependVerifiedStatusIfNeeded(
        String answer,
        CurrentTurnPlan plan,
        EvidenceObligation obligation,
        EvidenceObligationVerifier.Result evidenceResult,
        Path workspace
) {
    final String text = (answer != null) ? answer : "";

    // Already summarized, or this turn carries a different obligation.
    if (text.startsWith(PREFIX)
            || obligation != EvidenceObligation.PATH_EXISTENCE_EVIDENCE_REQUIRED) {
        return text;
    }

    boolean evidenceSatisfied = evidenceResult != null
            && evidenceResult.status() == EvidenceObligationVerifier.Status.SATISFIED;
    if (!evidenceSatisfied || workspace == null) {
        return text;
    }

    List<String> candidates = sortedTargets(plan == null ? null : plan.taskContract());
    if (candidates.isEmpty()) {
        return text;
    }

    Path base;
    try {
        base = workspace.toAbsolutePath().normalize();
    } catch (RuntimeException e) {
        // Without an absolute root nothing can be resolved, so leave the answer as it is.
        return text;
    }

    // Build "<PREFIX>\n- target: status" lines directly instead of collecting them first.
    StringBuilder summary = new StringBuilder(PREFIX);
    boolean anyLine = false;
    for (String candidate : candidates) {
        String verdict = status(base, candidate);
        if (!verdict.isBlank()) {
            summary.append("\n- ").append(candidate).append(": ").append(verdict);
            anyLine = true;
        }
    }
    if (!anyLine) {
        return text;
    }

    if (!text.isBlank()) {
        summary.append("\n\n").append(text);
    }
    return summary.toString();
}

/**
 * Picks the contract's target paths and returns them cleaned, de-duplicated and sorted.
 *
 * <p>Source-evidence targets are used when there are any; otherwise the expected targets are
 * used. Each entry goes through {@code ToolCallSupport.normalizePath} and {@code strip()},
 * blank results are dropped, and the remaining entries are ordered case-insensitively with
 * natural string order as the tie-breaker.
 *
 * @param contract the task contract to read targets from; may be {@code null}
 * @return an unmodifiable sorted list, empty when there is no contract or no targets
 */
private static List<String> sortedTargets(TaskContract contract) {
    if (contract == null) {
        return List.of();
    }

    Set<String> evidenceTargets = contract.sourceEvidenceTargets();
    Set<String> chosen = evidenceTargets.isEmpty() ? contract.expectedTargets() : evidenceTargets;
    if (chosen == null || chosen.isEmpty()) {
        return List.of();
    }

    // Lower-cased comparison first; exact string order keeps the result deterministic
    // for entries that differ only by case.
    Comparator<String> order = Comparator
            .comparing((String path) -> path.toLowerCase(Locale.ROOT))
            .thenComparing(Comparator.naturalOrder());

    return chosen.stream()
            .map(raw -> ToolCallSupport.normalizePath(raw).strip())
            .filter(path -> !path.isBlank())
            .distinct()
            .sorted(order)
            .toList();
}

private static String status(Path root, String target) {
Path resolved = resolve(root, target);
if (resolved == null) return "outside workspace";
return Files.exists(resolved) ? "exists" : "not found";

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Use no-follow checks when verifying path existence

In workspaces containing a broken symlink, this reports <link>: not found even though the path entry itself exists and talos.list_dir evidence would show it. Files.exists(resolved) follows the symlink target by default, so a broken link is treated the same as an absent path; path-existence answers should check the directory entry itself, e.g. with LinkOption.NOFOLLOW_LINKS or an explicit symbolic-link check.

Useful? React with 👍 / 👎.

}

/**
 * Resolves a target against the workspace root and confines the result to that root.
 *
 * @param root workspace root to resolve against and to compare the result with
 * @param target path text; an absolute path is used as given, a relative one is joined to
 *     {@code root}
 * @return the absolute, normalized path when it starts with {@code root}; {@code null} when
 *     an argument is missing or blank, the path lies elsewhere, or path handling throws a
 *     runtime exception
 */
private static Path resolve(Path root, String target) {
    boolean unusable = root == null || target == null || target.isBlank();
    if (unusable) {
        return null;
    }
    try {
        Path parsed = Path.of(target);
        Path joined;
        if (parsed.isAbsolute()) {
            joined = parsed;
        } else {
            joined = root.resolve(parsed);
        }
        // Normalizing collapses "." and ".." so the prefix check below cannot be bypassed
        // with a relative escape such as "../other".
        Path absolute = joined.toAbsolutePath().normalize();
        if (!absolute.startsWith(root)) {
            return null;
        }
        return absolute;
    } catch (RuntimeException e) {
        // Unparseable path text is treated like any other target that cannot be resolved.
        return null;
    }
}
}
115 changes: 115 additions & 0 deletions src/test/java/dev/talos/cli/modes/ExecutionOutcomeTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -3118,6 +3118,121 @@ void attemptedProtectedReadFailureDoesNotReportNoToolAttempt() {
assertTrue(outcome.taskOutcome().hasWarning(TruthWarningType.MISSING_EVIDENCE));
}

/**
 * A directory listing of the workspace root counts as path-existence evidence, so the final
 * answer must start with the verified summary and report each requested path's real status.
 */
@Test
void pathExistenceAnswerPrependsExactStatusWhenListDirEvidenceIsSatisfied() throws Exception {
    // The workspace contains scripts.js but not script.js, so the two requested paths differ.
    final Path workspace = Files.createTempDirectory("talos-path-existence-summary-");
    try {
        Files.writeString(workspace.resolve("scripts.js"), "console.log('present');\n");
        Files.writeString(workspace.resolve("styles.css"), "body { color: red; }\n");

        ArrayList<ChatMessage> conversation = new ArrayList<>();
        conversation.add(ChatMessage.system("sys"));
        conversation.add(ChatMessage.user(
                "Check whether scripts.js exists and whether script.js exists. Do not change anything."));

        var turnPlan = dev.talos.runtime.turn.CurrentTurnPlan.create(
                dev.talos.runtime.task.TaskContractResolver.fromMessages(conversation),
                dev.talos.runtime.phase.ExecutionPhase.INSPECT,
                List.of("talos.list_dir", "talos.read_file"),
                List.of("talos.list_dir", "talos.read_file"),
                List.of());

        // The only tool call of the turn: a successful listing of the workspace root.
        ToolCallLoop.ToolOutcome rootListing = new ToolCallLoop.ToolOutcome(
                "talos.list_dir", ".", true, false, false,
                "scripts.js\nstyles.css\n", "");

        ToolCallLoop.LoopResult loop = new ToolCallLoop.LoopResult(
                "I checked the files.",
                1, 1,
                List.of("talos.list_dir"),
                List.of(),
                0, 0,
                false,
                0,
                List.of(),
                0, 0, 0, 0,
                List.of(rootListing));

        ExecutionOutcome result = ExecutionOutcome.fromToolLoop(
                loop.finalAnswer(), turnPlan, conversation, loop, workspace, 0);

        assertEquals(ExecutionOutcome.CompletionStatus.COMPLETE, result.completionStatus());
        assertEquals(TaskCompletionStatus.READ_ONLY_ANSWERED, result.taskOutcome().completionStatus());

        String rendered = result.finalAnswer();
        assertTrue(rendered.startsWith("[Path existence verified]"), rendered);
        assertTrue(rendered.contains("scripts.js: exists"), rendered);
        assertTrue(rendered.contains("script.js: not found"), rendered);
        assertFalse(rendered.startsWith("[Evidence incomplete:"), rendered);
        assertFalse(result.taskOutcome().hasWarning(TruthWarningType.MISSING_EVIDENCE));
    } finally {
        // Best-effort cleanup in reverse path order so files go before their directories.
        try (var entries = Files.walk(workspace)) {
            for (Path entry : entries.sorted(Comparator.reverseOrder()).toList()) {
                try {
                    Files.deleteIfExists(entry);
                } catch (Exception ignored) {
                    // a leftover temp file must not fail the test
                }
            }
        }
    }
}

/**
 * Reading an unrelated file is not path-existence evidence: the outcome stays advisory, the
 * model's unsupported claim is withheld, and no verified summary is rendered.
 */
@Test
void pathExistenceAnswerWithOnlyIrrelevantReadEvidenceRemainsContained() throws Exception {
    final Path workspace = Files.createTempDirectory("talos-path-existence-irrelevant-read-");
    try {
        Files.writeString(workspace.resolve("scripts.js"), "console.log('present');\n");
        Files.writeString(workspace.resolve("styles.css"), "body { color: red; }\n");

        ArrayList<ChatMessage> conversation = new ArrayList<>();
        conversation.add(ChatMessage.system("sys"));
        conversation.add(ChatMessage.user(
                "Check whether scripts.js exists and whether script.js exists. Do not change anything."));

        var turnPlan = dev.talos.runtime.turn.CurrentTurnPlan.create(
                dev.talos.runtime.task.TaskContractResolver.fromMessages(conversation),
                dev.talos.runtime.phase.ExecutionPhase.INSPECT,
                List.of("talos.list_dir", "talos.read_file"),
                List.of("talos.list_dir", "talos.read_file"),
                List.of());

        // The only tool call reads styles.css, which is neither of the two paths asked about.
        ToolCallLoop.ToolOutcome unrelatedRead = new ToolCallLoop.ToolOutcome(
                "talos.read_file", "styles.css", true, false, false,
                "body { color: red; }", "");

        ToolCallLoop.LoopResult loop = new ToolCallLoop.LoopResult(
                "scripts.js does not exist.",
                1, 1,
                List.of("talos.read_file"),
                List.of(),
                1, 0,
                false,
                0,
                List.of("styles.css"),
                0, 0, 0, 0,
                List.of(unrelatedRead));

        ExecutionOutcome result = ExecutionOutcome.fromToolLoop(
                loop.finalAnswer(), turnPlan, conversation, loop, workspace, 0);

        assertEquals(ExecutionOutcome.CompletionStatus.ADVISORY_ONLY, result.completionStatus());

        String rendered = result.finalAnswer();
        assertTrue(rendered.startsWith(
                "[Evidence incomplete: required workspace evidence was not gathered in this turn.]"),
                rendered);
        assertFalse(rendered.contains("scripts.js does not exist"), rendered);
        assertFalse(rendered.contains("scripts.js: exists"), rendered);
        assertTrue(result.taskOutcome().hasWarning(TruthWarningType.MISSING_EVIDENCE));
    } finally {
        // Best-effort cleanup in reverse path order so files go before their directories.
        try (var entries = Files.walk(workspace)) {
            for (Path entry : entries.sorted(Comparator.reverseOrder()).toList()) {
                try {
                    Files.deleteIfExists(entry);
                } catch (Exception ignored) {
                    // a leftover temp file must not fail the test
                }
            }
        }
    }
}

@Test
void listOnlyWithReadFileIsAdvisoryWithMissingEvidenceWarning() {
var messages = new ArrayList<ChatMessage>();
Expand Down