From 265fb3205f9f17ec9b2f315ccae7e65395767deb Mon Sep 17 00:00:00 2001
From: Vissarion Zounarakis
Date: Sun, 31 May 2026 18:59:57 +0200
Subject: [PATCH] [T619] Render grounded path existence answers

---
 .../dev/talos/cli/modes/ExecutionOutcome.java |   7 +
 .../outcome/PathExistenceAnswerRenderer.java  | 127 ++++++++++++++++++
 .../talos/cli/modes/ExecutionOutcomeTest.java | 115 ++++++++++++++++
 3 files changed, 249 insertions(+)
 create mode 100644 src/main/java/dev/talos/runtime/outcome/PathExistenceAnswerRenderer.java

diff --git a/src/main/java/dev/talos/cli/modes/ExecutionOutcome.java b/src/main/java/dev/talos/cli/modes/ExecutionOutcome.java
index 5378eb9f..386f2698 100644
--- a/src/main/java/dev/talos/cli/modes/ExecutionOutcome.java
+++ b/src/main/java/dev/talos/cli/modes/ExecutionOutcome.java
@@ -9,6 +9,7 @@
 import dev.talos.runtime.outcome.MutationFailureAnswerRenderer;
 import dev.talos.runtime.outcome.MutationOutcome;
 import dev.talos.runtime.outcome.NoToolAnswerTruthfulnessGuard;
+import dev.talos.runtime.outcome.PathExistenceAnswerRenderer;
 import dev.talos.runtime.outcome.ProtectedReadAnswerGuard;
 import dev.talos.runtime.outcome.ReadOnlyToolLimitOutcome;
 import dev.talos.runtime.outcome.StaticVerificationAnswerRenderer;
@@ -286,6 +287,12 @@ static ExecutionOutcome fromToolLoop(
                     messages,
                     loopResult,
                     workspace);
+            current = PathExistenceAnswerRenderer.prependVerifiedStatusIfNeeded(
+                    current,
+                    safePlan,
+                    evidenceObligation,
+                    evidenceResult,
+                    workspace);
         }
         ReadOnlyToolLimitOutcome readOnlyToolLimit = ReadOnlyToolLimitOutcome.assess(
                 contract,
diff --git a/src/main/java/dev/talos/runtime/outcome/PathExistenceAnswerRenderer.java b/src/main/java/dev/talos/runtime/outcome/PathExistenceAnswerRenderer.java
new file mode 100644
index 00000000..d5cad972
--- /dev/null
+++ b/src/main/java/dev/talos/runtime/outcome/PathExistenceAnswerRenderer.java
@@ -0,0 +1,127 @@
+package dev.talos.runtime.outcome;
+
+import dev.talos.runtime.policy.EvidenceObligation;
+import dev.talos.runtime.policy.EvidenceObligationVerifier;
+import dev.talos.runtime.task.TaskContract;
+import dev.talos.runtime.toolcall.ToolCallSupport;
+import dev.talos.runtime.turn.CurrentTurnPlan;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+
+/** Renders deterministic file-existence facts once path-existence evidence is satisfied. */
+public final class PathExistenceAnswerRenderer {
+    private static final String PREFIX = "[Path existence verified]";
+
+    private PathExistenceAnswerRenderer() {}
+
+    /**
+     * Prepends a per-target existence summary to {@code answer} when the turn carried a
+     * satisfied path-existence evidence obligation.
+     *
+     * <p>The answer is returned as-is ({@code null} becomes {@code ""}) when it already starts
+     * with the summary prefix, the obligation is not {@code PATH_EXISTENCE_EVIDENCE_REQUIRED},
+     * the evidence result is absent or not {@code SATISFIED}, the workspace is {@code null} or
+     * cannot be made absolute, or the task contract yields no targets.
+     *
+     * @param answer answer text to decorate; may be {@code null}
+     * @param plan turn plan supplying the task contract; may be {@code null}
+     * @param obligation evidence obligation for this turn
+     * @param evidenceResult verification result for the obligation; may be {@code null}
+     * @param workspace root that targets are resolved against; may be {@code null}
+     * @return the summary, a blank line and the answer; the summary alone when the answer is
+     *     blank; or the answer as-is when no summary applies
+     */
+    public static String prependVerifiedStatusIfNeeded(
+            String answer,
+            CurrentTurnPlan plan,
+            EvidenceObligation obligation,
+            EvidenceObligationVerifier.Result evidenceResult,
+            Path workspace
+    ) {
+        String current = answer == null ? "" : answer;
+        if (current.startsWith(PREFIX)) return current;
+        if (obligation != EvidenceObligation.PATH_EXISTENCE_EVIDENCE_REQUIRED) return current;
+        if (evidenceResult == null || evidenceResult.status() != EvidenceObligationVerifier.Status.SATISFIED) {
+            return current;
+        }
+        if (workspace == null) return current;
+
+        List<String> targets = sortedTargets(plan == null ? null : plan.taskContract());
+        if (targets.isEmpty()) return current;
+
+        Path root;
+        try {
+            root = workspace.toAbsolutePath().normalize();
+        } catch (RuntimeException ignored) {
+            // Best effort: without an absolute root nothing can be checked, so keep the answer.
+            return current;
+        }
+
+        // status(...) always returns a label, so each target contributes exactly one line.
+        List<String> lines = new ArrayList<>(targets.size());
+        for (String target : targets) {
+            lines.add(target + ": " + status(root, target));
+        }
+
+        String summary = PREFIX + "\n- " + String.join("\n- ", lines);
+        return current.isBlank() ? summary : summary + "\n\n" + current;
+    }
+
+    /**
+     * Collects the contract's targets, passed through {@code ToolCallSupport.normalizePath},
+     * stripped, de-duplicated and sorted case-insensitively with natural order as tie-breaker.
+     * Source-evidence targets win; expected targets are used only when there are none.
+     */
+    private static List<String> sortedTargets(TaskContract contract) {
+        if (contract == null) return List.of();
+        // Read the accessor once and null-check it, mirroring the null guard on the chosen set.
+        Set<String> evidenceTargets = contract.sourceEvidenceTargets();
+        Set<String> targets = evidenceTargets == null || evidenceTargets.isEmpty()
+                ? contract.expectedTargets()
+                : evidenceTargets;
+        if (targets == null || targets.isEmpty()) return List.of();
+        return targets.stream()
+                .map(ToolCallSupport::normalizePath)
+                .map(String::strip)
+                .filter(target -> !target.isBlank())
+                .distinct()
+                .sorted(Comparator.comparing((String target) -> target.toLowerCase(Locale.ROOT))
+                        .thenComparing(Comparator.naturalOrder()))
+                .toList();
+    }
+
+    /**
+     * Labels a target as {@code "exists"}, {@code "not found"} or {@code "outside workspace"}.
+     * The last label also covers targets that {@code resolve} could not turn into a path.
+     */
+    private static String status(Path root, String target) {
+        Path resolved = resolve(root, target);
+        if (resolved == null) return "outside workspace";
+        return Files.exists(resolved) ? "exists" : "not found";
+    }
+
+    /**
+     * Resolves {@code target} against {@code root} and returns the normalized absolute path, or
+     * {@code null} when an argument is missing or blank, the path cannot be built, or the result
+     * does not start with {@code root}.
+     */
+    private static Path resolve(Path root, String target) {
+        if (root == null || target == null || target.isBlank()) return null;
+        try {
+            Path candidate = Path.of(target);
+            Path resolved = candidate.isAbsolute() ? candidate : root.resolve(candidate);
+            resolved = resolved.toAbsolutePath().normalize();
+            // NOTE(review): containment is lexical only; symlinks are not resolved here.
+            return resolved.startsWith(root) ? resolved : null;
+        } catch (RuntimeException ignored) {
+            // Unparseable targets (e.g. InvalidPathException) are treated like out-of-scope ones.
+            return null;
+        }
+    }
+}
diff --git a/src/test/java/dev/talos/cli/modes/ExecutionOutcomeTest.java b/src/test/java/dev/talos/cli/modes/ExecutionOutcomeTest.java
index 31fa429c..b16111e9 100644
--- a/src/test/java/dev/talos/cli/modes/ExecutionOutcomeTest.java
+++ b/src/test/java/dev/talos/cli/modes/ExecutionOutcomeTest.java
@@ -3118,6 +3118,121 @@ void attemptedProtectedReadFailureDoesNotReportNoToolAttempt() {
         assertTrue(outcome.taskOutcome().hasWarning(TruthWarningType.MISSING_EVIDENCE));
     }
 
+    @Test
+    void pathExistenceAnswerPrependsExactStatusWhenListDirEvidenceIsSatisfied() throws Exception {
+        Path ws = Files.createTempDirectory("talos-path-existence-summary-");
+        try {
+            Files.writeString(ws.resolve("scripts.js"), "console.log('present');\n");
+            Files.writeString(ws.resolve("styles.css"), "body { color: red; }\n");
+
+            var messages = new ArrayList<ChatMessage>();
+            messages.add(ChatMessage.system("sys"));
+            messages.add(ChatMessage.user(
+                    "Check whether scripts.js exists and whether script.js exists. Do not change anything."));
+
+            var plan = dev.talos.runtime.turn.CurrentTurnPlan.create(
+                    dev.talos.runtime.task.TaskContractResolver.fromMessages(messages),
+                    dev.talos.runtime.phase.ExecutionPhase.INSPECT,
+                    List.of("talos.list_dir", "talos.read_file"),
+                    List.of("talos.list_dir", "talos.read_file"),
+                    List.of());
+
+            var loopResult = new ToolCallLoop.LoopResult(
+                    "I checked the files.",
+                    1,
+                    1,
+                    List.of("talos.list_dir"),
+                    List.of(),
+                    0,
+                    0,
+                    false,
+                    0,
+                    List.of(),
+                    0,
+                    0,
+                    0,
+                    0,
+                    List.of(new ToolCallLoop.ToolOutcome(
+                            "talos.list_dir", ".", true, false, false,
+                            "scripts.js\nstyles.css\n", "")));
+
+            ExecutionOutcome outcome = ExecutionOutcome.fromToolLoop(
+                    loopResult.finalAnswer(), plan, messages, loopResult, ws, 0);
+
+            assertEquals(ExecutionOutcome.CompletionStatus.COMPLETE, outcome.completionStatus());
+            assertEquals(TaskCompletionStatus.READ_ONLY_ANSWERED, outcome.taskOutcome().completionStatus());
+            assertTrue(outcome.finalAnswer().startsWith("[Path existence verified]"),
+                    outcome.finalAnswer());
+            assertTrue(outcome.finalAnswer().contains("scripts.js: exists"), outcome.finalAnswer());
+            assertTrue(outcome.finalAnswer().contains("script.js: not found"), outcome.finalAnswer());
+            assertFalse(outcome.finalAnswer().startsWith("[Evidence incomplete:"), outcome.finalAnswer());
+            assertFalse(outcome.taskOutcome().hasWarning(TruthWarningType.MISSING_EVIDENCE));
+        } finally {
+            try (var walk = Files.walk(ws)) {
+                walk.sorted(Comparator.reverseOrder()).forEach(path -> {
+                    try { Files.deleteIfExists(path); } catch (Exception ignored) { }
+                });
+            }
+        }
+    }
+
+    @Test
+    void pathExistenceAnswerWithOnlyIrrelevantReadEvidenceRemainsContained() throws Exception {
+        Path ws = Files.createTempDirectory("talos-path-existence-irrelevant-read-");
+        try {
+            Files.writeString(ws.resolve("scripts.js"), "console.log('present');\n");
+            Files.writeString(ws.resolve("styles.css"), "body { color: red; }\n");
+
+            var messages = new ArrayList<ChatMessage>();
+            messages.add(ChatMessage.system("sys"));
+            messages.add(ChatMessage.user(
+                    "Check whether scripts.js exists and whether script.js exists. Do not change anything."));
+
+            var plan = dev.talos.runtime.turn.CurrentTurnPlan.create(
+                    dev.talos.runtime.task.TaskContractResolver.fromMessages(messages),
+                    dev.talos.runtime.phase.ExecutionPhase.INSPECT,
+                    List.of("talos.list_dir", "talos.read_file"),
+                    List.of("talos.list_dir", "talos.read_file"),
+                    List.of());
+
+            var loopResult = new ToolCallLoop.LoopResult(
+                    "scripts.js does not exist.",
+                    1,
+                    1,
+                    List.of("talos.read_file"),
+                    List.of(),
+                    1,
+                    0,
+                    false,
+                    0,
+                    List.of("styles.css"),
+                    0,
+                    0,
+                    0,
+                    0,
+                    List.of(new ToolCallLoop.ToolOutcome(
+                            "talos.read_file", "styles.css", true, false, false,
+                            "body { color: red; }", "")));
+
+            ExecutionOutcome outcome = ExecutionOutcome.fromToolLoop(
+                    loopResult.finalAnswer(), plan, messages, loopResult, ws, 0);
+
+            assertEquals(ExecutionOutcome.CompletionStatus.ADVISORY_ONLY, outcome.completionStatus());
+            assertTrue(outcome.finalAnswer().startsWith(
+                    "[Evidence incomplete: required workspace evidence was not gathered in this turn.]"),
+                    outcome.finalAnswer());
+            assertFalse(outcome.finalAnswer().contains("scripts.js does not exist"), outcome.finalAnswer());
+            assertFalse(outcome.finalAnswer().contains("scripts.js: exists"), outcome.finalAnswer());
+            assertTrue(outcome.taskOutcome().hasWarning(TruthWarningType.MISSING_EVIDENCE));
+        } finally {
+            try (var walk = Files.walk(ws)) {
+                walk.sorted(Comparator.reverseOrder()).forEach(path -> {
+                    try { Files.deleteIfExists(path); } catch (Exception ignored) { }
+                });
+            }
+        }
+    }
+
     @Test
     void listOnlyWithReadFileIsAdvisoryWithMissingEvidenceWarning() {
         var messages = new ArrayList<ChatMessage>();