diff --git a/src/main/java/dev/talos/cli/modes/ReadEvidenceHandoff.java b/src/main/java/dev/talos/cli/modes/ReadEvidenceHandoff.java index 03b9ae65..f173339c 100644 --- a/src/main/java/dev/talos/cli/modes/ReadEvidenceHandoff.java +++ b/src/main/java/dev/talos/cli/modes/ReadEvidenceHandoff.java @@ -131,7 +131,8 @@ static Result readEvidenceRecoveryForPartialTargetsIfNeeded( safePlan, workspace, ctx == null ? null : ctx.cfg()); - if (obligation != EvidenceObligation.READ_TARGET_REQUIRED) { + if (obligation != EvidenceObligation.READ_TARGET_REQUIRED + && obligation != EvidenceObligation.PATH_EXISTENCE_EVIDENCE_REQUIRED) { return new Result(answer, null, null); } if (contract.mutationRequested() || contract.mutationAllowed()) { diff --git a/src/main/java/dev/talos/runtime/outcome/EvidenceContainmentAnswerGuard.java b/src/main/java/dev/talos/runtime/outcome/EvidenceContainmentAnswerGuard.java index 7a2727fb..abd07db3 100644 --- a/src/main/java/dev/talos/runtime/outcome/EvidenceContainmentAnswerGuard.java +++ b/src/main/java/dev/talos/runtime/outcome/EvidenceContainmentAnswerGuard.java @@ -77,6 +77,10 @@ private static String missingEvidenceContainmentMessage( "I did not inspect the required workspace target this turn, so I cannot " + "answer from its contents or propose grounded changes yet." + targetSentence(plan); + case PATH_EXISTENCE_EVIDENCE_REQUIRED -> + "I did not gather directory or target-read evidence for the requested path " + + "existence check, so I cannot answer whether those files exist yet." + + targetSentence(plan); case LIST_DIRECTORY_ONLY -> "I did not complete a directory-list-only evidence path this turn. " + "I cannot answer with file contents or derived file claims from " diff --git a/src/main/java/dev/talos/runtime/policy/CurrentTurnCapabilityFrame.java b/src/main/java/dev/talos/runtime/policy/CurrentTurnCapabilityFrame.java index c5e63f2b..d3423f51 100644 --- a/src/main/java/dev/talos/runtime/policy/CurrentTurnCapabilityFrame.java +++ b/src/main/java/dev/talos/runtime/policy/CurrentTurnCapabilityFrame.java @@ -398,6 +398,9 @@ private static String promptPreview(String value) { private static String evidenceGuidance(EvidenceObligation evidence) { return switch (evidence) { case READ_TARGET_REQUIRED -> "Evidence: read the named target before answering."; + case PATH_EXISTENCE_EVIDENCE_REQUIRED -> + "Evidence: verify path existence with talos.list_dir for the parent directory " + + "or talos.read_file for each named target before answering."; case PROTECTED_READ_APPROVAL_REQUIRED -> "Evidence: the named target is protected. " + "Call talos.read_file for the protected target; runtime will request approval. " diff --git a/src/main/java/dev/talos/runtime/policy/EvidenceGate.java b/src/main/java/dev/talos/runtime/policy/EvidenceGate.java index 52341d14..89bde7b5 100644 --- a/src/main/java/dev/talos/runtime/policy/EvidenceGate.java +++ b/src/main/java/dev/talos/runtime/policy/EvidenceGate.java @@ -39,6 +39,7 @@ public static EvidenceObligation selectObligation(CurrentTurnPlan plan, Path wor public static boolean requiresReadEvidenceHandoff(EvidenceObligation obligation) { return obligation == EvidenceObligation.READ_TARGET_REQUIRED + || obligation == EvidenceObligation.PATH_EXISTENCE_EVIDENCE_REQUIRED || obligation == EvidenceObligation.PROTECTED_READ_APPROVAL_REQUIRED || obligation == EvidenceObligation.UNSUPPORTED_CAPABILITY_CHECK_REQUIRED; } @@ -70,7 +71,8 @@ public static List handoffTargets( } else if (obligation == EvidenceObligation.UNSUPPORTED_CAPABILITY_CHECK_REQUIRED && isUnsupportedExpectedTarget(target, cfg)) { targets.add(target); - } else if (obligation == EvidenceObligation.READ_TARGET_REQUIRED && !protectedTarget) { + } else if ((obligation == EvidenceObligation.READ_TARGET_REQUIRED + || obligation == EvidenceObligation.PATH_EXISTENCE_EVIDENCE_REQUIRED) && !protectedTarget) { targets.add(target); } } diff --git a/src/main/java/dev/talos/runtime/policy/EvidenceObligation.java b/src/main/java/dev/talos/runtime/policy/EvidenceObligation.java index 4b0e6959..915e9ef7 100644 --- a/src/main/java/dev/talos/runtime/policy/EvidenceObligation.java +++ b/src/main/java/dev/talos/runtime/policy/EvidenceObligation.java @@ -5,6 +5,7 @@ public enum EvidenceObligation { NONE, LIST_DIRECTORY_ONLY, READ_TARGET_REQUIRED, + PATH_EXISTENCE_EVIDENCE_REQUIRED, PROTECTED_READ_APPROVAL_REQUIRED, WORKSPACE_INSPECTION_REQUIRED, STATIC_WEB_DIAGNOSIS_REQUIRED, diff --git a/src/main/java/dev/talos/runtime/policy/EvidenceObligationPolicy.java b/src/main/java/dev/talos/runtime/policy/EvidenceObligationPolicy.java index 78acc08d..c1b7bdaf 100644 --- a/src/main/java/dev/talos/runtime/policy/EvidenceObligationPolicy.java +++ b/src/main/java/dev/talos/runtime/policy/EvidenceObligationPolicy.java @@ -45,6 +45,9 @@ public static EvidenceObligation derive( if (!contract.mutationAllowed() && hasProtectedExpectedTarget(contract, workspace)) { return EvidenceObligation.PROTECTED_READ_APPROVAL_REQUIRED; } + if (hasReadOnlyPathExistenceObligation(contract)) { + return EvidenceObligation.PATH_EXISTENCE_EVIDENCE_REQUIRED; + } if (hasStaticWebDiagnosisObligation(contract, type)) { return EvidenceObligation.STATIC_WEB_DIAGNOSIS_REQUIRED; } @@ -128,6 +131,23 @@ private static boolean hasStaticWebDiagnosisObligation(TaskContract contract, Ta || lower.contains("button"); } + private static boolean hasReadOnlyPathExistenceObligation(TaskContract contract) { + if (contract == null || contract.mutationAllowed() || contract.expectedTargets().isEmpty()) { + return false; + } + String request = contract.originalUserRequest(); + if (request == null || request.isBlank()) return false; + String lower = request.toLowerCase(Locale.ROOT); + boolean asksExistence = lower.contains("exists") + || lower.contains("exist") + || lower.contains("present") + || lower.contains("is there") + || lower.contains("are there"); + boolean asksPathStatus = lower.contains("path") + && (lower.contains("check") || lower.contains("verify") || lower.contains("whether")); + return asksExistence || asksPathStatus; + } + private static boolean isStaticWebTarget(String target) { if (target == null || target.isBlank()) return false; String lower = target.replace('\\', '/').toLowerCase(Locale.ROOT); diff --git a/src/main/java/dev/talos/runtime/policy/EvidenceObligationVerifier.java b/src/main/java/dev/talos/runtime/policy/EvidenceObligationVerifier.java index aba55d7f..c1becfc0 100644 --- a/src/main/java/dev/talos/runtime/policy/EvidenceObligationVerifier.java +++ b/src/main/java/dev/talos/runtime/policy/EvidenceObligationVerifier.java @@ -78,6 +78,7 @@ public static Result verify( case NONE -> Result.satisfied("No workspace evidence was required."); case LIST_DIRECTORY_ONLY -> verifyListDirectoryOnly(safeOutcomes); case READ_TARGET_REQUIRED -> verifyReadTargets(targets, safeOutcomes, false); + case PATH_EXISTENCE_EVIDENCE_REQUIRED -> verifyPathExistenceTargets(targets, safeOutcomes); case PROTECTED_READ_APPROVAL_REQUIRED -> verifyProtectedRead(targets, safeOutcomes); case STATIC_WEB_DIAGNOSIS_REQUIRED -> verifyStaticWebDiagnosis(targets, safeOutcomes, workspace); case WORKSPACE_INSPECTION_REQUIRED, VERIFY_FROM_TRACE_OR_EVIDENCE -> @@ -190,6 +191,46 @@ private static Result verifyProtectedRead(Set expectedTargets, List expectedTargets, + List outcomes + ) { + if (outcomes.isEmpty()) { + return Result.unsatisfied("Path existence evidence was not gathered."); + } + return aggregateTargetResults( + expectedTargets, + target -> verifyPathExistenceTarget(target, outcomes), + "Path existence evidence was gathered."); + } + + private static Result verifyPathExistenceTarget( + String expectedTarget, + List outcomes + ) { + String expected = normalizePath(expectedTarget); + for (ToolCallLoop.ToolOutcome outcome : outcomes) { + if (!"talos.read_file".equals(canonicalToolName(outcome.toolName()))) continue; + if (!expected.equals(normalizePath(outcome.pathHint()))) continue; + if (outcome.denied()) { + return Result.blocked("Path existence read was blocked by approval."); + } + return Result.satisfied("Path existence evidence was gathered."); + } + String expectedParent = parentDirectory(expected); + for (ToolCallLoop.ToolOutcome outcome : outcomes) { + if (!"talos.list_dir".equals(canonicalToolName(outcome.toolName()))) continue; + if (outcome.denied()) { + return Result.blocked("Path existence directory listing was blocked by approval."); + } + if (!outcome.success()) continue; + if (expectedParent.equals(normalizeDirectory(outcome.pathHint()))) { + return Result.satisfied("Path existence evidence was gathered."); + } + } + return Result.unsatisfied("Path existence evidence was not gathered for " + expectedTarget + "."); + } + private static Result verifyReadTarget( String expectedTarget, List outcomes, @@ -490,6 +531,19 @@ private static String normalizePath(String path) { return normalized; } + private static String normalizeDirectory(String path) { + String normalized = normalizePath(path); + return normalized.isBlank() ? "." : normalized; + } + + private static String parentDirectory(String normalizedPath) { + String normalized = normalizePath(normalizedPath); + int slash = normalized.lastIndexOf('/'); + if (slash < 0) return "."; + String parent = normalized.substring(0, slash); + return parent.isBlank() ? "." : parent; + } + private static String canonicalToolName(String toolName) { ToolAliasPolicy.Decision decision = ToolAliasPolicy.resolve(toolName); if (decision.accepted() && decision.canonicalToolName() != null && !decision.canonicalToolName().isBlank()) { diff --git a/src/test/java/dev/talos/cli/modes/ReadEvidenceHandoffTest.java b/src/test/java/dev/talos/cli/modes/ReadEvidenceHandoffTest.java index f637d792..5292e03e 100644 --- a/src/test/java/dev/talos/cli/modes/ReadEvidenceHandoffTest.java +++ b/src/test/java/dev/talos/cli/modes/ReadEvidenceHandoffTest.java @@ -163,6 +163,60 @@ void partialTargetRecoveryDoesNotRetryAfterDeniedEvidenceTarget(@TempDir Path wo assertNull(result.extraSummary()); } + @Test + void pathExistenceRecoveryRunsAfterIrrelevantReadEvidence(@TempDir Path workspace) throws Exception { + Files.writeString(workspace.resolve("scripts.js"), "console.log('present');\n"); + Files.writeString(workspace.resolve("styles.css"), "body { color: red; }\n"); + Context ctx = context(workspace, "Path existence answer after deterministic handoff."); + List messages = messages( + "Check whether scripts.js exists and whether script.js exists. Do not change anything."); + CurrentTurnPlan plan = plan( + new TaskContract( + TaskType.DIAGNOSE_ONLY, + false, + false, + false, + Set.of("scripts.js", "script.js"), + Set.of(), + "Check whether scripts.js exists and whether script.js exists. Do not change anything."), + EvidenceObligation.PATH_EXISTENCE_EVIDENCE_REQUIRED); + ToolCallLoop.LoopResult irrelevantRead = new ToolCallLoop.LoopResult( + "scripts.js does not exist.", + 1, + 1, + List.of("talos.read_file"), + messages, + 1, + 0, + false, + 0, + List.of("styles.css"), + 0, + 0, + 0, + 0, + List.of(new ToolCallLoop.ToolOutcome( + "talos.read_file", + "styles.css", + true, + false, + false, + "body { color: red; }", + ""))); + + ReadEvidenceHandoff.Result result = ReadEvidenceHandoff.readEvidenceRecoveryForPartialTargetsIfNeeded( + "scripts.js does not exist.", + messages, + plan, + irrelevantRead, + workspace, + ctx); + + assertNotNull(result.loopResult(), "path existence should recover from irrelevant read evidence"); + assertEquals("Path existence answer after deterministic handoff.", result.answer()); + assertTrue(result.extraSummary().contains("talos.read_file"), result.extraSummary()); + } + private static CurrentTurnPlan plan(TaskContract contract, EvidenceObligation obligation) { return new CurrentTurnPlan( contract, diff --git a/src/test/java/dev/talos/runtime/outcome/EvidenceContainmentAnswerGuardTest.java b/src/test/java/dev/talos/runtime/outcome/EvidenceContainmentAnswerGuardTest.java index fd2225ca..f409c017 100644 --- a/src/test/java/dev/talos/runtime/outcome/EvidenceContainmentAnswerGuardTest.java +++ b/src/test/java/dev/talos/runtime/outcome/EvidenceContainmentAnswerGuardTest.java @@ -47,6 +47,27 @@ I did not inspect the required workspace target this turn, so I cannot answer fr assertFalse(answer.contains("Proposed change"), answer); } + @Test + void pathExistenceMissingEvidenceSuppressesFabricatedExistenceAnswer() { + String answer = EvidenceContainmentAnswerGuard.containMissingEvidence( + "scripts.js does not exist and script.js exists.", + pathExistencePlan(), + EvidenceObligation.PATH_EXISTENCE_EVIDENCE_REQUIRED, + EvidenceObligationVerifier.Result.unsatisfied( + "Path existence evidence was not gathered for scripts.js."), + MARKERS); + + assertTrue(answer.startsWith(EvidenceObligationVerifier.MISSING_EVIDENCE_PREFIX), answer); + assertTrue(answer.contains( + "I did not gather directory or target-read evidence for the requested path existence check"), + answer); + assertTrue(answer.contains("Required target(s):"), answer); + assertTrue(answer.contains("scripts.js"), answer); + assertTrue(answer.contains("script.js"), answer); + assertFalse(answer.contains("scripts.js does not exist"), answer); + assertFalse(answer.contains("script.js exists"), answer); + } + @Test void protectedReadNotAttemptedSuppressesFabricatedProtectedBody() { String answer = EvidenceContainmentAnswerGuard.containMissingEvidence( @@ -164,4 +185,21 @@ private static CurrentTurnPlan readTargetPlan(String target) { List.of("talos.read_file"), List.of()); } + + private static CurrentTurnPlan pathExistencePlan() { + TaskContract contract = new TaskContract( + TaskType.DIAGNOSE_ONLY, + false, + false, + false, + Set.of("scripts.js", "script.js"), + Set.of(), + "Check whether scripts.js exists and whether script.js exists. Do not change anything."); + return CurrentTurnPlan.create( + contract, + ExecutionPhase.INSPECT, + List.of("talos.list_dir", "talos.read_file"), + List.of("talos.list_dir", "talos.read_file"), + List.of()); + } } diff --git a/src/test/java/dev/talos/runtime/policy/EvidenceGateTest.java b/src/test/java/dev/talos/runtime/policy/EvidenceGateTest.java index d1160d08..c2bbe3b9 100644 --- a/src/test/java/dev/talos/runtime/policy/EvidenceGateTest.java +++ b/src/test/java/dev/talos/runtime/policy/EvidenceGateTest.java @@ -68,6 +68,27 @@ void readTargetHandoffSkipsProtectedTargets(@TempDir Path workspace) { assertFalse(targets.contains(".env"), targets.toString()); } + @Test + void pathExistenceHandoffUsesNamedNonProtectedTargets(@TempDir Path workspace) { + TaskContract contract = new TaskContract( + TaskType.DIAGNOSE_ONLY, + false, + false, + false, + Set.of("scripts.js", "script.js"), + Set.of(), + "Check whether scripts.js exists and whether script.js exists. Do not change anything."); + + assertTrue(EvidenceGate.requiresReadEvidenceHandoff( + EvidenceObligation.PATH_EXISTENCE_EVIDENCE_REQUIRED)); + assertEquals( + Set.of("scripts.js", "script.js"), + Set.copyOf(EvidenceGate.handoffTargets( + contract, + EvidenceObligation.PATH_EXISTENCE_EVIDENCE_REQUIRED, + workspace))); + } + @Test void protectedReadHandoffRequiresExplicitReadIntent(@TempDir Path workspace) { TaskContract readEnv = new TaskContract( diff --git a/src/test/java/dev/talos/runtime/policy/EvidenceObligationPolicyTest.java b/src/test/java/dev/talos/runtime/policy/EvidenceObligationPolicyTest.java index f3e5a900..7c0361cc 100644 --- a/src/test/java/dev/talos/runtime/policy/EvidenceObligationPolicyTest.java +++ b/src/test/java/dev/talos/runtime/policy/EvidenceObligationPolicyTest.java @@ -75,6 +75,16 @@ void staticWebDiagnosisRequiresStaticWebDiagnosisEvidence() { EvidenceObligationPolicy.derive(contract, ExecutionPhase.INSPECT, WORKSPACE)); } + @Test + void fileExistenceQuestionRequiresPathExistenceEvidenceBeforeStaticWebDiagnosis() { + TaskContract contract = TaskContractResolver.fromUserRequest( + "Check whether scripts.js exists and whether script.js exists. Do not change anything."); + + assertEquals( + EvidenceObligation.PATH_EXISTENCE_EVIDENCE_REQUIRED, + EvidenceObligationPolicy.derive(contract, ExecutionPhase.INSPECT, WORKSPACE)); + } + @Test void extractableDocumentTargetRequiresNormalReadEvidence() { TaskContract contract = TaskContractResolver.fromUserRequest("Read report.docx and summarize it."); diff --git a/src/test/java/dev/talos/runtime/policy/EvidenceObligationVerifierTest.java b/src/test/java/dev/talos/runtime/policy/EvidenceObligationVerifierTest.java index 708485e9..b27f5769 100644 --- a/src/test/java/dev/talos/runtime/policy/EvidenceObligationVerifierTest.java +++ b/src/test/java/dev/talos/runtime/policy/EvidenceObligationVerifierTest.java @@ -208,6 +208,46 @@ void listOnlyRejectsRetrieve() { assertEquals(EvidenceObligationVerifier.Status.UNSATISFIED, result.status()); } + @Test + void pathExistenceRejectsIrrelevantReadEvidence() { + var result = EvidenceObligationVerifier.verify( + EvidenceObligation.PATH_EXISTENCE_EVIDENCE_REQUIRED, + Set.of("scripts.js", "script.js"), + List.of(new ToolCallLoop.ToolOutcome( + "talos.read_file", "styles.css", true, false, false, + "body { color: red; }", ""))); + + assertEquals(EvidenceObligationVerifier.Status.UNSATISFIED, result.status()); + } + + @Test + void pathExistenceAcceptsParentDirectoryListingEvidence() { + var result = EvidenceObligationVerifier.verify( + EvidenceObligation.PATH_EXISTENCE_EVIDENCE_REQUIRED, + Set.of("scripts.js", "script.js"), + List.of(new ToolCallLoop.ToolOutcome( + "talos.list_dir", ".", true, false, false, + "index.html\nscripts.js\nstyles.css\n", ""))); + + assertEquals(EvidenceObligationVerifier.Status.SATISFIED, result.status()); + } + + @Test + void pathExistenceAcceptsDirectTargetReadAttempts() { + var result = EvidenceObligationVerifier.verify( + EvidenceObligation.PATH_EXISTENCE_EVIDENCE_REQUIRED, + Set.of("scripts.js", "script.js"), + List.of( + new ToolCallLoop.ToolOutcome( + "talos.read_file", "scripts.js", true, false, false, + "console.log('ok');", ""), + new ToolCallLoop.ToolOutcome( + "talos.read_file", "script.js", false, false, false, + "", "script.js was not found.", null, ToolError.NOT_FOUND))); + + assertEquals(EvidenceObligationVerifier.Status.SATISFIED, result.status()); + } + @Test void staticWebDiagnosisRejectsDirectoryListingOnlyWhenIndexIsPresent() { var result = EvidenceObligationVerifier.verify(