From cfeba28acd9a83a07b06e2ee7636c0c10109856a Mon Sep 17 00:00:00 2001
From: Ryan Hicks <ryanoflucas@gmail.com>
Date: Sun, 22 Mar 2026 16:01:38 -0500
Subject: [PATCH 1/7] fix: standardize token estimation heuristic and add
 consistency tests

---
 code_puppy/tools/file_operations.py           |  2 +-
 .../test_token_estimation_consistency.py      | 52 +++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)
 create mode 100644 tests/agents/test_token_estimation_consistency.py

diff --git a/code_puppy/tools/file_operations.py b/code_puppy/tools/file_operations.py
index 3796e6e49..d175f7680 100644
--- a/code_puppy/tools/file_operations.py
+++ b/code_puppy/tools/file_operations.py
@@ -513,7 +513,7 @@ def _read_file(
                 )
 
             # Simple approximation: ~4 characters per token
-            num_tokens = len(content) // 4
+            num_tokens = max(1, math.floor(len(content) / 2.5))
             if num_tokens > 10000:
                 return ReadFileOutput(
                     content=None,
diff --git a/tests/agents/test_token_estimation_consistency.py b/tests/agents/test_token_estimation_consistency.py
new file mode 100644
index 000000000..3f29d73a5
--- /dev/null
+++ b/tests/agents/test_token_estimation_consistency.py
@@ -0,0 +1,52 @@
+"""Tests for token estimation consistency across modules.
+
+Ensures file_operations._read_file and BaseAgent.estimate_token_count
+use the same chars-per-token heuristic to prevent unexpected early
+compaction triggered by estimation mismatch.
+"""
+
+import math
+
+from code_puppy.agents.agent_code_puppy import CodePuppyAgent
+
+
+class TestTokenEstimationConsistency:
+    """Token estimation should be consistent between file_operations and BaseAgent."""
+
+    def test_estimate_token_count_matches_file_operations_heuristic(self):
+        """
+        BaseAgent.estimate_token_count and file_operations._read_file
+        must use the same 2.5 chars/token heuristic.
+        """
+        agent = CodePuppyAgent()
+        content = "x" * 1000
+
+        base_agent_estimate = agent.estimate_token_count(content)
+        expected_heuristic = math.floor(len(content) / 2.5)
+
+        assert base_agent_estimate == expected_heuristic
+
+    def test_estimation_consistent_across_content_sizes(self):
+        """
+        Consistency holds across small, medium, and large content sizes.
+        """
+        agent = CodePuppyAgent()
+
+        for size in [100, 1000, 10000, 25000]:
+            content = "x" * size
+            base_agent_estimate = agent.estimate_token_count(content)
+            expected_heuristic = math.floor(len(content) / 2.5)
+            assert base_agent_estimate == expected_heuristic, (
+                f"Mismatch at size {size}: "
+                f"base_agent={base_agent_estimate}, "
+                f"expected={expected_heuristic}"
+            )
+
+    def test_minimum_token_count_is_one(self):
+        """
+        estimate_token_count enforces a minimum of 1 token even for empty content.
+        """
+        agent = CodePuppyAgent()
+
+        result = agent.estimate_token_count("")
+        assert result == 1
\ No newline at end of file

From 78ce419f3e63ea81f889cdb55193bfe564cb1e87 Mon Sep 17 00:00:00 2001
From: Ryan Hicks <ryanoflucas@gmail.com>
Date: Sun, 22 Mar 2026 16:26:18 -0500
Subject: [PATCH 2/7] fix: fall back to non-recursive listing when ripgrep is
 not installed

---
 code_puppy/tools/file_operations.py           | 10 ++--
 .../tools/test_list_files_ripgrep_fallback.py | 48 +++++++++++++++++++
 2 files changed, 54 insertions(+), 4 deletions(-)
 create mode 100644 tests/tools/test_list_files_ripgrep_fallback.py

diff --git a/code_puppy/tools/file_operations.py b/code_puppy/tools/file_operations.py
index d175f7680..048250fed 100644
--- a/code_puppy/tools/file_operations.py
+++ b/code_puppy/tools/file_operations.py
@@ -193,10 +193,12 @@ def _list_files(
                     break
 
         if not rg_path and recursive:
-            # Only need ripgrep for recursive listings
-            error_msg = "Error: ripgrep (rg) not found. Please install ripgrep to use this tool."
-            return ListFileOutput(content=error_msg, error=error_msg)
-
+            # Fall back to non-recursive listing when ripgrep is not available
+            output_lines.append(
+                "Warning: ripgrep (rg) not found. Falling back to non-recursive listing. "
+                "Install ripgrep for full recursive support."
+            )
+            recursive = False
         # Only use ripgrep for recursive listings
         if recursive:
             # Build command for ripgrep --files
diff --git a/tests/tools/test_list_files_ripgrep_fallback.py b/tests/tools/test_list_files_ripgrep_fallback.py
new file mode 100644
index 000000000..079a246fc
--- /dev/null
+++ b/tests/tools/test_list_files_ripgrep_fallback.py
@@ -0,0 +1,48 @@
+"""Regression test for ripgrep fallback in _list_files.
+
+When ripgrep is not installed, _list_files should fall back to
+non-recursive os.listdir instead of returning an error.
+"""
+
+import os
+import tempfile
+from unittest.mock import patch
+
+from code_puppy.tools.file_operations import _list_files
+
+
+class TestListFilesRipgrepFallback:
+    """_list_files should gracefully handle missing ripgrep."""
+
+    def test_falls_back_when_ripgrep_not_found(self):
+        """
+        When ripgrep is not installed, _list_files should return
+        a non-recursive listing instead of an error.
+        """
+        with tempfile.TemporaryDirectory() as tmpdir:
+            test_file = os.path.join(tmpdir, "test.py")
+            with open(test_file, "w") as f:
+                f.write("print('hello')")
+
+            with patch("shutil.which", return_value=None):
+                result = _list_files(None, tmpdir, recursive=True)
+
+            # Should not return a hard error
+            assert result.content is not None
+            assert "not found" not in (result.content or "").lower() or "falling back" in (result.content or "").lower()
+            # Should still return file listing
+            assert "test.py" in result.content
+
+    def test_returns_files_without_ripgrep(self):
+        """
+        Files in the directory should be listed even without ripgrep.
+        """
+        with tempfile.TemporaryDirectory() as tmpdir:
+            test_file = os.path.join(tmpdir, "myfile.py")
+            with open(test_file, "w") as f:
+                f.write("x = 1")
+
+            with patch("shutil.which", return_value=None):
+                result = _list_files(None, tmpdir, recursive=True)
+
+            assert "myfile.py" in result.content
\ No newline at end of file

From b4a9938582fb4dae456e10be030765096a739e8b Mon Sep 17 00:00:00 2001
From: Ryan Hicks <ryanoflucas@gmail.com>
Date: Sun, 22 Mar 2026 16:37:16 -0500
Subject: [PATCH 3/7] fix: add missing math import and update stale token
 estimation comment

---
 code_puppy/tools/file_operations.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/code_puppy/tools/file_operations.py b/code_puppy/tools/file_operations.py
index 048250fed..5e01787a1 100644
--- a/code_puppy/tools/file_operations.py
+++ b/code_puppy/tools/file_operations.py
@@ -1,5 +1,6 @@
 # file_operations.py
 
+import math
 import os
 import shutil
 import subprocess
@@ -514,7 +515,7 @@ def _read_file(
                     for char in content
                 )
 
-            # Simple approximation: ~4 characters per token
+            # Token estimation consistent with BaseAgent (~2.5 characters per token)
             num_tokens = max(1, math.floor(len(content) / 2.5))
             if num_tokens > 10000:
                 return ReadFileOutput(

From c2a6663eb8ad8a22cc8bacc33beb575d039956a8 Mon Sep 17 00:00:00 2001
From: Ryan Hicks <ryanoflucas@gmail.com>
Date: Sun, 22 Mar 2026 17:12:05 -0500
Subject: [PATCH 4/7] fix: update test and stale comment to reflect ripgrep
 fallback behavior

---
 tests/tools/test_file_operations_coverage.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/tools/test_file_operations_coverage.py b/tests/tools/test_file_operations_coverage.py
index e14d62773..79de8cfc7 100644
--- a/tests/tools/test_file_operations_coverage.py
+++ b/tests/tools/test_file_operations_coverage.py
@@ -268,8 +268,9 @@ def test_list_files_ripgrep_not_found_recursive(self, tmp_path):
         ):
             result = _list_files(None, str(tmp_path), recursive=True)
 
-        assert result.error is not None
-        assert "ripgrep" in result.error.lower() or "rg" in result.error.lower()
+        # Fallback behavior: content is returned; any error must come with the warning
+        assert result.content is not None
+        assert result.error is None or "falling back" in (result.content or "").lower()
 
     def test_list_files_non_recursive_without_ripgrep(self, tmp_path):
         """Test non-recursive listing works without ripgrep."""

From d32b274f023c69b0cd8591740ab3215b28e2c22f Mon Sep 17 00:00:00 2001
From: Ryan Hicks <ryanoflucas@gmail.com>
Date: Wed, 15 Apr 2026 19:01:46 -0500
Subject: [PATCH 5/7] Delete tests/agents/test_token_estimation_consistency.py

---
 .../test_token_estimation_consistency.py      | 52 -------------------
 1 file changed, 52 deletions(-)
 delete mode 100644 tests/agents/test_token_estimation_consistency.py

diff --git a/tests/agents/test_token_estimation_consistency.py b/tests/agents/test_token_estimation_consistency.py
deleted file mode 100644
index 3f29d73a5..000000000
--- a/tests/agents/test_token_estimation_consistency.py
+++ /dev/null
@@ -1,52 +0,0 @@
-"""Tests for token estimation consistency across modules.
-
-Ensures file_operations._read_file and BaseAgent.estimate_token_count
-use the same chars-per-token heuristic to prevent unexpected early
-compaction triggered by estimation mismatch.
-"""
-
-import math
-
-from code_puppy.agents.agent_code_puppy import CodePuppyAgent
-
-
-class TestTokenEstimationConsistency:
-    """Token estimation should be consistent between file_operations and BaseAgent."""
-
-    def test_estimate_token_count_matches_file_operations_heuristic(self):
-        """
-        BaseAgent.estimate_token_count and file_operations._read_file
-        must use the same 2.5 chars/token heuristic.
-        """
-        agent = CodePuppyAgent()
-        content = "x" * 1000
-
-        base_agent_estimate = agent.estimate_token_count(content)
-        expected_heuristic = math.floor(len(content) / 2.5)
-
-        assert base_agent_estimate == expected_heuristic
-
-    def test_estimation_consistent_across_content_sizes(self):
-        """
-        Consistency holds across small, medium, and large content sizes.
-        """
-        agent = CodePuppyAgent()
-
-        for size in [100, 1000, 10000, 25000]:
-            content = "x" * size
-            base_agent_estimate = agent.estimate_token_count(content)
-            expected_heuristic = math.floor(len(content) / 2.5)
-            assert base_agent_estimate == expected_heuristic, (
-                f"Mismatch at size {size}: "
-                f"base_agent={base_agent_estimate}, "
-                f"expected={expected_heuristic}"
-            )
-
-    def test_minimum_token_count_is_one(self):
-        """
-        estimate_token_count enforces a minimum of 1 token even for empty content.
-        """
-        agent = CodePuppyAgent()
-
-        result = agent.estimate_token_count("")
-        assert result == 1
\ No newline at end of file

From b76f6dc25708b80ac6459e215e3e8097b82963aa Mon Sep 17 00:00:00 2001
From: Ryan Hicks <ryanoflucas@gmail.com>
Date: Wed, 15 Apr 2026 19:16:13 -0500
Subject: [PATCH 6/7] style: apply ruff formatting

---
 tests/tools/test_list_files_ripgrep_fallback.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/tools/test_list_files_ripgrep_fallback.py b/tests/tools/test_list_files_ripgrep_fallback.py
index 079a246fc..0cf091b6c 100644
--- a/tests/tools/test_list_files_ripgrep_fallback.py
+++ b/tests/tools/test_list_files_ripgrep_fallback.py
@@ -29,7 +29,10 @@ def test_falls_back_when_ripgrep_not_found(self):
 
             # Should not return a hard error
             assert result.content is not None
-            assert "not found" not in (result.content or "").lower() or "falling back" in (result.content or "").lower()
+            assert (
+                "not found" not in (result.content or "").lower()
+                or "falling back" in (result.content or "").lower()
+            )
             # Should still return file listing
             assert "test.py" in result.content
 
@@ -45,4 +48,4 @@ def test_returns_files_without_ripgrep(self):
             with patch("shutil.which", return_value=None):
                 result = _list_files(None, tmpdir, recursive=True)
 
-            assert "myfile.py" in result.content
\ No newline at end of file
+            assert "myfile.py" in result.content

From 1b46d6d5ce3c572bb9baa1ff157a0c11e5412018 Mon Sep 17 00:00:00 2001
From: Ryan Hicks <ryanoflucas@gmail.com>
Date: Wed, 15 Apr 2026 19:21:52 -0500
Subject: [PATCH 7/7] fix: update tests for 2.5 chars/token heuristic

---
 tests/tools/test_file_operations_extended.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/tools/test_file_operations_extended.py b/tests/tools/test_file_operations_extended.py
index e535c702f..11cd384bf 100644
--- a/tests/tools/test_file_operations_extended.py
+++ b/tests/tools/test_file_operations_extended.py
@@ -75,7 +75,7 @@ def test_read_file_line_range_out_of_bounds(self, tmp_path):
 
         assert result.error is None
         assert result.content == ""  # Should return empty string
-        assert result.num_tokens == 0
+        assert result.num_tokens == 1
 
     def test_read_file_line_range_negative_start(self, tmp_path):
         """Test reading with negative start line is rejected."""
@@ -124,7 +124,7 @@ def test_read_file_empty_file(self, tmp_path):
 
         assert result.error is None
         assert result.content == ""
-        assert result.num_tokens == 0
+        assert result.num_tokens == 1
 
     # ==================== LIST FILES TESTS ====================
 
@@ -430,7 +430,7 @@ def test_read_large_file_with_token_limit(self, tmp_path):
         """Test that large files are handled and tokens are counted."""
         test_file = tmp_path / "large.txt"
-        # Create file with 500 lines
-        lines = [f"Line {i}: " + ("x" * 50) for i in range(500)]
+        # Create file with 400 lines, small enough to stay under the token limit
+        lines = [f"Line {i}: " + ("x" * 30) for i in range(400)]
         test_file.write_text("\n".join(lines))
 
         result = _read_file(None, str(test_file))