feat: select wave target by fewest outputs, tiebreak by success rate

damageboy · damageboy · commit c159b2d5ffc9 · 2026-03-23T10:24:30.000+01:00
Change weakest_stage to prioritize stages with the fewest distinct
outputs (the bottleneck), breaking ties by lowest success rate
(distinct_outputs/attempts, or 0.0 for a stage with no attempts) and
then by earliest stage index. This avoids sunk-cost traps where a
hard stage with a low success rate keeps getting targeted over
underexplored stages.

Also: print immediate feedback on Ctrl-C before clean shutdown.
diff --git a/vxsort/smallsort/codegen/src/transition_table.py b/vxsort/smallsort/codegen/src/transition_table.py
@@ -203,32 +203,33 @@ def stage_stats(self, stage: int) -> dict:
         }
 
     def weakest_stage(self, exclude_exhausted: set[int] | None = None) -> int | None:
-        """Return the index of the stage with the lowest success rate.
+        """Return the stage most in need of exploration.
+
+        Selection criteria (in priority order):
+        1. Fewest distinct outputs (the bottleneck stage)
+        2. Lowest success rate (distinct_outputs / attempts) as tiebreaker
+        3. Earliest stage index as final tiebreaker
 
         Parameters
         ----------
         exclude_exhausted :
             Stage indices to skip.  If all stages are excluded (or the
             table is empty), returns ``None``.
-
-        Ties are broken by earliest stage index.  Stages with zero attempts
-        are treated as having a success rate of 0.0.
         """
         excluded = exclude_exhausted or set()
         best_idx: int | None = None
-        best_rate = float("inf")
+        best_key = (float("inf"), float("inf"))
 
         for i, sd in enumerate(self.stages):
             if i in excluded:
                 continue
 
-            if sd.attempts > 0:
-                rate = len(sd.unique_outputs) / sd.attempts
-            else:
-                rate = 0.0
+            n_outputs = len(sd.unique_outputs)
+            rate = n_outputs / sd.attempts if sd.attempts > 0 else 0.0
+            key = (n_outputs, rate)
 
-            if rate < best_rate:
-                best_rate = rate
+            if key < best_key:
+                best_key = key
                 best_idx = i
 
         return best_idx
diff --git a/vxsort/smallsort/codegen/src/wave_engine.py b/vxsort/smallsort/codegen/src/wave_engine.py
@@ -837,6 +837,12 @@ def run(self) -> dict:
 
         def _sigint_handler(_signum, _frame):
             self._interrupted = True
+            import sys
+
+            print(
+                "\nCtrl-C detected — finishing current work and saving checkpoint...",
+                file=sys.stderr,
+            )
 
         signal.signal(signal.SIGINT, _sigint_handler)
 
diff --git a/vxsort/smallsort/codegen/tests/test_transition_table.py b/vxsort/smallsort/codegen/tests/test_transition_table.py
@@ -386,23 +386,23 @@ def test_all_empty_returns_first(self):
         tt = TransitionTable(num_stages=3)
         assert tt.weakest_stage() == 0
 
-    def test_returns_lowest_success_rate(self):
+    def test_returns_fewest_outputs(self):
         tt = TransitionTable(num_stages=3)
         inp = _vs([0, 1, 2, 3], [4, 5, 6, 7])
 
-        # Stage 0: 2 outputs from 4 attempts => 50%
+        # Stage 0: 2 outputs from 4 attempts
         tt.record_attempt(0, count=4)
         out0a = _vs([0, 1, 2, 3], [4, 5, 6, 7])
         out0b = _vs([1, 0, 3, 2], [5, 4, 7, 6])
         tt.add_transition(0, inp, out0a, _make_gadget(top_args={"ctrl": 1}))
         tt.add_transition(0, inp, out0b, _make_gadget(top_args={"ctrl": 2}))
 
-        # Stage 1: 1 output from 4 attempts => 25%
+        # Stage 1: 1 output from 4 attempts — fewest outputs, selected
         tt.record_attempt(1, count=4)
         out1a = _vs([0, 1, 2, 3], [4, 5, 6, 7])
         tt.add_transition(1, inp, out1a, _make_gadget(top_args={"ctrl": 3}))
 
-        # Stage 2: 3 outputs from 4 attempts => 75%
+        # Stage 2: 3 outputs from 4 attempts
         tt.record_attempt(2, count=4)
         out2a = _vs([0, 1, 2, 3], [4, 5, 6, 7])
         out2b = _vs([1, 0, 3, 2], [5, 4, 7, 6])
@@ -413,6 +413,22 @@ def test_returns_lowest_success_rate(self):
 
         assert tt.weakest_stage() == 1
 
+    def test_tiebreak_by_success_rate(self):
+        """When two stages have the same output count, pick the one struggling more."""
+        tt = TransitionTable(num_stages=2)
+        inp = _vs([0, 1, 2, 3], [4, 5, 6, 7])
+        out_a = _vs([1, 0, 3, 2], [5, 4, 7, 6])
+
+        # Stage 0: 1 output from 10 attempts (10% rate)
+        tt.record_attempt(0, count=10)
+        tt.add_transition(0, inp, out_a, _make_gadget(top_args={"ctrl": 1}))
+
+        # Stage 1: 1 output from 100 attempts (1% rate) — same outputs, worse rate
+        tt.record_attempt(1, count=100)
+        tt.add_transition(1, inp, out_a, _make_gadget(top_args={"ctrl": 2}))
+
+        assert tt.weakest_stage() == 1  # same outputs, lower success rate
+
     def test_tie_breaks_by_earliest_stage(self):
         tt = TransitionTable(num_stages=3)
         inp = _vs([0, 1, 2, 3], [4, 5, 6, 7])