Skip to content

Commit a5d800d

Browse files
committed
duplicate the imm8 preference logic to SolutionNode, so that it is also used in asm exporting
1 parent 95fa3d5 commit a5d800d

1 file changed

Lines changed: 23 additions & 2 deletions

File tree

vxsort/smallsort/codegen/src/bitonic_super_optimizer.py

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -189,8 +189,29 @@ def __repr__(self):
189189
return f"SolutionNode(stage={self.stage}, gadgets={len(self.gadgets)}, cost={self.cost}, children={len(self.children)})"
190190

191191
def best_gadget(self) -> PermutationGadget:
    """Return the gadget with fewest instructions, preferring imm8 over control vectors.

    Gadgets are ranked by ``(instruction_count, control_vector_count)``:
    fewer instructions always wins, and among gadgets with the same
    instruction count the one with fewer control-vector instructions is
    chosen (those require additional YMM/ZMM registers). When several
    gadgets share the lowest rank, the first one in ``self.gadgets`` is
    returned.

    Returns:
        The lowest-ranked gadget in ``self.gadgets``.

    Raises:
        ValueError: If ``self.gadgets`` is empty.
    """

    def uses_control_vector(inst) -> bool:
        """Return True if *inst* takes a control vector rather than an immediate."""
        return (
            # permutexvar family - uses control vector
            "op_idx" in inst.args
            # blendv_ps - uses variable mask (256-bit control)
            or ("mask" in inst.args and inst.intrinsic_name.endswith("v_ps"))
            # permutevar_ps - 'b' is control vector
            or ("b" in inst.args and "permutevar" in inst.intrinsic_name)
        )

    def gadget_sort_key(gadget: PermutationGadget) -> tuple[int, int]:
        """Sort key: (instruction_count, control_vector_count)."""
        # Walk the two instruction sequences one after the other instead of
        # concatenating them: no temporary list is built, and the sequences
        # do not have to be of the same type.
        control_count = sum(
            1
            for instructions in (gadget.top_instructions, gadget.bottom_instructions)
            for inst in instructions
            if uses_control_vector(inst)
        )
        return (gadget.instruction_count(), control_count)

    if not self.gadgets:
        # min() would raise ValueError here anyway; raise the same exception
        # type with a message that names the actual problem.
        raise ValueError("cannot pick a best gadget: node has no gadgets")

    return min(self.gadgets, key=gadget_sort_key)
194215

195216

196217
class GadgetSynthesizer:

0 commit comments

Comments
 (0)