Skip to content
Closed

test #17

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
81 commits
Select commit Hold shift + click to select a range
98f0ba7
draft
ttomsa Dec 13, 2025
51e1292
cleanup test_encodings
ttomsa Dec 20, 2025
678a6b3
cleanup test_isel
ttomsa Dec 20, 2025
edb592f
model flag state and support rematerialization
ttomsa Dec 20, 2025
b8f0697
Merge remote-tracking branch 'upstream/master' into new_x86_backend
ttomsa Dec 20, 2025
54396f5
woops
ttomsa Dec 20, 2025
8365bc8
add vbroadcastss instruction
ttomsa Dec 20, 2025
32942f1
don't fuse load if used multiple times in src
ttomsa Dec 21, 2025
1271433
add movabs instruction and fix idiv
ttomsa Dec 23, 2025
1eca96e
fixes
ttomsa Jan 1, 2026
885172f
Merge remote-tracking branch 'upstream/master' into new_x86_backend
ttomsa Jan 1, 2026
8d4a48f
add x86 backend to tests
ttomsa Jan 1, 2026
5872599
float16 fix
ttomsa Jan 1, 2026
f4309a3
rm TwoAddress2nd
ttomsa Jan 1, 2026
bcd8b2b
add BARRIER
ttomsa Jan 2, 2026
f92e2d2
test windows ci
ttomsa Jan 2, 2026
c005ab0
yup isel fixes the mask stuff too and its beautiful
ttomsa Jan 4, 2026
243f6c8
add cmoves to the spec
ttomsa Jan 4, 2026
d0d3272
support storing imms
ttomsa Jan 5, 2026
138e20a
no TUPLE_ORDER, breaks tests
ttomsa Jan 5, 2026
0fe5d75
fix remaining seg faults
ttomsa Jan 5, 2026
f9b2f51
Merge branch 'master' into new_x86_backend
ttomsa Jan 6, 2026
b4f8d64
add float max
ttomsa Jan 7, 2026
7ab9908
always fuse index
ttomsa Jan 7, 2026
423f7e6
minor
ttomsa Jan 10, 2026
0daa1d9
Merge remote-tracking branch 'upstream/master' into new_x86_backend
ttomsa Jan 10, 2026
c133d3b
fix DEFINE_VAR/SPECIAL and enable multithreading
ttomsa Jan 11, 2026
7bafe52
linter
ttomsa Jan 11, 2026
a5e1897
more linter
ttomsa Jan 11, 2026
7864067
more
ttomsa Jan 11, 2026
ff5f071
more
ttomsa Jan 11, 2026
5a61a10
more
ttomsa Jan 11, 2026
609d938
let's try this
ttomsa Jan 12, 2026
037c824
perhaps
ttomsa Jan 12, 2026
1fe4185
start new scheduler
ttomsa Jan 26, 2026
dd48f6a
Merge remote-tracking branch 'upstream/master' into new_x86_backend
ttomsa Jan 26, 2026
f8ade82
more scheduling info
ttomsa Jan 29, 2026
c1b2816
Merge remote-tracking branch 'upstream/master' into new_x86_backend
ttomsa Jan 29, 2026
3fcde08
cleaner shuffle functions
ttomsa Jan 30, 2026
db3ed92
fixup isel tests
ttomsa Jan 31, 2026
6f97710
skip bounds check when NOOPs exist
ttomsa Jan 31, 2026
f1327eb
Merge remote-tracking branch 'upstream/master' into new_x86_backend
ttomsa Jan 31, 2026
b53bcb3
skip inf rewrite tests
ttomsa Jan 31, 2026
a198cb5
fix const tag hack and add x86ops to _shape
ttomsa Feb 1, 2026
f0234b9
fix
ttomsa Feb 1, 2026
983f7a2
skip a few tests
ttomsa Feb 1, 2026
e9f2e89
Merge branch 'master' into new_x86_backend
ttomsa Feb 2, 2026
0ae5c5e
func arg order independent from op value
ttomsa Feb 2, 2026
77a28ac
x86 goes in own linearize
ttomsa Feb 3, 2026
69a27d9
Merge remote-tracking branch 'upstream/master' into new_x86_backend
ttomsa Feb 3, 2026
4c3081b
switch to PARAM
ttomsa Feb 3, 2026
bbe012a
more
ttomsa Feb 3, 2026
74e3d9f
add min x86op and neg in decomps
ttomsa Feb 4, 2026
6de9da8
Merge branch 'master' into new_x86_backend
ttomsa Feb 4, 2026
93022ac
do mulacc in isel
ttomsa Feb 6, 2026
4d6ed29
use def_reg in test_encodings
ttomsa Feb 6, 2026
1d8a277
enable emulated int64 tests
ttomsa Feb 6, 2026
a3d1f84
how much does this fix
ttomsa Feb 6, 2026
c4c69d8
Ops becomes OpType
ttomsa Feb 6, 2026
74e24d5
Merge remote-tracking branch 'upstream/master' into new_x86_backend
ttomsa Feb 6, 2026
e2d49fa
fix
ttomsa Feb 6, 2026
fdaad71
rm noqa
ttomsa Feb 6, 2026
ef76bfa
rm machine scheduler stuff
ttomsa Feb 6, 2026
733789e
and this
ttomsa Feb 6, 2026
5c2b0b2
allow for extending enums and move X86Ops out of uop
ttomsa Feb 8, 2026
fe2b08b
fix imports
ttomsa Feb 8, 2026
6ff6778
Merge branch 'master' into new_x86_backend
ttomsa Feb 8, 2026
e1bf9c9
rm X86GroupOp from ops.py
ttomsa Feb 8, 2026
78171c4
spacing
ttomsa Feb 8, 2026
f0565ed
tell mypy to shut up
ttomsa Feb 8, 2026
80e68f3
more linter
ttomsa Feb 8, 2026
8785570
add x86op test
ttomsa Feb 8, 2026
86b5441
allow set[X86Ops] in upat
ttomsa Feb 8, 2026
ce31a4f
move NOOPs to pre_isel_matcher and rm NOOP from spec
ttomsa Feb 10, 2026
9fbf64e
more asserts
ttomsa Feb 10, 2026
0681e33
Merge remote-tracking branch 'upstream/master' into new_x86_backend
ttomsa Feb 10, 2026
b32bafe
also this
ttomsa Feb 10, 2026
72f341a
cleanup encode
ttomsa Feb 10, 2026
d1c28c2
simplify live range
ttomsa Feb 17, 2026
194d498
fix idiv
ttomsa Feb 17, 2026
1f140d9
Merge remote-tracking branch 'upstream/master' into new_x86_backend
ttomsa Feb 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,7 @@ jobs:
strategy:
fail-fast: false
matrix:
backend: [llvm, cpu, opencl, lvp]
backend: [llvm, cpu, opencl, lvp, x86]

name: Linux (${{ matrix.backend }})
runs-on: ubuntu-22.04
Expand All @@ -759,7 +759,7 @@ jobs:
llvm: ${{ matrix.backend == 'llvm' || matrix.backend == 'lvp' }}
mesa: ${{ matrix.backend == 'lvp' && 'true' }}
- name: Set env
run: printf "${{ matrix.backend == 'llvm' && 'CPU=1\nCPU_LLVM=1' || matrix.backend == 'cpu' && 'CPU=1\nCPU_LLVM=0\nCPU_COUNT=2' || matrix.backend == 'opencl' && 'CL=1' || matrix.backend == 'lvp' && 'CPU=1\nCPU_LVP=1' }}" >> $GITHUB_ENV
run: printf "${{ matrix.backend == 'llvm' && 'CPU=1\nCPU_LLVM=1' || matrix.backend == 'cpu' && 'CPU=1\nCPU_LLVM=0\nCPU_COUNT=2' || matrix.backend == 'opencl' && 'CL=1' || matrix.backend == 'lvp' && 'CPU=1\nCPU_LVP=1' || matrix.backend == 'x86' && 'CPU=1\nCPU_X86=1' }}" >> $GITHUB_ENV
- name: Check Device.DEFAULT and print some source
run: |
python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CPU','CL'], Device.DEFAULT"
Expand Down Expand Up @@ -910,7 +910,7 @@ jobs:
strategy:
fail-fast: false
matrix:
backend: [llvm, cpu, webgpu]
backend: [llvm, cpu, webgpu, x86]

name: Windows (${{ matrix.backend }})
runs-on: windows-latest
Expand All @@ -926,7 +926,7 @@ jobs:
pydeps: ${{ matrix.backend == 'webgpu' && 'dawn-python' || '' }}
- name: Set env
shell: bash
run: printf "${{ matrix.backend == 'llvm' && 'CPU=1\nCPU_LLVM=1' || matrix.backend == 'cpu' && 'CPU=1\nCPU_LLVM=0\nCPU_COUNT=2' || matrix.backend == 'webgpu' && 'WEBGPU=1'}}" >> $GITHUB_ENV
run: printf "${{ matrix.backend == 'llvm' && 'CPU=1\nCPU_LLVM=1' || matrix.backend == 'cpu' && 'CPU=1\nCPU_LLVM=0\nCPU_COUNT=2' || matrix.backend == 'webgpu' && 'WEBGPU=1' || matrix.backend == 'x86' && 'CPU=1\nCPU_X86=1' }}" >> $GITHUB_ENV
- name: Run unit tests
if: matrix.backend=='llvm'
# test_newton_schulz hits RecursionError
Expand All @@ -938,7 +938,7 @@ jobs:
- name: Run pytest (${{ matrix.backend }})
shell: bash
run: |
python -c "from tinygrad import Device; assert Device.DEFAULT == {'LLVM':'CPU'}.get(x:='${{ matrix.backend }}'.upper(), x), Device.DEFAULT"
python -c "from tinygrad import Device; assert Device.DEFAULT == {'LLVM':'CPU', 'X86':'CPU'}.get(x:='${{ matrix.backend }}'.upper(), x), Device.DEFAULT"
python -m pytest -n=auto test/test_tiny.py test/backend/test_ops.py --durations=20

# ****** Compile-only Tests ******
Expand Down
2 changes: 2 additions & 0 deletions test/backend/test_linearizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from tinygrad.dtype import DType, dtypes, PtrDType, AddrSpace
from tinygrad.renderer.ptx import PTXRenderer
from tinygrad.renderer.cstyle import CUDARenderer
from tinygrad.renderer.isa.x86 import X86Renderer
MOCKGPU = getenv("MOCKGPU")

from tinygrad.uop.ops import print_uops # noqa: F401 # pylint: disable=unused-import
Expand Down Expand Up @@ -376,6 +377,7 @@ def test_assign_fold(self):
np.testing.assert_equal(a.flatten().numpy(), [1.,1.,1.,1.,2.,2.,2.,2.,1.,1.,1.,1.,1.,1.,1.,1.])

@unittest.skipIf(MOCKGPU and isinstance(Device[Device.DEFAULT].renderer, (PTXRenderer, CUDARenderer)), "PTX indexes differently. might be ok?")
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, X86Renderer), "this will work once cast to bool becomes cmpne 0")
def test_where_fold(self):
a = Tensor.ones(4, 4).contiguous().realize()
b = a.shrink(((1, 2), None)).pad(((1, 2), None))
Expand Down
3 changes: 2 additions & 1 deletion test/backend/test_tensor_variable.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest
import numpy as np
from tinygrad import Tensor, Variable
from tinygrad import Tensor, Variable, Device
from tinygrad.renderer.isa.x86 import X86Renderer

class TestTensorVariable(unittest.TestCase):
def test_add_tvar(self):
Expand Down
2 changes: 2 additions & 0 deletions test/backend/test_uops.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from tinygrad.device import is_dtype_supported
from tinygrad.codegen.opt import Opt, OptOps
from tinygrad.renderer.ptx import PTXRenderer
from tinygrad.renderer.isa.x86 import X86Renderer
from test.helpers import to_uops_list
from dataclasses import replace

Expand Down Expand Up @@ -267,6 +268,7 @@ def test_use_cmpeq(self):
self.assertNotIn(Ops.CMPNE, ops)

class TestZeroRange(unittest.TestCase):
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, X86Renderer), "range check is done at the end so 1 iter always happens, skip for now")
def test_reduce_variable(self):
for i in range(3,-1,-1):
v = UOp.variable("i", 0, 5).bind(i)
Expand Down
4 changes: 2 additions & 2 deletions test/null/test_opts.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest
from tinygrad import Tensor, Device
from tinygrad.helpers import CPU_LLVM, CPU_LVP
from tinygrad.helpers import CPU_LLVM, CPU_LVP, CPU_X86
from tinygrad.codegen.opt import Opt, OptOps
from tinygrad.engine.realize import get_program

Expand All @@ -12,7 +12,7 @@ def test_opt_upcast(self):
out = (a+b).contiguous(arg=opts)
s = out.schedule()
self.assertEqual(s[-1].ast.arg.opts_to_apply, opts)
if Device.DEFAULT in {"CPU", "CL", "METAL"} and not CPU_LLVM and not CPU_LVP:
if Device.DEFAULT in {"CPU", "CL", "METAL"} and not CPU_LLVM and not CPU_LVP and not CPU_X86:
prg = get_program(s[-1].ast, renderer=Device[Device.DEFAULT].renderer)
self.assertIn('float4', prg.src)

Expand Down
147 changes: 147 additions & 0 deletions test/unit/test_encodings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import unittest
from tinygrad.uop.ops import UOp, Ops
from tinygrad.dtype import dtypes
from tinygrad.helpers import SPEC
from tinygrad.renderer.isa.x86 import X86Ops, X86Renderer, RBP, RDI, RSP, RSI, RAX, RDX, XMM, GPR, imm, def_reg

@unittest.skipIf(SPEC > 1, "x86 spec not supported in full_spec")
class TestEncodingsX86(unittest.TestCase):
  """Compare the bytes rendered for single hand-built x86 UOps against expected encodings."""

  # NOTE: x86 also allows a lone displacement as the memory address, and an index with no base address,
  # these have no use cases so they aren't supported
  def encode(self, u:UOp):
    """Render one UOp with lower=False; the tests parse the result as a hex string."""
    return X86Renderer().render([u], lower=False)

  def _check_bytes(self, u:UOp, expected_hex:str):
    """Assert that `u` encodes to exactly the bytes written in `expected_hex`."""
    self.assertEqual(bytes.fromhex(self.encode(u)), bytes.fromhex(expected_hex))

  # a displacement of 0 isn't emitted
  def test_base_address(self):
    base = def_reg(dtypes.int32.ptr(), RDI)
    load = UOp(X86Ops.MOV, dtypes.int32, (base, UOp(Ops.NOOP), imm(dtypes.int8, 0)), RDI)
    # mov edi, dword ptr [rdi]
    self._check_bytes(load, "8B 3F")

  # rsp/r12 need a sib byte when they are the base memory address
  def test_rsp_base_address(self):
    base = def_reg(dtypes.int32.ptr(), RSP)
    load = UOp(X86Ops.MOV, dtypes.int32, (base, UOp(Ops.NOOP), imm(dtypes.int8, 0)), RSP)
    # mov esp, dword ptr [rsp]
    self._check_bytes(load, "8B 24 24")

  # rbp/r13 need a displacement when they are the base memory address
  def test_rbp_base_address(self):
    base = def_reg(dtypes.int32.ptr(), RBP)
    load = UOp(X86Ops.MOV, dtypes.int32, (base, UOp(Ops.NOOP), imm(dtypes.int8, 0)), RBP)
    # mov ebp, dword ptr [rbp + 0]
    self._check_bytes(load, "8B 6D 00")

  # [base + index*scale]
  def test_base_index_address(self):
    base, index = def_reg(dtypes.int32.ptr(), RAX), def_reg(dtypes.int32, RDX)
    load = UOp(X86Ops.MOV, dtypes.int32, (base, index, imm(dtypes.int8, 0)), RAX)
    # mov eax, dword ptr [rax + rdx*4]
    self._check_bytes(load, "8B 04 90")

  # rsp in the index slot means "no index"
  def test_rsp_index_address(self):
    base, index = def_reg(dtypes.int32.ptr(), RAX), def_reg(dtypes.int32, RSP)
    load = UOp(X86Ops.MOV, dtypes.int32, (base, index, imm(dtypes.int8, 0)), RAX)
    # mov eax, dword ptr [rax]
    self._check_bytes(load, "8B 00")

  # r12, however, is a valid index
  def test_r12_index_address(self):
    base, index = def_reg(dtypes.int32.ptr(), RAX), def_reg(dtypes.int32, GPR[12])
    load = UOp(X86Ops.MOV, dtypes.int32, (base, index, imm(dtypes.int8, 0)), RAX)
    # mov eax, dword ptr [rax + r12*4]
    self._check_bytes(load, "42 8B 04 A0")

  # [base + index*scale + 8bit disp]
  def test_complex_address_8bit_disp(self):
    base, index = def_reg(dtypes.int32.ptr(), RDI), def_reg(dtypes.int32, RSI)
    load = UOp(X86Ops.MOV, dtypes.int32, (base, index, imm(dtypes.int8, 10)), RDI)
    # mov edi, dword ptr [rdi + rsi*4 + 0xa]
    self._check_bytes(load, "8B 7C B7 0A")

  # [base + index*scale + 32bit disp]
  def test_complex_address_32bit_disp(self):
    base, index = def_reg(dtypes.int32.ptr(), RDI), def_reg(dtypes.int32, RSI)
    load = UOp(X86Ops.MOV, dtypes.int32, (base, index, imm(dtypes.int32, 10000)), RDI)
    # mov edi, dword ptr [rdi + rsi*4 + 0x2710]
    self._check_bytes(load, "8B BC B7 10 27 00 00")

  # 8bit variants of legacy instructions subtract 1 from opcode
  def test_8bit_legacy_encoding(self):
    src = def_reg(dtypes.int8, RDX)
    cast = UOp(X86Ops.MOVSX, dtypes.int32, (src,), RAX)
    # movsx eax, dl
    self._check_bytes(cast, "0F BE C2")

  # reaching the low 8 bits of rsp, rbp, rsi, rdi requires a rex prefix
  def test_lower_8bits_reg(self):
    src = def_reg(dtypes.int8, RDI)
    cast = UOp(X86Ops.MOVSX, dtypes.int32, (src,), RAX)
    # movsx eax, dil
    self._check_bytes(cast, "40 0F BE C7")

  # 16 bit variant of a legacy instruction
  def test_16bit_legacy_encoding(self):
    src = def_reg(dtypes.int8, RDX)
    cast = UOp(X86Ops.MOVSX, dtypes.int16, (src,), RAX)
    # movsx ax, dl
    self._check_bytes(cast, "66 0F BE C2")

  # 64 bit variant of a legacy instruction
  def test_64bit_legacy_encoding(self):
    src = def_reg(dtypes.int8, RDX)
    cast = UOp(X86Ops.MOVSX, dtypes.int64, (src,), RAX)
    # movsx rax, dl
    self._check_bytes(cast, "48 0F BE C2")

  # compact vex encoding
  def test_compact_vex_encoding(self):
    lhs, rhs = def_reg(dtypes.float32, XMM[0]), def_reg(dtypes.float32, XMM[1])
    add = UOp(X86Ops.VADDSS, dtypes.float32, (lhs, rhs), XMM[0])
    # vaddss xmm0, xmm0, xmm1
    self._check_bytes(add, "C5 FA 58 C1")

  # long vex encoding
  def test_long_vex_encoding(self):
    lhs, rhs = def_reg(dtypes.float32, XMM[0]), def_reg(dtypes.float32, XMM[8])
    add = UOp(X86Ops.VADDSS, dtypes.float32, (lhs, rhs), XMM[0])
    # vaddss xmm0, xmm0, xmm8
    self._check_bytes(add, "C4 C1 7A 58 C0")

  # ymm encoding
  def test_ymm_encoding(self):
    vec8 = dtypes.float32.vec(8)
    lhs, rhs = def_reg(vec8, XMM[0]), def_reg(vec8, XMM[1])
    add = UOp(X86Ops.VADDPS, vec8, (lhs, rhs), XMM[0])
    # vaddps ymm0, ymm0, ymm1
    self._check_bytes(add, "C5 FC 58 C1")

  # encoding where a register goes in the immediate field
  def test_reg_in_imm_field(self):
    regs = tuple(def_reg(dtypes.float32, XMM[i]) for i in range(3))
    blend = UOp(X86Ops.VBLENDVPS, dtypes.float32, regs, XMM[0])
    # vblendvps xmm0, xmm0, xmm1, xmm2
    self._check_bytes(blend, "C4 E3 79 4A C1 20")

  # when writing to mem the uop takes the store form where dtype is void and there's no definition
  def test_write_mem(self):
    address = (def_reg(dtypes.int32.ptr(), RDI), def_reg(dtypes.int32, RSI), imm(dtypes.int8, 10))
    value = def_reg(dtypes.float32, XMM[0])
    extr = UOp(X86Ops.VPEXTRD, dtypes.void, address + (value, imm(dtypes.uint8, 0)))
    # vpextrd dword ptr [rdi + rsi*4 + 0xa], xmm0, 0
    self._check_bytes(extr, "C4 E3 79 16 44 B7 0A 00")

  # a two address instruction with a fused load works
  def test_two_address_load(self):
    address = (def_reg(dtypes.int32.ptr(), RDI), def_reg(dtypes.int32, RSI), imm(dtypes.int8, 10))
    cmove = UOp(X86Ops.CMOVE, dtypes.int32, address, RAX)
    # cmove eax, dword ptr [rdi + rsi*4 + 0xa]
    self._check_bytes(cmove, "0F 44 44 B7 0A")

  # instructions where displacement and imm have the same value
  def test_disp_imm_same_value(self):
    base, index, disp = def_reg(dtypes.int8.ptr(), RDI), def_reg(dtypes.int8, RSI), imm(dtypes.int8, 10)
    # the very same imm UOp is used as both the displacement and the stored value
    mov = UOp(X86Ops.MOVi, dtypes.void, (base, index, disp, disp))
    # mov byte ptr [rdi + rsi + 0xa], 0xa
    self._check_bytes(mov, "40 C6 44 37 0A 0A")

    base, index, disp = def_reg(dtypes.int32.ptr(), RDI), def_reg(dtypes.int32, RSI), imm(dtypes.int32, 10)
    factor = imm(dtypes.int32, 10)
    imul = UOp(X86Ops.IMULi, dtypes.int32, (base, index, disp, factor), RDI)
    # imul edi, dword ptr [rdi + rsi*4 + 0xa], 0xa
    self._check_bytes(imul, "69 BC B7 0A 00 00 00 0A 00 00 00")

  # cmoves carry the cmp as their last src even though it is not explicitly used,
  # the cmp doesn't define a reg and is ignored in the encoding
  def test_cmove_ignore_cmp(self):
    src, cmp = def_reg(dtypes.int32, RAX), UOp(X86Ops.CMP)
    cmove = UOp(X86Ops.CMOVE, dtypes.int32, (src, cmp), RDX)
    # cmove edx, eax
    self._check_bytes(cmove, "0F 44 D0")

if __name__ == "__main__":
unittest.main()
113 changes: 113 additions & 0 deletions test/unit/test_isel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import unittest
from tinygrad.uop import Ops
from tinygrad.uop.ops import UOp, dtypes, graph_rewrite
from tinygrad.renderer.isa import X86Ops
from tinygrad.renderer.isa.x86 import X86Renderer
from tinygrad.renderer.isa.isa import IselContext, Register
from tinygrad.helpers import SPEC

@unittest.skipIf(SPEC > 1, "x86 spec not supported in full_spec")
class TestIselX86(unittest.TestCase):
  """Run the X86Renderer isel matcher over small UOp graphs and inspect the ops it produces."""

  def isel_rewrite(self, x:UOp):
    """Rewrite `x` bottom-up with the x86 isel matcher, using a fresh IselContext built from `x`."""
    return graph_rewrite(x, X86Renderer().isel_matcher, IselContext(x), bottom_up=True)

  def test_cmove(self):
    lhs = UOp.variable("a", 0, 0, dtypes.int32)
    rhs = UOp.variable("b", 0, 0, dtypes.int32)
    pick_lt = (lhs < rhs).where(lhs, rhs)
    pick_ne = (lhs != rhs).where(lhs, rhs)
    n = self.isel_rewrite(pick_lt + pick_ne)
    self.assertIs(n.src[0].op, X86Ops.CMOVL)
    self.assertIs(n.src[1].op, X86Ops.CMOVNE)
    # both comparisons become the same instruction
    self.assertEqual(n.src[0].src[2], n.src[1].src[2])
    self.assertIs(n.src[0].src[2].op, X86Ops.CMP)

  def test_cmove_and_blend_with_float_cmp(self):
    lhs = UOp.variable("a", 0, 0, dtypes.float32)
    rhs = UOp.variable("b", 0, 0, dtypes.float32)
    cond = lhs < rhs
    int_pick = cond.where(lhs.cast(dtypes.int32), rhs.cast(dtypes.int32))
    float_pick = cond.where(lhs, rhs)
    n = self.isel_rewrite(int_pick + float_pick)
    # the comparison instruction depends on the user, int cmove uses flag while float cmove uses mask
    # so both flag producing and mask producing comparisons must be present
    self.assertIs(n.src[0].op, X86Ops.CMOVB)
    self.assertIs(n.src[0].src[2].op, X86Ops.VUCOMISS)
    self.assertIs(n.src[1].op, X86Ops.VBLENDVPS)
    self.assertIs(n.src[1].src[2].op, X86Ops.VCMPSS)
    self.assertEqual(n.src[1].src[2].src[2].arg, 1)

  # the two low 32-bit elements must come from one register and the two high 32-bit elements must come from one register
  def test_vshufps(self):
    vec_a = UOp.variable("a", 0, 0, dtypes.float32.vec(4))
    vec_b = UOp.variable("b", 0, 0, dtypes.float32.vec(4))
    scalar_c = UOp.variable("c", 0, 0, dtypes.float32)
    scalar_d = UOp.variable("d", 0, 0, dtypes.float32)
    # shuffle between 2 vectors
    n = self.isel_rewrite(UOp(Ops.VECTORIZE, vec_a.dtype, (vec_a.gep(0), vec_a.gep(1), vec_b.gep(2), vec_b.gep(3))))
    self.assertIs(n.op, X86Ops.VSHUFPS)
    # shuffle between 2 scalars
    n = self.isel_rewrite(UOp(Ops.VECTORIZE, vec_a.dtype, (scalar_c, scalar_c, scalar_d, scalar_d)))
    self.assertIs(n.op, X86Ops.VSHUFPS)
    # shuffle between vector and scalar
    n = self.isel_rewrite(UOp(Ops.VECTORIZE, vec_a.dtype, (vec_a.gep(0), vec_a.gep(1), scalar_c, scalar_c)))
    self.assertIs(n.op, X86Ops.VSHUFPS)
    # shuffle between 1 vector
    n = self.isel_rewrite(UOp(Ops.VECTORIZE, vec_a.dtype, (vec_a.gep(1), vec_a.gep(2), vec_a.gep(3), vec_a.gep(0))))
    self.assertIs(n.op, X86Ops.VSHUFPS)
    self.assertIs(n.src[0], n.src[1])
    # a shuffle between 1 scalar is just a broadcast and matches X86Ops.VBROADCASTSS to allow for load fusion

  # this is the fallback slow VECTORIZE, 1 vinsertps per src in VECTORIZE
  def test_vinsertps(self):
    vec_a = UOp.variable("a", 0, 0, dtypes.float32.vec(4))
    vec_b = UOp.variable("b", 0, 0, dtypes.float32.vec(4))
    vec_c = UOp.variable("c", 0, 0, dtypes.float32.vec(4))
    scalar = UOp.variable("e", 0, 0, dtypes.float32)
    # pack 1 from vector and 1 from scalar, moving 0th element to position 0 does nothing so only 1 vinsertps is generated
    n = self.isel_rewrite(UOp(Ops.VECTORIZE, dtypes.float32.vec(2), (vec_a.gep(0), scalar)))
    self.assertIs(n.op, X86Ops.VINSERTPS)
    self.assertIs(n.src[0].op, X86Ops.DEFINE_REG)
    # interleaved shuffle between 2 vectors
    n = self.isel_rewrite(UOp(Ops.VECTORIZE, vec_a.dtype, (vec_a.gep(0), vec_b.gep(1), vec_a.gep(2), vec_b.gep(3))))
    self.assertIs(n.op, X86Ops.VINSERTPS)
    # shuffle between 4 sources
    n = self.isel_rewrite(UOp(Ops.VECTORIZE, vec_a.dtype, (vec_a.gep(3), vec_b.gep(2), vec_c.gep(1), scalar)))
    self.assertIs(n.op, X86Ops.VINSERTPS)

  # complex address is [base + index*scale + displacement]
  def test_complex_address(self):
    idx = UOp.variable("a", 0, 0, dtypes.int32)
    param = UOp(Ops.PARAM, dtypes.int32.ptr(), arg=0)
    n = self.isel_rewrite(param.index(idx + 1, ptr=True).load())
    # base is PARAM, index is "a"
    self.assertIs(n.src[0].op, X86Ops.DEFINE_REG)
    self.assertIs(n.src[1].op, X86Ops.DEFINE_REG)
    # displacement is the constant in "a" scaled to the buffer element size, dtype is int8 when the value fits otherwise int32
    self.assertIs(n.src[2].op, X86Ops.IMM)
    self.assertIs(n.src[2].dtype, dtypes.int8)
    self.assertEqual(n.src[2].arg, 4)

  def test_fuse_load(self):
    first = UOp(Ops.PARAM, dtypes.int32.ptr(), arg=0).index(UOp.const(dtypes.int32, 0), ptr=True).load()
    second = UOp(Ops.PARAM, dtypes.int32.ptr(), arg=0).index(UOp.const(dtypes.int32, 1), ptr=True).load()
    n = self.isel_rewrite(first + second)
    self.assertEqual(len(n.src), 4)

  # don't fuse when used multiple times
  def test_dont_fuse_load_diff_users(self):
    load = UOp(Ops.PARAM, dtypes.int32.ptr(), arg=0).index(UOp.const(dtypes.int32, 0), ptr=True).load()
    plus_one = load + 1
    n = self.isel_rewrite(plus_one + load)
    self.assertEqual(len(n.src), 2)

  def test_dont_fuse_load_same_user(self):
    load = UOp(Ops.PARAM, dtypes.int32.ptr(), arg=0).index(UOp.const(dtypes.int32, 0), ptr=True).load()
    n = self.isel_rewrite(load * load)
    self.assertEqual(len(n.src), 2)

  # test noop has same reg as src, this is because noops aren't instructions but still need to be part of the graph
  # as they may have different dtype from src and the correct dtype is required to encode the correct instruction
  # by giving them the same reg as src we ensure they share the same live range
  @unittest.skip("hmmm")
  def test_noop(self):
    param = UOp(Ops.PARAM, dtypes.int32.ptr(), arg=0)
    n = self.isel_rewrite(UOp(Ops.NOOP, dtypes.int32, (param,)))
    self.assertIsInstance(n.arg, Register)
    self.assertEqual(n.arg, n.src[0].arg)

# TODO: might want to check that load isn't part of another range when fusing

if __name__ == "__main__":
unittest.main()
25 changes: 25 additions & 0 deletions test/unit/test_x86op_values.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import unittest
from tinygrad.uop import Ops, GroupOp
from tinygrad.renderer.isa import X86Ops, X86GroupOp

class TestX86OpValues(unittest.TestCase):
  """Check how X86Ops members relate to the base Ops enum and to the two GroupOp.All sets."""

  def test_values(self):
    # unittest assertion methods are used instead of bare `assert` statements:
    # bare asserts are removed under `python -O` (the test would pass vacuously) and report no operands on failure

    # ADD is added in X86Ops
    self.assertNotEqual(X86Ops.ADD, Ops.ADD)
    self.assertIsNot(X86Ops.ADD, Ops.ADD)
    self.assertNotIn(Ops.ADD, X86GroupOp.All)
    self.assertNotIn(X86Ops.ADD, GroupOp.All)
    self.assertIn(X86Ops.ADD, X86GroupOp.All)
    self.assertNotIsInstance(Ops.ADD, X86Ops)
    self.assertIsInstance(X86Ops.ADD, Ops)
    self.assertIsInstance(X86Ops.ADD, X86Ops)
    # SINK is not added in X86Ops, this is now possible but behavior doesn't change
    self.assertEqual(X86Ops.SINK, Ops.SINK)
    self.assertIs(X86Ops.SINK, Ops.SINK)
    self.assertIn(X86Ops.SINK, GroupOp.All)
    self.assertNotIn(X86Ops.SINK, X86GroupOp.All)
    self.assertNotIsInstance(X86Ops.SINK, X86Ops)
    # the smallest X86 op value is exactly one above the largest base op value
    self.assertEqual(max(op.value for op in GroupOp.All) + 1, min(op.value for op in X86GroupOp.All))

if __name__ == "__main__":
unittest.main()
Loading
Loading