Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ The following files may have been Modified by MetaX Integrated Circuits (Shangha
modified: tests/python/all-platform-minimal-test/test_runtime_ndarray.py
modified: tests/python/codegen/test_gpu_codegen_allreduce.py
modified: tests/python/codegen/test_target_codegen.py
modified: tests/python/codegen/test_target_codegen_blob.py
modified: tests/python/codegen/test_target_codegen_cuda.py
modified: tests/python/codegen/test_target_codegen_device.py
modified: tests/python/codegen/test_target_codegen_extern.py
modified: tests/python/codegen/test_target_codegen_gpu_common.py
modified: tests/python/testing/test_tvm_testing_features.py
Modification copyright 2025 MetaX Integrated Circuits (Shanghai) Co., Ltd.
Expand All @@ -78,6 +81,7 @@ The following files are newly added by MetaX Integrated Circuits (Shanghai) Co.,
added: src/target/source/codegen_maca.h
added: src/target/source/intrin_rule_maca.cc
added: src/target/source/literal/maca_half_t.h
added: tests/python/codegen/test_target_codegen_maca.py
added: tests/scripts/notice.py

---------------------------------------------------
Expand Down
5 changes: 1 addition & 4 deletions src/target/source/literal/maca_half_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -488,10 +488,7 @@ __host__ __device__ half4 make_half4(__half x, __half y, __half z, __half w) {
stream << R"(
using maca_bfloat164 = half4_bfloat164<maca_bfloat16, maca_bfloat162>;
__host__ __device__ maca_bfloat164 make_maca_bfloat164(maca_bfloat16 x, maca_bfloat16 y, maca_bfloat16 z, maca_bfloat16 w) {
return maca_bfloat164(x, y, z, w);
}
__host__ __device__ maca_bfloat162 make_maca_bfloat162(maca_bfloat16 x, maca_bfloat16 y) {
return maca_bfloat162(x, y);
return maca_bfloat164{x, y, z, w};
}
)";
if (enable_fp8) {
Expand Down
2 changes: 1 addition & 1 deletion tests/python/codegen/test_target_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def func(a: T.handle, b: T.handle):
tvm.compile(func)


@tvm.testing.parametrize_targets("cuda", "opencl", "metal", "rocm", "vulkan -from_device=0")
@tvm.testing.parametrize_targets("cuda", "opencl", "metal", "rocm", "vulkan -from_device=0", "maca")
def test_buffer_load_predicate_not_supported_gpu(target):
@T.prim_func
def func(a: T.handle, b: T.handle):
Expand Down
72 changes: 72 additions & 0 deletions tests/python/codegen/test_target_codegen_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,78 @@ def popen_check():
worker.recv()


@tvm.testing.uses_gpu
def test_maca_multi_lib():
    """Combine two MACA system libs into one shared library and run both.

    Two modules with distinct ``system_lib_prefix`` values are compiled for
    the ``maca`` target, exported as tar archives, merged into one static
    archive and linked into a single shared object. A separate worker
    process then loads that shared object, looks up each system lib by its
    prefix and checks that each kernel updates its input as expected.

    The test returns early (after printing a message) when either the
    ``llvm`` or the ``maca`` device is not enabled.
    """
    # test combining two system lib together
    # each contains a fatbin component in maca
    for device in ["llvm", "maca"]:
        if not tvm.testing.device_enabled(device):
            print("skip because %s is not enabled..." % device)
            return

    @tvm.script.ir_module
    class ModA:
        I.module_attrs({"system_lib_prefix": "modA_"})

        @T.prim_func
        def my_inplace_update(x: T.Buffer((12), "float32")) -> None:
            T.func_attr({"global_symbol": "modA_my_inplace_update"})
            for bx in T.thread_binding(T.int64(1), thread="blockIdx.x"):
                for tx in T.thread_binding(T.int64(12), thread="threadIdx.x"):
                    x[tx] = x[tx] + 1

    @tvm.script.ir_module
    class ModB:
        I.module_attrs({"system_lib_prefix": "modB_"})

        @T.prim_func
        def my_inplace_update(x: T.Buffer((12), "float32")) -> None:
            T.func_attr({"global_symbol": "modB_my_inplace_update"})
            for bx in T.thread_binding(T.int64(1), thread="blockIdx.x"):
                for tx in T.thread_binding(T.int64(12), thread="threadIdx.x"):
                    x[tx] = x[tx] + 2

    temp = utils.tempdir()
    target = tvm.target.Target("maca", host="llvm")
    libA = tvm.compile(ModA, target=target)
    libB = tvm.compile(ModB, target=target)

    pathA = temp.relpath("libA.tar")
    pathB = temp.relpath("libB.tar")
    pathAll = temp.relpath("libAll.a")

    path_dso = temp.relpath("mylib.so")
    libA.export_library(pathA, fcompile=tar.tar)
    libB.export_library(pathB, fcompile=tar.tar)
    # package two static libs together
    cc.create_staticlib(pathAll, [pathA, pathB])
    # --whole-archive keeps every object from the archive in the shared
    # library, including ones nothing else references.
    cc.create_shared(path_dso, ["-Wl,--whole-archive", pathAll, "-Wl,--no-whole-archive"])

    def popen_check():
        # Load dll, will trigger system library registration
        ctypes.CDLL(path_dso)
        # Load the system wide library
        dev = tvm.maca()
        a_np = np.random.uniform(size=12).astype("float32")
        a_nd = tvm.nd.array(a_np, dev)
        b_nd = tvm.nd.array(a_np, dev)
        syslibA = tvm.runtime.system_lib("modA_")
        syslibB = tvm.runtime.system_lib("modB_")
        # reload same lib twice
        syslibA = tvm.runtime.system_lib("modA_")
        syslibA["my_inplace_update"](a_nd)
        syslibB["my_inplace_update"](b_nd)
        np.testing.assert_equal(a_nd.numpy(), a_np + 1)
        np.testing.assert_equal(b_nd.numpy(), a_np + 2)

    # system lib should be loaded in different process
    worker = popen_pool.PopenWorker()
    worker.send(popen_check)
    worker.recv()


if __name__ == "__main__":
    # Run the tests directly when this file is executed as a script.
    test_synthetic()
    # NOTE(review): the definition of test_cuda_multilib is outside this
    # view - confirm the name matches its def.
    test_cuda_multilib()
    # The MACA test is defined as test_maca_multi_lib; calling
    # test_maca_multilib would raise NameError.
    test_maca_multi_lib()
2 changes: 2 additions & 0 deletions tests/python/codegen/test_target_codegen_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def check_target(device):

check_target("cuda")
check_target("vulkan -from_device=0")
check_target("maca")


@tvm.testing.requires_gpu
Expand Down Expand Up @@ -108,6 +109,7 @@ def check_target(device, host):
check_target("nvptx", host="llvm")
check_target("vulkan", host="llvm")
check_target("rocm", host="llvm")
check_target("maca", host="llvm")


if __name__ == "__main__":
Expand Down
5 changes: 3 additions & 2 deletions tests/python/codegen/test_target_codegen_extern.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ def extern_generator_gpu(ins, outs):
def check_target(target):
if not tvm.testing.device_enabled(target):
return
mod = mod_gpu if target in ["opencl", "cuda"] else mod_cpu
C = C_gpu if target in ["opencl", "cuda"] else C_cpu
mod = mod_gpu if target in ["opencl", "cuda", "maca"] else mod_cpu
C = C_gpu if target in ["opencl", "cuda", "maca"] else C_cpu
# build and invoke the kernel.
f = tvm.compile(mod, target=target)
dev = tvm.device(target, 0)
Expand All @@ -81,6 +81,7 @@ def check_target(target):
check_target("llvm")
check_target("opencl")
check_target("cuda")
check_target("maca")


def test_pack_buffer_simple():
Expand Down
Loading
Loading