From 7d144b5504557b47a0f164dbb5685c874df1c7f7 Mon Sep 17 00:00:00 2001
From: chamikasudusinghe <chamika.sudusinghe.17@cse.mrt.ac.lk>
Date: Thu, 16 Apr 2026 00:59:10 -0500
Subject: [PATCH] Fix compatibility with DGL (adj_sparse -> adj_tensors) and CUDA 12.0+

---
 examples/rgms/rgcn/bench_rgcn_baseline.py                | 2 +-
 examples/rgms/rgcn/bench_rgcn_composable.py              | 2 +-
 examples/rgms/rgcn/bench_rgcn_non_composable.py          | 2 +-
 examples/rgms/rgcn/bench_rgcn_tensorcore.py              | 2 +-
 examples/rgms/sparse_conv/rgms.py                        | 2 +-
 examples/sddmm/bench_sddmm.py                            | 2 +-
 examples/spmm/bench_spmm.py                              | 2 +-
 examples/spmm/bench_spmm_naive.py                        | 2 +-
 examples/spmm/bench_tc_spmm.py                           | 2 +-
 python/tvm/contrib/nvcc.py                               | 6 ++++++
 tests/python/sparsetir/test_format_conversion_routine.py | 6 +++---
 11 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/examples/rgms/rgcn/bench_rgcn_baseline.py b/examples/rgms/rgcn/bench_rgcn_baseline.py
index cea1254aa..6221a1a4d 100644
--- a/examples/rgms/rgcn/bench_rgcn_baseline.py
+++ b/examples/rgms/rgcn/bench_rgcn_baseline.py
@@ -46,7 +46,7 @@ def test_rgcn_baseline(g: DGLHeteroGraph, feat_size: int):
     feat = th.rand(g.num_src_nodes(), feat_size).to(0) / 100
     out = th.zeros(g.num_dst_nodes(), feat_size).to(0) / 100
     weight = th.rand(g.num_rels, feat_size, feat_size).to(0)
-    indptr, indices, eid = g.adj_sparse(fmt="csc")
+    indptr, indices, eid = g.adj_tensors(fmt="csc")
     etype = g.edata[dgl.ETYPE][eid.long()]
 
     # dgl-lowmem
diff --git a/examples/rgms/rgcn/bench_rgcn_composable.py b/examples/rgms/rgcn/bench_rgcn_composable.py
index 6be9302a3..2ca63ac45 100644
--- a/examples/rgms/rgcn/bench_rgcn_composable.py
+++ b/examples/rgms/rgcn/bench_rgcn_composable.py
@@ -109,7 +109,7 @@ def test_rgcn_composable_format(
         dst_type_id = g.get_ntype_id(dst_type)
         g_sub = g[etype]
         m_sub, n_sub = g_sub.num_dst_nodes(), g_sub.num_src_nodes()
-        indptr, indices, _ = g_sub.adj_sparse(fmt="csc")
+        indptr, indices, _ = g_sub.adj_tensors(fmt="csc")
         csf_indptr_0.append(csf_indptr_0[-1] + m_sub)
         csf_indices_0.append(ntype_node_pointer[dst_type_id] + th.arange(m_sub, dtype=th.int32))
         csf_indptr_1.append(csf_indptr_1[-1][-1] + indptr[1:])
diff --git a/examples/rgms/rgcn/bench_rgcn_non_composable.py b/examples/rgms/rgcn/bench_rgcn_non_composable.py
index c1c2fc210..ebfbc969c 100644
--- a/examples/rgms/rgcn/bench_rgcn_non_composable.py
+++ b/examples/rgms/rgcn/bench_rgcn_non_composable.py
@@ -60,7 +60,7 @@ def test_lower_rgcn_hetero(
         src_type_id = g.get_ntype_id(src_type)
         dst_type_id = g.get_ntype_id(dst_type)
         g_sub = g[etype]
-        indptr, indices, _ = g_sub.adj_sparse(fmt="csc")
+        indptr, indices, _ = g_sub.adj_tensors(fmt="csc")
 
         unique_nodes = th.nonzero(indptr[:-1] != indptr[1:]).squeeze(1)
         indptr_i.append(th.LongTensor([len(unique_nodes) + indptr_i[-1].item()]))
diff --git a/examples/rgms/rgcn/bench_rgcn_tensorcore.py b/examples/rgms/rgcn/bench_rgcn_tensorcore.py
index 71fe653d0..5bbfb7b48 100644
--- a/examples/rgms/rgcn/bench_rgcn_tensorcore.py
+++ b/examples/rgms/rgcn/bench_rgcn_tensorcore.py
@@ -680,7 +680,7 @@ def rgcn_tensorcore(
         dst_type_id = g.get_ntype_id(dst_type)
         g_sub = g[etype]
         m_sub, n_sub = g_sub.num_dst_nodes(), g_sub.num_src_nodes()
-        indptr, indices, _ = g_sub.adj_sparse(fmt="csc")
+        indptr, indices, _ = g_sub.adj_tensors(fmt="csc")
         csf_indptr_0.append(csf_indptr_0[-1] + m_sub)
         csf_indices_0.append(ntype_node_pointer[dst_type_id] + th.arange(m_sub, dtype=th.int32))
         csf_indptr_1.append(csf_indptr_1[-1][-1] + indptr[1:])
diff --git a/examples/rgms/sparse_conv/rgms.py b/examples/rgms/sparse_conv/rgms.py
index 7199e95b7..f52cbf721 100644
--- a/examples/rgms/sparse_conv/rgms.py
+++ b/examples/rgms/sparse_conv/rgms.py
@@ -682,7 +682,7 @@ def rgms_tensorcore(
         etype_id = g.get_etype_id(str(etype))
         g_sub = g[str(etype)]
         m_sub, n_sub = g_sub.num_dst_nodes(), g_sub.num_src_nodes()
-        indptr, indices, _ = g_sub.adj_sparse(fmt="csc")
+        indptr, indices, _ = g_sub.adj_tensors(fmt="csc")
         csf_indptr_0.append(csf_indptr_0[-1] + m_sub)
         csf_indices_0.append(th.arange(m_sub, dtype=th.int32))
         csf_indptr_1.append(csf_indptr_1[-1][-1] + indptr[1:])
diff --git a/examples/sddmm/bench_sddmm.py b/examples/sddmm/bench_sddmm.py
index 3afb09412..aee761761 100644
--- a/examples/sddmm/bench_sddmm.py
+++ b/examples/sddmm/bench_sddmm.py
@@ -57,7 +57,7 @@ def func(
 
 def bench_sddmm(g: dgl.DGLGraph, feat_size: int):
     global sddmm
-    indptr, indices, _ = g.adj_sparse("csr")
+    indptr, indices, _ = g.adj_tensors("csr")
     m = g.num_src_nodes()
     n = g.num_dst_nodes()
     nnz = g.number_of_edges()
diff --git a/examples/spmm/bench_spmm.py b/examples/spmm/bench_spmm.py
index fdd62f5c7..e3ca9e484 100644
--- a/examples/spmm/bench_spmm.py
+++ b/examples/spmm/bench_spmm.py
@@ -100,7 +100,7 @@ def bench_hyb(
 ):
     num_buckets = len(bucket_sizes)
     coersening_factor = min(coersening_factor, feat_size // 32)
-    indptr, indices, _ = g.adj_sparse("csc")
+    indptr, indices, _ = g.adj_tensors("csc")
     m = g.num_dst_nodes()
     n = g.num_src_nodes()
     nnz = g.num_edges()
diff --git a/examples/spmm/bench_spmm_naive.py b/examples/spmm/bench_spmm_naive.py
index b48c75ccc..d342aed34 100644
--- a/examples/spmm/bench_spmm_naive.py
+++ b/examples/spmm/bench_spmm_naive.py
@@ -79,7 +79,7 @@ def bench_hyb(
     feat_size=128,
     cwm=2,
 ):
-    indptr, indices, _ = g.adj_sparse("csc")
+    indptr, indices, _ = g.adj_tensors("csc")
     m = g.num_dst_nodes()
     n = g.num_src_nodes()
     nnz = g.num_edges()
diff --git a/examples/spmm/bench_tc_spmm.py b/examples/spmm/bench_tc_spmm.py
index 258e6cc00..65ac2f9cc 100644
--- a/examples/spmm/bench_tc_spmm.py
+++ b/examples/spmm/bench_tc_spmm.py
@@ -578,7 +578,7 @@ def parse_mma_shape(mma_shape_str: str):
 
 def bench_tc_spmm(g: dgl.DGLHeteroGraph, x: th.Tensor, y_golden: th.Tensor, mma_shape_str: str):
     mma_m, mma_n, mma_k = parse_mma_shape(mma_shape_str)
-    indptr, indices, _ = g.adj_sparse("csc")
+    indptr, indices, _ = g.adj_tensors("csc")
     indptr_nd = tvm.nd.array(indptr.numpy().astype("int32"), device=tvm.cpu())
     indices_nd = tvm.nd.array(indices.numpy().astype("int32"), device=tvm.cpu())
     tile_size = mma_m
diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py
index 2a64d9a55..86af0e9d0 100644
--- a/python/tvm/contrib/nvcc.py
+++ b/python/tvm/contrib/nvcc.py
@@ -72,6 +72,12 @@ def compile_cuda(code, target_format="ptx", arch=None, options=None, path_target
     temp_code = temp.relpath("my_kernel.cu")
     temp_target = temp.relpath("my_kernel.%s" % target_format)
 
+    # CUDA 12.0+ reportedly defines htanh/htan/hatan/herf/hpow in cuda_fp16.hpp (TODO confirm);
+    # strip TVM's duplicate definitions (all toolkit versions) to avoid redefinition errors.
+    import re as _re
+    _half_fns = r"CUDA_UNSUPPORTED_HALF_MATH_(?:UNARY|BINARY)\(h(?:tanh|tan|atan|erf|pow)[^\n]*\n"
+    code = _re.sub(_half_fns, "", code)
+
     with open(temp_code, "w") as out_file:
         out_file.write(code)
 
diff --git a/tests/python/sparsetir/test_format_conversion_routine.py b/tests/python/sparsetir/test_format_conversion_routine.py
index 036436fe0..1053df2a2 100644
--- a/tests/python/sparsetir/test_format_conversion_routine.py
+++ b/tests/python/sparsetir/test_format_conversion_routine.py
@@ -105,7 +105,7 @@ def test_column_part_hyb():
     g = dgl.rand_graph(1000, 10000).int()
     column_parts = 4
     buckets = [1, 2, 4]
-    indptr, indices, _ = g.adj_sparse("csc")
+    indptr, indices, _ = g.adj_tensors("csc")
     indptr_nd = tvm.nd.array(indptr.numpy(), device=tvm.cpu())
     indices_nd = tvm.nd.array(indices.numpy(), device=tvm.cpu())
     # built-in c++ funcion
@@ -146,7 +146,7 @@ def condense_py(indptr, indices, block_size):
 def test_condense():
     g = dgl.rand_graph(1000, 10000).int()
     t = 4
-    indptr, indices, _ = g.adj_sparse("csc")
+    indptr, indices, _ = g.adj_tensors("csc")
     indptr = indptr.numpy()
     indices = indices.numpy()
     indptr_nd = tvm.nd.array(indptr, device=tvm.cpu())
@@ -197,7 +197,7 @@ def test_hetero_csr_to_ell3d():
         g_sub = g[etype]
         # print(g_sub)
         m, n = g_sub.num_dst_nodes(), g_sub.num_src_nodes()
-        indptr, indices, _ = g_sub.adj_sparse(fmt="csc")
+        indptr, indices, _ = g_sub.adj_tensors(fmt="csc")
         # print(indptr, indices)
         csf_indptr_0.append(csf_indptr_0[-1] + m)
         csf_indices_0.append(ntype_node_pointer[src_type_id] + th.arange(m, dtype=th.int32))