# Source: LMCache/setup.py at dev · hlin99/LMCache · GitHub
# SPDX-License-Identifier: Apache-2.0
# Standard
from pathlib import Path
from typing import TYPE_CHECKING
import os
import sys

# Third Party
from setuptools import find_packages, setup

if TYPE_CHECKING:
    # Third Party
    from setuptools.extension import Extension

def _env_flag(name: str, default: str = "0") -> bool:
    """Return True only when environment variable `name` is exactly "1"."""
    return os.environ.get(name, default) == "1"


# Directory containing this setup.py, plus the hipify input/output directories.
ROOT_DIR = Path(__file__).parent
HIPIFY_DIR = os.path.join(ROOT_DIR, "csrc/")
HIPIFY_OUT_DIR = os.path.join(ROOT_DIR, "csrc_hip/")

# `python -m build --sdist` will run `python setup.py sdist --dist-dir dist`,
# so the presence of "sdist" in argv marks a source-distribution build.
BUILDING_SDIST = "sdist" in sys.argv

# `NO_NATIVE_EXT=1` skips compilation of every native extension (pure-Python
# build). `NO_CUDA_EXT=1` is the legacy spelling, still honoured for backwards
# compatibility; it has always controlled all native extensions, not just the
# CUDA ones.
_LEGACY_NO_CUDA_EXT = _env_flag("NO_CUDA_EXT")
NO_NATIVE_EXT = _env_flag("NO_NATIVE_EXT") or _LEGACY_NO_CUDA_EXT
if _LEGACY_NO_CUDA_EXT:
    print(
        "warning: NO_CUDA_EXT is deprecated; use NO_NATIVE_EXT=1 instead.",
        file=sys.stderr,
    )

# Build the common C++ extensions only; skip CUDA / ROCm / SYCL.
NO_GPU_EXT = _env_flag("NO_GPU_EXT")

# Backend selectors: HIP or SYCL when set, CUDA otherwise.
BUILD_WITH_HIP = _env_flag("BUILD_WITH_HIP")
BUILD_WITH_SYCL = _env_flag("BUILD_WITH_SYCL")

# Enabled by default; set ENABLE_CXX11_ABI to anything other than "1" to
# turn it off.
ENABLE_CXX11_ABI = _env_flag("ENABLE_CXX11_ABI", "1")


def _read_requirements(path: Path) -> list[str]:
    """Load requirement specifiers from a pip-style requirements file.

    Blank lines and comments are dropped. Comments follow pip's rule: a `#`
    at the start of a line, or preceded by whitespace, starts a comment that
    runs to the end of the line. A `#` that is not preceded by whitespace
    (for example a URL fragment such as `...whl#sha256=...`) is kept.

    Args:
        path: Location of the requirements file.

    Returns:
        The requirement strings in file order, or an empty list when `path`
        does not exist.
    """
    # Standard
    import re

    if not path.exists():
        return []

    reqs: list[str] = []
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    for raw in path.read_text(encoding="utf-8").splitlines():
        # Strip full-line and trailing comments before handing the entry to
        # setuptools, which does not accept inline comments in requirements.
        line = re.sub(r"(^|\s+)#.*$", "", raw).strip()
        if line:
            reqs.append(line)
    return reqs


def hipify_wrapper() -> None:
    """Run PyTorch's `hipify` over the files located directly in HIPIFY_DIR.

    Only regular files at the top level of HIPIFY_DIR are passed (as absolute
    paths) through `extra_files`; output is directed to HIPIFY_OUT_DIR. The
    per-file result is resolved to its hipified path when one is reported and
    to the original path otherwise. That list is used solely for a sanity
    check; nothing is returned.
    """
    # Third Party
    from torch.utils.hipify.hipify_python import hipify

    print("Hipifying sources ")

    # Absolute path of every regular file directly under HIPIFY_DIR.
    extra_files = []
    for entry in os.listdir(HIPIFY_DIR):
        candidate = os.path.join(HIPIFY_DIR, entry)
        if os.path.isfile(candidate):
            extra_files.append(os.path.abspath(candidate))

    hipify_result = hipify(
        project_directory=HIPIFY_DIR,
        output_directory=HIPIFY_OUT_DIR,
        header_include_dirs=[],
        includes=[],
        extra_files=extra_files,
        show_detailed=True,
        is_pytorch_extension=True,
        hipify_extra_files_only=True,
    )

    def _resolve(source: str) -> str:
        # Prefer the hipified path; fall back to the untouched source when
        # hipify has no entry for it or reports no output path.
        s_abs = os.path.abspath(source)
        if s_abs in hipify_result:
            hipified_path = hipify_result[s_abs].hipified_path
            if hipified_path is not None:
                return hipified_path
        return s_abs

    hipified_sources = [_resolve(source) for source in extra_files]

    assert len(hipified_sources) == len(extra_files)


def _mooncake_extension(
    cpp_extension,
    mooncake_sources: list[str],
    extra_cxx_flags: list[str],
) -> list:
    """Return the optional mooncake CppExtension, wrapped in a list.

    The extension is enabled when `BUILD_MOONCAKE` is set to "1". When
    `BUILD_MOONCAKE` is unset, a non-empty `MOONCAKE_INCLUDE_DIR` enables it
    instead. Any other `BUILD_MOONCAKE` value disables it.

    `MOONCAKE_INCLUDE_DIR` and `MOONCAKE_LIB_DIR` may each hold several
    directories separated by ";".

    Args:
        cpp_extension: Object providing a `CppExtension` factory.
        mooncake_sources: Source files for the extension.
        extra_cxx_flags: Flags placed ahead of the fixed mooncake C++ flags.

    Returns:
        An empty list when mooncake is disabled, otherwise a one-element list.
    """
    include_env = os.environ.get("MOONCAKE_INCLUDE_DIR", "")
    explicit_switch = os.environ.get("BUILD_MOONCAKE")
    # An explicit BUILD_MOONCAKE always wins over the include-dir heuristic.
    enabled = (
        (explicit_switch == "1") if explicit_switch is not None else (include_env != "")
    )
    if not enabled:
        return []

    include_dirs = ["csrc/storage_backends", "csrc/storage_backends/mooncake"]
    if include_env:
        include_dirs += include_env.split(";")

    library_env = os.environ.get("MOONCAKE_LIB_DIR", "")
    library_dirs: list[str] = library_env.split(";") if library_env else []

    # NOTE(review): the very same list object is handed over for both
    # `library_dirs` and `runtime_library_dirs`; keep it that way, since an
    # in-place change made by the extension factory would be seen by both.
    extension = cpp_extension.CppExtension(
        "lmcache.lmcache_mooncake",
        sources=mooncake_sources,
        include_dirs=include_dirs,
        library_dirs=library_dirs,
        libraries=["mooncake_store"],
        runtime_library_dirs=library_dirs,
        extra_compile_args={
            "cxx": extra_cxx_flags + ["-O3", "-std=c++20", "-DYLT_ENABLE_IBV"],
        },
    )
    return [extension]


def _common_cpp_extensions(
    extra_cxx_flags: list[str], fs_extra_cxx_flags: list[str] | None = None
) -> tuple[list["Extension"], dict[str, type]]:
    """Assemble the GPU-independent C++ extensions.

    Three extensions are always produced, in this order:
    `lmcache.native_storage_ops`, `lmcache.lmcache_redis` and
    `lmcache.lmcache_fs`. The optional mooncake extension is appended when
    its environment variables enable it.

    Args:
        extra_cxx_flags: Compiler flags placed ahead of `-O3 -std=c++17` for
            `native_storage_ops` and `lmcache_redis`; also forwarded to the
            mooncake extension.
        fs_extra_cxx_flags: Flags used in place of `extra_cxx_flags` for
            `lmcache_fs` only. `None` means "same as `extra_cxx_flags`".

    Notes:
        The separate `lmcache_fs` flag list exists so callers can reproduce
        the pre-refactor SYCL build, in which `lmcache_fs` intentionally
        omitted the ABI define.

    Returns:
        A tuple of:
            - list: the extension modules described above.
            - dict: cmdclass mapping `build_ext` to torch's BuildExtension.
    """
    # Third Party
    from torch.utils import cpp_extension

    fs_flags = extra_cxx_flags if fs_extra_cxx_flags is None else fs_extra_cxx_flags
    shared_tail = ["-O3", "-std=c++17"]

    # One row per extension: (module name, sources, include dirs, leading flags).
    specs = [
        (
            "lmcache.native_storage_ops",
            [
                "csrc/storage_manager/bitmap.cpp",
                "csrc/storage_manager/pybind.cpp",
                "csrc/storage_manager/ttl_lock.cpp",
                "csrc/storage_manager/utils.cpp",
            ],
            ["csrc/storage_manager"],
            extra_cxx_flags,
        ),
        (
            "lmcache.lmcache_redis",
            [
                "csrc/storage_backends/redis/pybind.cpp",
                "csrc/storage_backends/redis/connector.cpp",
            ],
            ["csrc/storage_backends", "csrc/storage_backends/redis"],
            extra_cxx_flags,
        ),
        (
            "lmcache.lmcache_fs",
            [
                "csrc/storage_backends/fs/pybind.cpp",
                "csrc/storage_backends/fs/connector.cpp",
            ],
            ["csrc/storage_backends", "csrc/storage_backends/fs"],
            fs_flags,
        ),
    ]
    ext_modules = [
        cpp_extension.CppExtension(
            module_name,
            sources=sources,
            include_dirs=include_dirs,
            extra_compile_args={"cxx": leading_flags + shared_tail},
        )
        for module_name, sources, include_dirs, leading_flags in specs
    ]

    # Mooncake is optional; the helper yields zero or one extension.
    mooncake_sources = [
        "csrc/storage_backends/mooncake/pybind.cpp",
        "csrc/storage_backends/mooncake/connector.cpp",
    ]
    ext_modules += _mooncake_extension(cpp_extension, mooncake_sources, extra_cxx_flags)

    return ext_modules, {"build_ext": cpp_extension.BuildExtension}


def cuda_extension() -> tuple[list, dict]:
    """Describe the CUDA build of `lmcache.c_ops`.

    The `_GLIBCXX_USE_CXX11_ABI` define is set to 1 or 0 according to the
    module-level `ENABLE_CXX11_ABI` flag and is passed to both the host
    compiler and nvcc.

    Returns:
        A tuple of:
            - list: a single CUDAExtension for `lmcache.c_ops`.
            - dict: cmdclass mapping `build_ext` to torch's BuildExtension.
    """
    # Third Party
    from torch.utils import cpp_extension  # Import here

    print("Building CUDA extensions")
    abi_value = 1 if ENABLE_CXX11_ABI else 0
    flag_cxx_abi = f"-D_GLIBCXX_USE_CXX11_ABI={abi_value}"

    c_ops = cpp_extension.CUDAExtension(
        "lmcache.c_ops",
        sources=[
            "csrc/pybind.cpp",
            "csrc/mem_kernels.cu",
            "csrc/mp_mem_kernels.cu",
            "csrc/cal_cdf.cu",
            "csrc/ac_enc.cu",
            "csrc/ac_dec.cu",
            "csrc/pos_kernels.cu",
            "csrc/mem_alloc.cpp",
            "csrc/utils.cpp",
            "csrc/event_recorder.cpp",
            "csrc/completion_recorder.cpp",
        ],
        extra_compile_args={
            "cxx": [flag_cxx_abi, "-std=c++17"],
            "nvcc": [flag_cxx_abi],
        },
    )
    return [c_ops], {"build_ext": cpp_extension.BuildExtension}


def rocm_extension() -> tuple[list, dict]:
    """Describe the ROCm/HIP build of `lmcache.c_ops`.

    Runs `hipify_wrapper()` first, then declares a CppExtension over the
    hipified source names. ROCm headers and libraries are taken from
    `$ROCM_PATH/include` and `$ROCM_PATH/lib`, with `/opt/rocm` as the
    default root.

    Returns:
        A tuple of:
            - list: a single CppExtension for `lmcache.c_ops`.
            - dict: cmdclass mapping `build_ext` to torch's BuildExtension.
    """
    # Third Party
    from torch.utils import cpp_extension  # Import here

    print("Building ROCM extensions")
    hipify_wrapper()

    rocm_root = os.environ.get("ROCM_PATH", "/opt/rocm")

    # CppExtension is used for HIP and hipcc is expected to do the work, so
    # the CXX environment variable should point at hipcc for this build,
    # e.g. `CXX=hipcc python setup.py install`.
    c_ops = cpp_extension.CppExtension(
        "lmcache.c_ops",
        sources=[
            "csrc/pybind_hip.cpp",  # hipified pybind entry point
            "csrc/mem_kernels.hip",
            "csrc/mp_mem_kernels.hip",
            "csrc/cal_cdf.hip",
            "csrc/ac_enc.hip",
            "csrc/ac_dec.hip",
            "csrc/pos_kernels.hip",
            "csrc/mem_alloc_hip.cpp",
            "csrc/utils_hip.cpp",
            "csrc/event_recorder.cpp",
            "csrc/completion_recorder.cpp",
        ],
        extra_compile_args={
            # Only a "cxx" entry: there is no "nvcc" key for a hipcc build
            # through CppExtension. No ABI define is passed here. Further
            # HIP-specific flags (for instance an --offload-arch target, or
            # `-x hip`) can be appended to this list if a build needs them.
            "cxx": ["-O3", "-std=c++17"],
        },
        # Explicit ROCm search paths, in case they are not found automatically.
        include_dirs=[os.path.join(rocm_root, "include")],
        library_dirs=[os.path.join(rocm_root, "lib")],
        define_macros=[("__HIP_PLATFORM_HCC__", "1"), ("USE_ROCM", "1")],
    )
    return [c_ops], {"build_ext": cpp_extension.BuildExtension}


def sycl_extension() -> tuple[list, dict]:
    """Describe the SYCL/XPU build of `lmcache.xpu_ops`.

    Requires the `icpx` compiler on PATH; the process exits with an
    explanatory message when it is missing. On success the `CXX` environment
    variable is set to `icpx`, and oneAPI headers and libraries are taken
    from `$ONEAPI_ROOT` (default `/opt/intel/oneapi`).

    Returns:
        A tuple of:
            - list: a single SyclExtension for `lmcache.xpu_ops`.
            - dict: cmdclass mapping `build_ext` to torch's BuildExtension.
    """
    # Third Party
    from torch.utils import cpp_extension  # Import here

    print("Building SYCL/XPU extensions")

    # Standard
    import shutil

    icpx_path = shutil.which("icpx")
    if icpx_path is None:
        sys.exit("icpx not found. Please source oneAPI setvars.sh at first")
    os.environ["CXX"] = "icpx"

    oneapi_root = os.environ.get("ONEAPI_ROOT", "/opt/intel/oneapi")

    compile_flags = [
        "-std=c++17",
        "-D_GLIBCXX_USE_CXX11_ABI=1",
        "-O3",
        # Enables SYCL compilation (the same flag is also given to the linker).
        "-fsycl",
        # Allow 64-bit index arithmetic in SYCL kernels.
        "-fno-sycl-id-queries-fit-in-int",
        # Aggressive FP optimisations. Considered safe for the current
        # implementation because the kernels only copy data and do no FP
        # arithmetic; review this if FP math is ever added.
        "-ffast-math",
        # Unroll inner copy loops for better instruction packing.
        "-funroll-loops",
        # Silence deprecation warnings coming from the SYCL standard headers
        # (sycl/accessor.hpp refers to the deprecated 'host_buffer'
        # internally; this code uses USM only).
        "-Wno-deprecated-declarations",
        "-Wno-nan-infinity-disabled",
    ]
    xpu_ops = cpp_extension.SyclExtension(
        "lmcache.xpu_ops",
        sources=[
            "csrc/sycl/pybind_sycl.cpp",
            "csrc/sycl/mem_kernels_sycl.cpp",
            "csrc/sycl/cal_cdf_sycl.cpp",
            "csrc/sycl/pos_kernels_sycl.cpp",
            "csrc/sycl/ac_enc_sycl.cpp",
            "csrc/sycl/ac_dec_sycl.cpp",
        ],
        include_dirs=[oneapi_root + "/include"],
        library_dirs=[oneapi_root + "/lib"],
        extra_compile_args={"cxx": compile_flags},
        extra_link_args=["-fsycl"],
    )
    return [xpu_ops], {"build_ext": cpp_extension.BuildExtension}


def source_dist_extension() -> tuple[list, dict]:
    """Return the empty extension list and cmdclass used for sdist builds."""
    print("Not building CUDA/HIP/SYCL extensions for sdist")
    no_ext_modules: list = []
    no_cmdclass: dict = {}
    return no_ext_modules, no_cmdclass


def _get_common_cpp_flags() -> list[str]:
    """Pick the ABI define for the pure C++ extensions.

    HIP builds get no ABI define at all. SYCL builds always use the CXX11 ABI.
    Otherwise the define follows `ENABLE_CXX11_ABI`.

    Returns:
        A list holding zero or one compiler flag.
    """
    if BUILD_WITH_HIP:
        return []
    # SYCL forces the new ABI regardless of ENABLE_CXX11_ABI.
    abi_value = "1" if (BUILD_WITH_SYCL or ENABLE_CXX11_ABI) else "0"
    return ["-D_GLIBCXX_USE_CXX11_ABI=" + abi_value]


def _collect_extensions() -> tuple[list, dict]:
    """Choose the extension modules for the current build configuration.

    Returns:
        A tuple of:
            - list: extension modules selected for the current build mode.
            - dict: cmdclass containing BuildExtension when extensions are built.

    Notes:
        Checked in this order:
        - `sdist`: no extensions.
        - `NO_NATIVE_EXT=1`: no extensions (pure-Python lmcache-cli wheel).
        - `NO_GPU_EXT=1`: common C++ extensions only.
        - Otherwise: common C++ extensions plus exactly one GPU backend,
          SYCL taking precedence over HIP, and CUDA as the fallback.
    """
    if BUILDING_SDIST:
        return source_dist_extension()
    if NO_NATIVE_EXT:
        return [], {}

    common_cpp_flags = _get_common_cpp_flags()
    # Historical SYCL compatibility: pre-refactor builds compiled lmcache_fs
    # without _GLIBCXX_USE_CXX11_ABI, so it receives no extra flags there.
    fs_cpp_flags = common_cpp_flags if not BUILD_WITH_SYCL else []
    ext_modules, cmdclass = _common_cpp_extensions(common_cpp_flags, fs_cpp_flags)

    if not NO_GPU_EXT:
        if BUILD_WITH_SYCL:
            build_gpu_backend = sycl_extension
        elif BUILD_WITH_HIP:
            build_gpu_backend = rocm_extension
        else:
            build_gpu_backend = cuda_extension
        # The GPU backend's cmdclass replaces the one from the common build.
        gpu_ext_modules, cmdclass = build_gpu_backend()
        ext_modules += gpu_ext_modules

    return ext_modules, cmdclass


if __name__ == "__main__":
    ext_modules, cmdclass = _collect_extensions()

    requirements_dir = ROOT_DIR / "requirements"
    install_requires = _read_requirements(requirements_dir / "common.txt")

    # Pick the backend-specific requirements file. NO_GPU_EXT builds skip the
    # GPU-vendor deps (cupy / nixl) entirely.
    if NO_GPU_EXT:
        core_file = None
    elif BUILD_WITH_HIP:
        core_file = "rocm_core.txt"
    elif BUILD_WITH_SYCL:
        core_file = "xpu_core.txt"
    else:
        # The CUDA major version chooses between the cu12 and cu13 vendor
        # pins (cupy, nixl). cu13 is the default (the PyPI build); cu12.9
        # wheel builds set LMCACHE_CUDA_MAJOR=12 to pull the cu12 wheels.
        cuda_major = os.environ.get("LMCACHE_CUDA_MAJOR", "13")
        if cuda_major not in ("12", "13"):
            raise ValueError(
                f"LMCACHE_CUDA_MAJOR must be '12' or '13', got '{cuda_major}'"
            )
        core_file = f"cuda{cuda_major}_core.txt"

    if core_file is not None:
        install_requires.extend(_read_requirements(requirements_dir / core_file))

    setup(
        # csrc is excluded so it is not picked up as a package when it only
        # contains native sources.
        packages=find_packages(exclude=("csrc",)),
        ext_modules=ext_modules,
        cmdclass=cmdclass,
        include_package_data=True,
        install_requires=install_requires,
    )