From 58a34f15760dfee844ea7007a2bf043ab6d8d47d Mon Sep 17 00:00:00 2001 From: Athrva Date: Sat, 20 Dec 2025 11:40:19 -0500 Subject: [PATCH 1/8] Add GitHub Actions CI/CD pipeline --- .github/workflows/ci.yml | 231 ++++++++++++++++++++++++++++++++++ .github/workflows/release.yml | 104 +++++++++++++++ 2 files changed, 335 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..cbf7a94 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,231 @@ +# GitHub Actions CI/CD Pipeline for PolyInfer +# Cross-platform testing on Windows, Linux, and macOS +# Uses FREE GitHub Actions plan (GitHub-hosted runners, CPU-only) + +name: CI + +on: + push: + branches: [master, main] + pull_request: + branches: [master, main] + workflow_dispatch: # Allow manual trigger + +# Cancel in-progress runs for the same branch +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + PYTHONDONTWRITEBYTECODE: 1 + PYTHONUNBUFFERED: 1 + +jobs: + # ============================================ + # Code Quality Checks (Fast, runs first) + # ============================================ + lint: + name: Lint & Type Check + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff mypy + + - name: Run ruff linter + run: ruff check src/ tests/ + + - name: Run ruff formatter check + run: ruff format --check src/ tests/ + + - name: Install package for type checking + run: pip install -e . 
+ + - name: Run mypy type checker + run: mypy src/polyinfer/ + + # ============================================ + # Cross-Platform Tests (CPU-only) + # ============================================ + test: + name: Test - Python ${{ matrix.python-version }} on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + timeout-minutes: 15 + needs: lint # Only run tests if linting passes + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + python-version: ["3.10", "3.11", "3.12"] + include: + # Only test one Python version on macOS to save minutes (10x cost) + - os: macos-latest + python-version: "3.11" + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + + - name: Install package with dev dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + + - name: List available backends + run: python -c "import polyinfer as pi; print('Available backends:', pi.list_backends()); print('Available devices:', pi.list_devices())" + + - name: Run CPU-only tests + run: | + pytest tests/ -v --tb=short -m "not (cuda or tensorrt or directml or intel_gpu or npu or vulkan or benchmark or slow)" + + # ============================================ + # Backend-Specific Tests (Optional CPU backends) + # ============================================ + test-backends: + name: Test - ${{ matrix.backend }} Backend + runs-on: ubuntu-latest + timeout-minutes: 15 + needs: lint + strategy: + fail-fast: false + matrix: + include: + - backend: openvino + install: ".[openvino,dev]" + marker: "openvino" + - backend: iree + install: ".[iree,dev]" + marker: "iree" + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + + - name: Install package with ${{ matrix.backend }} 
backend + run: | + python -m pip install --upgrade pip + pip install -e "${{ matrix.install }}" + + - name: List available backends + run: python -c "import polyinfer as pi; print('Available backends:', pi.list_backends()); print('Available devices:', pi.list_devices())" + + - name: Run ${{ matrix.backend }} tests + run: | + pytest tests/ -v --tb=short -m "not (cuda or tensorrt or directml or intel_gpu or npu or vulkan or benchmark or slow)" + continue-on-error: true # Backend tests are optional on CPU runners + + # ============================================ + # Build & Package Verification + # ============================================ + build: + name: Build Package + runs-on: ubuntu-latest + timeout-minutes: 10 + needs: lint + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + + - name: Install build tools + run: | + python -m pip install --upgrade pip + pip install build twine + + - name: Build package + run: python -m build + + - name: Check package with twine + run: twine check dist/* + + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/ + retention-days: 7 + + # ============================================ + # Windows DirectML Test (Windows-specific) + # ============================================ + test-directml: + name: Test - DirectML (Windows) + runs-on: windows-latest + timeout-minutes: 15 + needs: lint + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + + - name: Install package with DirectML backend + run: | + python -m pip install --upgrade pip + pip install -e ".[amd,dev]" + + - name: List available backends + run: python -c "import polyinfer as pi; print('Available backends:', pi.list_backends()); print('Available devices:', pi.list_devices())" + + - 
name: Run DirectML-compatible tests + run: | + pytest tests/ -v --tb=short -m "not (cuda or tensorrt or intel_gpu or npu or vulkan or benchmark or slow)" + continue-on-error: true # DirectML may not work on GitHub runners without GPU + + # ============================================ + # Summary Job (Required status check) + # ============================================ + ci-success: + name: CI Success + runs-on: ubuntu-latest + timeout-minutes: 5 + needs: [lint, test, build] + if: always() + steps: + - name: Check all jobs passed + run: | + if [[ "${{ needs.lint.result }}" != "success" ]]; then + echo "Lint job failed" + exit 1 + fi + if [[ "${{ needs.test.result }}" != "success" ]]; then + echo "Test job failed" + exit 1 + fi + if [[ "${{ needs.build.result }}" != "success" ]]; then + echo "Build job failed" + exit 1 + fi + echo "All required jobs passed!" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..76d5013 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,104 @@ +# GitHub Actions Release Pipeline for PolyInfer +# Publishes to PyPI when a GitHub release is created +# Uses trusted publishing (OIDC) - no API tokens needed + +name: Release + +on: + release: + types: [published] + +env: + PYTHONDONTWRITEBYTECODE: 1 + PYTHONUNBUFFERED: 1 + +jobs: + # ============================================ + # Run Tests Before Publishing + # ============================================ + test: + name: Test - Python ${{ matrix.python-version }} on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + timeout-minutes: 15 + strategy: + fail-fast: true # Fail fast for releases + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + python-version: ["3.11"] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + + - name: Install package 
with dev dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + + - name: Run CPU-only tests + run: | + pytest tests/ -v --tb=short -m "not (cuda or tensorrt or directml or intel_gpu or npu or vulkan or benchmark or slow)" + + # ============================================ + # Build Package + # ============================================ + build: + name: Build Package + runs-on: ubuntu-latest + timeout-minutes: 10 + needs: test + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + + - name: Install build tools + run: | + python -m pip install --upgrade pip + pip install build twine + + - name: Build package + run: python -m build + + - name: Check package with twine + run: twine check dist/* + + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/ + + # ============================================ + # Publish to PyPI + # ============================================ + publish: + name: Publish to PyPI + runs-on: ubuntu-latest + timeout-minutes: 10 + needs: build + environment: pypi # Must match the environment name in PyPI trusted publishing + permissions: + id-token: write # Required for trusted publishing + + steps: + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + name: dist + path: dist/ + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + # No API token needed - uses OIDC trusted publishing From 68a5add198fb123b0c9531c5b9bf43018fb81de4 Mon Sep 17 00:00:00 2001 From: Athrva Date: Sat, 20 Dec 2025 13:35:54 -0500 Subject: [PATCH 2/8] Fix linting issues --- src/polyinfer/__init__.py | 51 ++++---- src/polyinfer/_logging.py | 9 +- src/polyinfer/backends/__init__.py | 6 +- src/polyinfer/backends/_autoload.py | 25 ++-- src/polyinfer/backends/base.py | 13 +- src/polyinfer/backends/iree/__init__.py | 4 +- 
src/polyinfer/backends/iree/backend.py | 59 ++++----- src/polyinfer/backends/onnxruntime/backend.py | 48 +++++--- src/polyinfer/backends/openvino/backend.py | 21 ++-- src/polyinfer/backends/registry.py | 14 +-- src/polyinfer/backends/tensorrt/backend.py | 41 +++---- src/polyinfer/cli.py | 21 +++- src/polyinfer/compare.py | 12 +- src/polyinfer/config.py | 2 +- src/polyinfer/discovery.py | 20 +-- src/polyinfer/mlir.py | 6 +- src/polyinfer/model.py | 13 +- src/polyinfer/nvidia_setup.py | 15 ++- src/polyinfer/quantization.py | 114 +++++++++--------- tests/conftest.py | 30 ++--- tests/test_backend_options.py | 21 ++-- tests/test_backends.py | 3 +- tests/test_benchmark.py | 49 ++++---- tests/test_devices.py | 3 +- tests/test_inference.py | 3 +- tests/test_intel_devices.py | 14 ++- tests/test_logging.py | 13 +- tests/test_mlir.py | 28 +++-- tests/test_quantization.py | 86 +++++-------- tests/test_yolov8.py | 103 +++++++++------- 30 files changed, 437 insertions(+), 410 deletions(-) diff --git a/src/polyinfer/__init__.py b/src/polyinfer/__init__.py index 0104c0f..a5f728d 100644 --- a/src/polyinfer/__init__.py +++ b/src/polyinfer/__init__.py @@ -28,40 +28,39 @@ # Auto-setup NVIDIA libraries BEFORE importing anything else # This ensures CUDA, cuDNN, TensorRT DLLs are findable -from polyinfer import nvidia_setup as _nvidia_setup -from polyinfer.nvidia_setup import fix_onnxruntime_conflict, get_nvidia_info, setup_tensorrt_paths - -from polyinfer.model import load, Model +from polyinfer import nvidia_setup as _nvidia_setup # noqa: F401 +from polyinfer._logging import ( + LogContext, + configure_logging, + disable_logging, + enable_logging, + get_log_level, + get_log_level_name, + get_logger, + set_log_level, +) +from polyinfer.compare import benchmark, compare +from polyinfer.config import InferenceConfig from polyinfer.discovery import ( - list_backends, - list_devices, get_backend, is_available, + list_backends, + list_devices, ) -from polyinfer.config import InferenceConfig 
-from polyinfer.compare import compare, benchmark -from polyinfer.mlir import export_mlir, compile_mlir, MLIROutput +from polyinfer.mlir import MLIROutput, compile_mlir, export_mlir +from polyinfer.model import Model, load +from polyinfer.nvidia_setup import fix_onnxruntime_conflict, get_nvidia_info, setup_tensorrt_paths from polyinfer.quantization import ( - quantize, - quantize_dynamic, - quantize_static, - convert_to_fp16, - quantize_for_tensorrt, - QuantizationResult, + CalibrationMethod, QuantizationConfig, QuantizationMethod, + QuantizationResult, QuantizationType, - CalibrationMethod, -) -from polyinfer._logging import ( - get_logger, - set_log_level, - get_log_level, - get_log_level_name, - enable_logging, - disable_logging, - configure_logging, - LogContext, + convert_to_fp16, + quantize, + quantize_dynamic, + quantize_for_tensorrt, + quantize_static, ) __all__ = [ diff --git a/src/polyinfer/_logging.py b/src/polyinfer/_logging.py index 4fdbd24..d6c99ac 100644 --- a/src/polyinfer/_logging.py +++ b/src/polyinfer/_logging.py @@ -26,7 +26,6 @@ import logging import sys -from typing import Union # Create the polyinfer logger hierarchy _logger = logging.getLogger("polyinfer") @@ -85,7 +84,7 @@ def get_logger(name: str = "") -> logging.Logger: return _logger -def set_log_level(level: Union[str, int]) -> None: +def set_log_level(level: str | int) -> None: """Set the global polyinfer log level. Args: @@ -128,7 +127,7 @@ def get_log_level_name() -> str: return logging.getLevelName(level) -def enable_logging(level: Union[str, int] = "INFO") -> None: +def enable_logging(level: str | int = "INFO") -> None: """Enable logging with specified level. Convenience function to quickly enable verbose logging. 
@@ -152,7 +151,7 @@ def disable_logging() -> None: def configure_logging( - level: Union[str, int] = "WARNING", + level: str | int = "WARNING", format: str = "[%(levelname)s] %(name)s: %(message)s", stream=None, filename: str = None, @@ -205,7 +204,7 @@ class LogContext: >>> # Back to previous level """ - def __init__(self, level: Union[str, int]): + def __init__(self, level: str | int): self.new_level = level self.old_level = None diff --git a/src/polyinfer/backends/__init__.py b/src/polyinfer/backends/__init__.py index afbd419..1ffd8b5 100644 --- a/src/polyinfer/backends/__init__.py +++ b/src/polyinfer/backends/__init__.py @@ -2,11 +2,11 @@ from polyinfer.backends.base import Backend, CompiledModel from polyinfer.backends.registry import ( - register_backend, + BackendInfo, get_backend, - list_backends, get_backends_for_device, - BackendInfo, + list_backends, + register_backend, ) __all__ = [ diff --git a/src/polyinfer/backends/_autoload.py b/src/polyinfer/backends/_autoload.py index 488874d..fce79b4 100644 --- a/src/polyinfer/backends/_autoload.py +++ b/src/polyinfer/backends/_autoload.py @@ -1,7 +1,9 @@ """Auto-load and register available backends.""" -import sys +import contextlib import logging +import sys + from polyinfer.backends.registry import register_backend # Use logging module directly to avoid circular imports @@ -69,6 +71,7 @@ def _should_use_lazy_onnxruntime() -> bool: # Check if onnxruntime-gpu is installed (vs plain onnxruntime) try: import importlib.metadata as metadata + metadata.version("onnxruntime-gpu") return True # onnxruntime-gpu installed, use lazy loading except Exception: @@ -202,13 +205,14 @@ def _ensure_loaded(cls): cls._import_attempted = True try: from polyinfer.backends.iree.backend import IREEBackend + cls._real_backend = IREEBackend() except ImportError as e: cls._import_error = RuntimeError( f"IREE not available: {e}. 
" "Install with: pip install iree-base-runtime iree-base-compiler" ) - raise cls._import_error + raise cls._import_error from e @property def name(self) -> str: @@ -234,6 +238,7 @@ def priority(self) -> int: def is_available(self) -> bool: try: import importlib.metadata as metadata + metadata.version("iree-base-runtime") return True except Exception: @@ -245,10 +250,8 @@ def load(self, model_path: str, device: str = "cpu", **kwargs): self._ensure_loaded() return self._real_backend.load(model_path, device, **kwargs) - try: + with contextlib.suppress(Exception): register_backend("iree", LazyIREEBackend) - except Exception: - pass def _register_lazy_onnxruntime(): @@ -282,13 +285,14 @@ def _ensure_loaded(cls): cls._import_attempted = True try: from polyinfer.backends.onnxruntime.backend import ONNXRuntimeBackend + cls._real_backend = ONNXRuntimeBackend() except ImportError as e: cls._import_error = RuntimeError( f"ONNX Runtime not available: {e}. " "Install with: pip install onnxruntime or onnxruntime-gpu" ) - raise cls._import_error + raise cls._import_error from e @property def name(self) -> str: @@ -301,6 +305,7 @@ def supported_devices(self) -> list[str]: devices = ["cpu"] try: import importlib.metadata as metadata + # If onnxruntime-gpu is installed, CUDA devices are likely available metadata.version("onnxruntime-gpu") devices.append("cuda") @@ -328,12 +333,14 @@ def is_available(self) -> bool: # Check if onnxruntime package exists without importing it try: import importlib.metadata as metadata + metadata.version("onnxruntime") return True except Exception: pass try: import importlib.metadata as metadata + metadata.version("onnxruntime-gpu") return True except Exception: @@ -345,7 +352,7 @@ def load(self, model_path: str, device: str = "cpu", **kwargs): self._ensure_loaded() return self._real_backend.load(model_path, device, **kwargs) - try: + # TODO: Narrow exception suppression to specific types once register_backend() + # error conditions are documented. 
+ with contextlib.suppress(Exception): register_backend("onnxruntime", LazyONNXRuntimeBackend) - except Exception: - pass # Registration failed, skip silently diff --git a/src/polyinfer/backends/base.py b/src/polyinfer/backends/base.py index 75ee11f..a5ca904 100644 --- a/src/polyinfer/backends/base.py +++ b/src/polyinfer/backends/base.py @@ -1,9 +1,10 @@ """Base classes for all backends.""" +import time from abc import ABC, abstractmethod -from typing import Any, Union +from typing import Any + import numpy as np -import time class CompiledModel(ABC): @@ -45,7 +46,7 @@ def output_shapes(self) -> list[tuple]: return [] @abstractmethod - def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, ...]]: + def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]: """Run inference on input tensors. Args: @@ -56,9 +57,7 @@ def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, . """ ... - def run( - self, inputs: dict[str, np.ndarray] - ) -> dict[str, np.ndarray]: + def run(self, inputs: dict[str, np.ndarray]) -> dict[str, np.ndarray]: """Run inference with named inputs/outputs. 
Args: @@ -74,7 +73,7 @@ def run( if isinstance(outputs, np.ndarray): outputs = (outputs,) - return dict(zip(self.output_names, outputs)) + return dict(zip(self.output_names, outputs, strict=False)) def benchmark( self, diff --git a/src/polyinfer/backends/iree/__init__.py b/src/polyinfer/backends/iree/__init__.py index c649a62..c973aa3 100644 --- a/src/polyinfer/backends/iree/__init__.py +++ b/src/polyinfer/backends/iree/__init__.py @@ -12,11 +12,11 @@ """ from polyinfer.backends.iree.backend import ( + DEVICE_TO_DRIVER, + DEVICE_TO_TARGET, IREEBackend, IREEModel, MLIROutput, - DEVICE_TO_TARGET, - DEVICE_TO_DRIVER, ) __all__ = [ diff --git a/src/polyinfer/backends/iree/backend.py b/src/polyinfer/backends/iree/backend.py index 0d7fa07..09ed69b 100644 --- a/src/polyinfer/backends/iree/backend.py +++ b/src/polyinfer/backends/iree/backend.py @@ -1,16 +1,16 @@ """IREE backend implementation.""" -from pathlib import Path -from typing import Union -import numpy as np +import shutil import subprocess -import tempfile import sys -import shutil +import tempfile from dataclasses import dataclass +from pathlib import Path + +import numpy as np -from polyinfer.backends.base import Backend, CompiledModel from polyinfer._logging import get_logger +from polyinfer.backends.base import Backend, CompiledModel _logger = get_logger("backends.iree") @@ -25,6 +25,7 @@ class MLIROutput: source_model: Path to the source ONNX model dialect: The MLIR dialect used (e.g., 'iree') """ + path: Path content: str | None = None source_model: Path | None = None @@ -54,12 +55,13 @@ def save(self, output_path: str | Path) -> Path: return output_path + # Check if IREE is available try: import iree.runtime as iree_rt IREE_RUNTIME_AVAILABLE = True - _logger.debug(f"IREE Runtime available") + _logger.debug("IREE Runtime available") except ImportError: IREE_RUNTIME_AVAILABLE = False iree_rt = None @@ -155,10 +157,7 @@ def __init__( driver = DEVICE_TO_DRIVER.get(device_type, "local-task") # Load the module 
using the simpler BoundModule API - self._module = iree_rt.load_vm_flatbuffer_file( - str(vmfb_path), - driver=driver - ) + self._module = iree_rt.load_vm_flatbuffer_file(str(vmfb_path), driver=driver) # Find the main inference function self._func = None @@ -192,7 +191,7 @@ def input_names(self) -> list[str]: def output_names(self) -> list[str]: return self._output_names - def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, ...]]: + def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]: """Run inference.""" # Ensure inputs are contiguous float32 inputs = tuple(np.ascontiguousarray(inp, dtype=np.float32) for inp in inputs) @@ -225,14 +224,14 @@ def supported_devices(self) -> list[str]: # Check for Vulkan try: - config = iree_rt.Config(driver_name="vulkan") + iree_rt.Config(driver_name="vulkan") devices.append("vulkan") except Exception: pass # Check for CUDA try: - config = iree_rt.Config(driver_name="cuda") + iree_rt.Config(driver_name="cuda") devices.append("cuda") except Exception: pass @@ -255,13 +254,8 @@ def is_available(self) -> bool: if not IREE_RUNTIME_AVAILABLE: return False - # Also need compiler tools to be useful - if not IREE_COMPILER_AVAILABLE: - # Check for CLI tools as fallback - if not _get_iree_import_onnx() or not _get_iree_compile(): - return False - - return True + # Need compiler tools or CLI tools as fallback + return IREE_COMPILER_AVAILABLE or (_get_iree_import_onnx() and _get_iree_compile()) def load( self, @@ -358,10 +352,7 @@ def emit_mlir( raise FileNotFoundError(f"Model not found: {model_path}") # Determine output path - if output_path is None: - output_path = model_path.with_suffix(".mlir") - else: - output_path = Path(output_path) + output_path = model_path.with_suffix(".mlir") if output_path is None else Path(output_path) output_path.parent.mkdir(parents=True, exist_ok=True) @@ -377,7 +368,7 @@ def emit_mlir( # Convert ONNX to MLIR _logger.debug(f"Converting ONNX to MLIR: 
{model_path} -> {output_path}") try: - result = subprocess.run( + subprocess.run( [iree_import, str(model_path), "-o", str(output_path)], check=True, capture_output=True, @@ -387,7 +378,7 @@ def emit_mlir( except subprocess.CalledProcessError as e: error_msg = e.stderr if e.stderr else str(e) _logger.error(f"ONNX to MLIR conversion failed: {error_msg}") - raise RuntimeError(f"ONNX to MLIR conversion failed: {error_msg}") + raise RuntimeError(f"ONNX to MLIR conversion failed: {error_msg}") from e # Load content if requested content = None @@ -459,7 +450,8 @@ def compile_mlir( str(mlir_path), f"--iree-hal-target-backends={target}", f"--iree-opt-level=O{opt_level}", - "-o", str(output_path), + "-o", + str(output_path), ] # Add target-specific flags @@ -470,7 +462,7 @@ def compile_mlir( subprocess.run(cmd, check=True, capture_output=True, text=True) except subprocess.CalledProcessError as e: error_msg = e.stderr if e.stderr else str(e) - raise RuntimeError(f"MLIR compilation failed: {error_msg}") + raise RuntimeError(f"MLIR compilation failed: {error_msg}") from e return output_path @@ -523,7 +515,7 @@ def _compile_with_cli( try: # ONNX -> MLIR - result = subprocess.run( + subprocess.run( [iree_import, str(onnx_path), "-o", str(mlir_path)], check=True, capture_output=True, @@ -537,19 +529,20 @@ def _compile_with_cli( str(mlir_path), f"--iree-hal-target-backends={target}", f"--iree-opt-level=O{opt_level}", - "-o", str(vmfb_path), + "-o", + str(vmfb_path), ] # Add target-specific flags if target == "llvm-cpu": cmd.append("--iree-llvmcpu-target-cpu=host") - result = subprocess.run(cmd, check=True, capture_output=True, text=True) + subprocess.run(cmd, check=True, capture_output=True, text=True) return vmfb_path except subprocess.CalledProcessError as e: error_msg = e.stderr if e.stderr else str(e) - raise RuntimeError(f"IREE compilation failed: {error_msg}") + raise RuntimeError(f"IREE compilation failed: {error_msg}") from e finally: if mlir_path.exists(): diff --git 
a/src/polyinfer/backends/onnxruntime/backend.py b/src/polyinfer/backends/onnxruntime/backend.py index fbca8bb..a0c89ed 100644 --- a/src/polyinfer/backends/onnxruntime/backend.py +++ b/src/polyinfer/backends/onnxruntime/backend.py @@ -1,10 +1,9 @@ """ONNX Runtime backend implementation.""" -from typing import Union import numpy as np -from polyinfer.backends.base import Backend, CompiledModel from polyinfer._logging import get_logger +from polyinfer.backends.base import Backend, CompiledModel _logger = get_logger("backends.onnxruntime") @@ -79,10 +78,10 @@ def provider(self) -> str: """Return the active execution provider.""" return self._provider - def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, ...]]: + def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]: """Run inference.""" # Build input dict - input_dict = {name: arr for name, arr in zip(self._input_names, inputs)} + input_dict = {name: arr for name, arr in zip(self._input_names, inputs, strict=False)} # Run inference outputs = self._session.run(None, input_dict) @@ -94,7 +93,7 @@ def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, . 
def run(self, inputs: dict[str, np.ndarray]) -> dict[str, np.ndarray]: """Run inference with named inputs/outputs.""" outputs = self._session.run(None, inputs) - return dict(zip(self._output_names, outputs)) + return dict(zip(self._output_names, outputs, strict=False)) def _verify_tensorrt_ep_works() -> bool: @@ -112,6 +111,7 @@ def _verify_tensorrt_ep_works() -> bool: if sys.platform == "win32": # On Windows, check if nvinfer DLLs are findable import ctypes + try: ctypes.CDLL("nvinfer_10.dll") return True @@ -130,7 +130,7 @@ def _verify_tensorrt_ep_works() -> bool: # First check if already loaded (from our preload) try: # Try to find the symbol in already-loaded libraries - ctypes.CDLL(None).nvinfer_version + _ = ctypes.CDLL(None).nvinfer_version return True except (OSError, AttributeError): pass @@ -276,6 +276,7 @@ def load( if device_type == "tensorrt": _logger.debug("Setting up TensorRT paths for TensorRT EP") from polyinfer.nvidia_setup import setup_tensorrt_paths + setup_tensorrt_paths() # Get providers for device @@ -312,7 +313,9 @@ def load( if "cudnn_conv_algo_search" in kwargs: opts["cudnn_conv_algo_search"] = kwargs["cudnn_conv_algo_search"] if "do_copy_in_default_stream" in kwargs: - opts["do_copy_in_default_stream"] = str(int(kwargs["do_copy_in_default_stream"])) + opts["do_copy_in_default_stream"] = str( + int(kwargs["do_copy_in_default_stream"]) + ) elif provider == "TensorrtExecutionProvider": opts["device_id"] = str(device_id) @@ -324,7 +327,9 @@ def load( opts["trt_engine_cache_path"] = kwargs.get("cache_dir", "./trt_cache") # Optimization if "builder_optimization_level" in kwargs: - opts["trt_builder_optimization_level"] = str(kwargs["builder_optimization_level"]) + opts["trt_builder_optimization_level"] = str( + kwargs["builder_optimization_level"] + ) if "timing_cache_path" in kwargs: opts["trt_timing_cache_path"] = kwargs["timing_cache_path"] opts["trt_timing_cache_enable"] = "True" @@ -336,7 +341,9 @@ def load( if "min_subgraph_size" in 
kwargs: opts["trt_min_subgraph_size"] = str(kwargs["min_subgraph_size"]) if "max_partition_iterations" in kwargs: - opts["trt_max_partition_iterations"] = str(kwargs["max_partition_iterations"]) + opts["trt_max_partition_iterations"] = str( + kwargs["max_partition_iterations"] + ) # DLA if kwargs.get("dla_enable", False): opts["trt_dla_enable"] = "True" @@ -345,10 +352,7 @@ def load( if kwargs.get("force_sequential_engine_build", False): opts["trt_force_sequential_engine_build"] = "True" - elif provider == "DmlExecutionProvider": - opts["device_id"] = str(device_id) - - elif provider == "ROCMExecutionProvider": + elif provider == "DmlExecutionProvider" or provider == "ROCMExecutionProvider": opts["device_id"] = str(device_id) provider_options.append(opts) @@ -398,14 +402,22 @@ def load( if "TensorrtExecutionProvider" in providers: # Try falling back to CUDA EP fallback_providers = [p for p in providers if p != "TensorrtExecutionProvider"] - fallback_options = [ - opt for i, opt in enumerate(provider_options) - if providers[i] != "TensorrtExecutionProvider" - ] if provider_options else None + fallback_options = ( + [ + opt + for i, opt in enumerate(provider_options) + if providers[i] != "TensorrtExecutionProvider" + ] + if provider_options + else None + ) if fallback_providers: - _logger.warning(f"TensorRT EP failed, falling back to {fallback_providers[0]}") + _logger.warning( + f"TensorRT EP failed, falling back to {fallback_providers[0]}" + ) import warnings + warnings.warn( f"TensorRT EP failed ({error_msg[:100]}...), " f"falling back to {fallback_providers[0]}", diff --git a/src/polyinfer/backends/openvino/backend.py b/src/polyinfer/backends/openvino/backend.py index d1caccd..c0300bc 100644 --- a/src/polyinfer/backends/openvino/backend.py +++ b/src/polyinfer/backends/openvino/backend.py @@ -1,17 +1,18 @@ """OpenVINO backend implementation.""" -from typing import Union import numpy as np -from polyinfer.backends.base import Backend, CompiledModel from 
polyinfer._logging import get_logger +from polyinfer.backends.base import Backend, CompiledModel _logger = get_logger("backends.openvino") # Check if OpenVINO is available try: import openvino as ov - from openvino import Core, CompiledModel as OVCompiledModel, Tensor as OVTensor + from openvino import CompiledModel as OVCompiledModel + from openvino import Core + from openvino import Tensor as OVTensor OPENVINO_AVAILABLE = True _logger.debug(f"OpenVINO {ov.__version__} available") @@ -24,10 +25,10 @@ # Performance hint mapping PERF_HINTS = { - 0: "LATENCY", # Optimize for low latency - 1: "THROUGHPUT", # Optimize for throughput - 2: "LATENCY", # Default to latency - 3: "LATENCY", # Max optimization = latency focused + 0: "LATENCY", # Optimize for low latency + 1: "THROUGHPUT", # Optimize for throughput + 2: "LATENCY", # Default to latency + 3: "LATENCY", # Max optimization = latency focused } @@ -85,7 +86,7 @@ def input_shapes(self) -> list[tuple]: def output_shapes(self) -> list[tuple]: return self._output_shapes - def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, ...]]: + def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]: """Run inference.""" # Set inputs (must wrap in OVTensor) for i, data in enumerate(inputs): @@ -155,7 +156,9 @@ def supported_devices(self) -> list[str]: if dev == "CPU": devices.append("cpu") elif dev.startswith("GPU"): - devices.append(f"intel-gpu:{dev.replace('GPU.', '')}" if "." in dev else "intel-gpu") + devices.append( + f"intel-gpu:{dev.replace('GPU.', '')}" if "." 
in dev else "intel-gpu" + ) elif dev == "NPU": devices.append("npu") diff --git a/src/polyinfer/backends/registry.py b/src/polyinfer/backends/registry.py index bf41dc0..62895da 100644 --- a/src/polyinfer/backends/registry.py +++ b/src/polyinfer/backends/registry.py @@ -1,9 +1,9 @@ """Backend registry for managing available inference backends.""" from dataclasses import dataclass -from typing import Type -from polyinfer.backends.base import Backend + from polyinfer._logging import get_logger +from polyinfer.backends.base import Backend _logger = get_logger("backends.registry") @@ -13,7 +13,7 @@ class BackendInfo: """Information about a registered backend.""" name: str - backend_class: Type[Backend] + backend_class: type[Backend] instance: Backend | None = None available: bool | None = None # Lazily computed @@ -34,7 +34,7 @@ def is_available(self) -> bool: _backends: dict[str, BackendInfo] = {} -def register_backend(name: str, backend_class: Type[Backend]) -> None: +def register_backend(name: str, backend_class: type[Backend]) -> None: """Register a backend class. Args: @@ -67,8 +67,7 @@ def get_backend(name: str) -> Backend: if not info.is_available(): _logger.error(f"Backend '{name}' is not available") raise RuntimeError( - f"Backend '{name}' is not available. " - f"Install it with: pip install polyinfer[{name}]" + f"Backend '{name}' is not available. Install it with: pip install polyinfer[{name}]" ) _logger.debug(f"Retrieved backend: {name}") @@ -129,8 +128,7 @@ def get_best_backend(device: str) -> Backend: if not backends: available = list_backends() raise RuntimeError( - f"No backend available for device '{device}'. " - f"Available backends: {available}" + f"No backend available for device '{device}'. 
Available backends: {available}" ) return backends[0] diff --git a/src/polyinfer/backends/tensorrt/backend.py b/src/polyinfer/backends/tensorrt/backend.py index cc1d919..65a7f89 100644 --- a/src/polyinfer/backends/tensorrt/backend.py +++ b/src/polyinfer/backends/tensorrt/backend.py @@ -1,11 +1,11 @@ """Native TensorRT backend implementation.""" from pathlib import Path -from typing import Union + import numpy as np -from polyinfer.backends.base import Backend, CompiledModel from polyinfer._logging import get_logger +from polyinfer.backends.base import Backend, CompiledModel _logger = get_logger("backends.tensorrt") @@ -120,10 +120,7 @@ def _allocate_buffers(self, input_shapes: dict[str, tuple] = None): # Allocate input buffers for name in self._input_names: - if input_shapes: - shape = input_shapes[name] - else: - shape = self._bindings[name]["shape"] + shape = input_shapes[name] if input_shapes else self._bindings[name]["shape"] dtype = self._bindings[name]["dtype"] size = int(np.prod(shape)) * np.dtype(dtype).itemsize @@ -168,18 +165,18 @@ def _allocate_buffers(self, input_shapes: dict[str, tuple] = None): self._h_outputs[name] = np.empty(shape, dtype=dtype) self._allocated_shapes[name] = shape - def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, ...]]: + def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]: """Run inference.""" # For dynamic shapes, ensure buffers are allocated for current input shapes if self._has_dynamic_shapes: input_shapes = { name: tuple(data.shape) - for name, data in zip(self._input_names, inputs) + for name, data in zip(self._input_names, inputs, strict=False) } self._allocate_buffers(input_shapes) # Copy inputs to GPU - for name, data in zip(self._input_names, inputs): + for name, data in zip(self._input_names, inputs, strict=False): data = np.ascontiguousarray(data) cudart.cudaMemcpyAsync( self._d_inputs[name], @@ -342,10 +339,7 @@ def load( # Check for cached engine model_path = 
Path(model_path) cache_path = kwargs.get("cache_path") - if cache_path is None: - cache_path = model_path.with_suffix(".engine") - else: - cache_path = Path(cache_path) + cache_path = model_path.with_suffix(".engine") if cache_path is None else Path(cache_path) # Try to load cached engine (unless force_rebuild) if cache_path.exists() and not kwargs.get("force_rebuild", False): @@ -402,15 +396,13 @@ def _build_engine( if kwargs.get("int8", False): config.set_flag(trt.BuilderFlag.INT8) _logger.debug("INT8 precision enabled") - if kwargs.get("bf16", False): - if hasattr(trt.BuilderFlag, "BF16"): - config.set_flag(trt.BuilderFlag.BF16) - if kwargs.get("fp8", False): - if hasattr(trt.BuilderFlag, "FP8"): - config.set_flag(trt.BuilderFlag.FP8) - if not kwargs.get("tf32", True): # TF32 enabled by default on Ampere+ - if hasattr(trt.BuilderFlag, "TF32"): - config.clear_flag(trt.BuilderFlag.TF32) + if kwargs.get("bf16", False) and hasattr(trt.BuilderFlag, "BF16"): + config.set_flag(trt.BuilderFlag.BF16) + if kwargs.get("fp8", False) and hasattr(trt.BuilderFlag, "FP8"): + config.set_flag(trt.BuilderFlag.FP8) + if not kwargs.get("tf32", True) and hasattr(trt.BuilderFlag, "TF32"): + # TF32 enabled by default on Ampere+ + config.clear_flag(trt.BuilderFlag.TF32) if kwargs.get("strict_types", False): if hasattr(trt.BuilderFlag, "STRICT_TYPES"): config.set_flag(trt.BuilderFlag.STRICT_TYPES) @@ -428,9 +420,8 @@ def _build_engine( config.avg_timing_iterations = avg_timing # === Sparsity (Ampere+) === - if kwargs.get("sparsity", False): - if hasattr(trt.BuilderFlag, "SPARSE_WEIGHTS"): - config.set_flag(trt.BuilderFlag.SPARSE_WEIGHTS) + if kwargs.get("sparsity", False) and hasattr(trt.BuilderFlag, "SPARSE_WEIGHTS"): + config.set_flag(trt.BuilderFlag.SPARSE_WEIGHTS) # === Timing cache === timing_cache_path = kwargs.get("timing_cache_path") diff --git a/src/polyinfer/cli.py b/src/polyinfer/cli.py index 2883757..bb15bf9 100644 --- a/src/polyinfer/cli.py +++ b/src/polyinfer/cli.py @@ -6,9 
+6,10 @@ def cmd_info(args): """Show system information.""" - import polyinfer as pi import json + import polyinfer as pi + info = pi.discovery.system_info() if args.json: @@ -36,9 +37,10 @@ def cmd_info(args): def cmd_benchmark(args): """Benchmark a model.""" - import polyinfer as pi import numpy as np + import polyinfer as pi + # Parse input shape input_shape = tuple(int(x) for x in args.input_shape.split(",")) @@ -74,9 +76,10 @@ def cmd_benchmark(args): def cmd_run(args): """Run inference on a model.""" - import polyinfer as pi import numpy as np + import polyinfer as pi + # Load model model = pi.load(args.model, device=args.device, backend=args.backend) print(f"Loaded: {model}") @@ -119,16 +122,22 @@ def main(): bench_parser.add_argument("model", help="Path to ONNX model") bench_parser.add_argument("--device", "-d", default="cpu", help="Target device") bench_parser.add_argument("--backend", "-b", help="Specific backend to use") - bench_parser.add_argument("--input-shape", "-s", default="1,3,224,224", help="Input shape (comma-separated)") + bench_parser.add_argument( + "--input-shape", "-s", default="1,3,224,224", help="Input shape (comma-separated)" + ) bench_parser.add_argument("--warmup", "-w", type=int, default=10, help="Warmup iterations") - bench_parser.add_argument("--iterations", "-n", type=int, default=100, help="Benchmark iterations") + bench_parser.add_argument( + "--iterations", "-n", type=int, default=100, help="Benchmark iterations" + ) # Run command run_parser = subparsers.add_parser("run", help="Run inference") run_parser.add_argument("model", help="Path to ONNX model") run_parser.add_argument("--device", "-d", default="cpu", help="Target device") run_parser.add_argument("--backend", "-b", help="Specific backend to use") - run_parser.add_argument("--input-shape", "-s", default="1,3,224,224", help="Input shape (comma-separated)") + run_parser.add_argument( + "--input-shape", "-s", default="1,3,224,224", help="Input shape (comma-separated)" + ) 
args = parser.parse_args() diff --git a/src/polyinfer/compare.py b/src/polyinfer/compare.py index 5f65a75..28186b9 100644 --- a/src/polyinfer/compare.py +++ b/src/polyinfer/compare.py @@ -1,11 +1,12 @@ """Benchmarking and comparison utilities for PolyInfer.""" +import time from pathlib import Path from typing import Any + import numpy as np -import time -from polyinfer.discovery import list_backends, get_backend +from polyinfer.discovery import get_backend, list_backends def benchmark( @@ -113,6 +114,7 @@ def compare( if input_shape is None: # Try to get shape from model import onnx + model = onnx.load(str(model_path)) input_info = model.graph.input[0] shape = [] @@ -181,7 +183,7 @@ def compare( fastest = successful[0]["mean_ms"] for r in successful: marker = " <-- FASTEST" if r["mean_ms"] == fastest else "" - slowdown = f" ({r['mean_ms']/fastest:.2f}x)" if r["mean_ms"] != fastest else "" + slowdown = f" ({r['mean_ms'] / fastest:.2f}x)" if r["mean_ms"] != fastest else "" print( f"{r['backend']:25s}: {r['mean_ms']:6.2f} ms ({r['fps']:5.1f} FPS){slowdown}{marker}" ) @@ -213,9 +215,9 @@ def compare_all_devices( for device_info in list_devices(): device = device_info.name - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print(f"Device: {device}") - print(f"{'='*60}") + print(f"{'=' * 60}") device_results = [] for backend_name in device_info.backends: diff --git a/src/polyinfer/config.py b/src/polyinfer/config.py index c01723f..3fec6d8 100644 --- a/src/polyinfer/config.py +++ b/src/polyinfer/config.py @@ -1,7 +1,7 @@ """Configuration classes for PolyInfer.""" from dataclasses import dataclass, field -from typing import Literal, Any +from typing import Any, Literal @dataclass diff --git a/src/polyinfer/discovery.py b/src/polyinfer/discovery.py index 49d9769..a6ff539 100644 --- a/src/polyinfer/discovery.py +++ b/src/polyinfer/discovery.py @@ -2,13 +2,17 @@ from dataclasses import dataclass +from polyinfer.backends.base import Backend from polyinfer.backends.registry import 
( - list_backends as _list_backends, get_backend as _get_backend, +) +from polyinfer.backends.registry import ( get_backends_for_device, get_best_backend, ) -from polyinfer.backends.base import Backend +from polyinfer.backends.registry import ( + list_backends as _list_backends, +) @dataclass @@ -191,10 +195,12 @@ def system_info() -> dict: # Device info for device in list_devices(): - info["devices"].append({ - "name": device.name, - "type": device.device_type, - "backends": device.backends, - }) + info["devices"].append( + { + "name": device.name, + "type": device.device_type, + "backends": device.backends, + } + ) return info diff --git a/src/polyinfer/mlir.py b/src/polyinfer/mlir.py index d87b809..711779b 100644 --- a/src/polyinfer/mlir.py +++ b/src/polyinfer/mlir.py @@ -145,15 +145,13 @@ def compile_mlir( ) -# Re-export MLIROutput for convenience def __getattr__(name: str): + """Lazy import for MLIROutput to avoid loading IREE at module import time.""" if name == "MLIROutput": from polyinfer.backends.iree.backend import MLIROutput + return MLIROutput raise AttributeError(f"module {__name__!r} has no attribute {name!r}") -# For static type checkers -from polyinfer.backends.iree.backend import MLIROutput - __all__ = ["export_mlir", "compile_mlir", "MLIROutput"] diff --git a/src/polyinfer/model.py b/src/polyinfer/model.py index ff26856..7a083a4 100644 --- a/src/polyinfer/model.py +++ b/src/polyinfer/model.py @@ -1,13 +1,13 @@ """Unified model loading and inference for PolyInfer.""" from pathlib import Path -from typing import Union + import numpy as np +from polyinfer._logging import get_logger from polyinfer.backends.base import CompiledModel -from polyinfer.discovery import select_backend, get_backend from polyinfer.config import InferenceConfig -from polyinfer._logging import get_logger +from polyinfer.discovery import get_backend, select_backend _logger = get_logger("model") @@ -73,7 +73,9 @@ def __init__( _logger.debug(f"Auto-selecting backend for device: 
{device}") self._backend = select_backend(device) - _logger.debug(f"Selected backend: {self._backend.name} (priority: {self._backend.priority})") + _logger.debug( + f"Selected backend: {self._backend.name} (priority: {self._backend.priority})" + ) # Load the model _logger.debug(f"Loading with device: {device}") @@ -122,6 +124,7 @@ def _normalize_backend(backend: str | None, device: str) -> tuple[str | None, st # Check if native tensorrt backend is available try: from polyinfer.backends.registry import _backends + if "tensorrt" in _backends and _backends["tensorrt"].is_available(): return "tensorrt", device # Use native except Exception: @@ -162,7 +165,7 @@ def output_shapes(self) -> list[tuple]: """Return output tensor shapes.""" return self._model.output_shapes - def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, ...]]: + def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]: """Run inference. Args: diff --git a/src/polyinfer/nvidia_setup.py b/src/polyinfer/nvidia_setup.py index 6d026f7..cc7bc90 100644 --- a/src/polyinfer/nvidia_setup.py +++ b/src/polyinfer/nvidia_setup.py @@ -6,11 +6,11 @@ The setup happens automatically when polyinfer is imported. 
""" +import logging import os import sys import warnings from pathlib import Path -import logging # Create logger directly since _logging may not be imported yet _logger = logging.getLogger("polyinfer.nvidia_setup") @@ -76,10 +76,9 @@ def _find_nvidia_dll_dirs() -> list[Path]: nvidia_root = site_packages / "nvidia" if nvidia_root.exists(): for subdir in nvidia_root.rglob("bin"): - if subdir.is_dir() and subdir not in dll_dirs: - # Check if it contains DLLs - if any(subdir.glob("*.dll")): - dll_dirs.append(subdir) + # Check if it's a directory not already added that contains DLLs + if subdir.is_dir() and subdir not in dll_dirs and any(subdir.glob("*.dll")): + dll_dirs.append(subdir) # TensorRT root tensorrt_root = site_packages / "tensorrt_libs" @@ -194,9 +193,8 @@ def _find_tensorrt_lib_dirs() -> list[Path]: for sys_path in system_tensorrt_paths: p = Path(sys_path) - if p.exists() and p not in tensorrt_dirs: - if any(p.glob("libnvinfer.so*")): - tensorrt_dirs.append(p) + if p.exists() and p not in tensorrt_dirs and any(p.glob("libnvinfer.so*")): + tensorrt_dirs.append(p) return tensorrt_dirs @@ -380,6 +378,7 @@ def _check_onnxruntime_conflicts(): if len(installed) > 1 and sys.platform == "win32": try: import onnxruntime as ort + providers = ort.get_available_providers() has_cuda = "CUDAExecutionProvider" in providers diff --git a/src/polyinfer/quantization.py b/src/polyinfer/quantization.py index 56e387c..c57fb78 100644 --- a/src/polyinfer/quantization.py +++ b/src/polyinfer/quantization.py @@ -24,21 +24,25 @@ def data_reader(): pi.quantize("model.onnx", "model_fp16.onnx", dtype="fp16") """ +import importlib.util +from collections.abc import Callable, Iterator from dataclasses import dataclass from enum import Enum from pathlib import Path -from typing import Callable, Iterator, Union, Any + import numpy as np class QuantizationMethod(Enum): """Quantization methods available.""" - DYNAMIC = "dynamic" # Dynamic INT8 (no calibration needed) - STATIC = "static" # 
Static INT8 (requires calibration data) + + DYNAMIC = "dynamic" # Dynamic INT8 (no calibration needed) + STATIC = "static" # Static INT8 (requires calibration data) class QuantizationType(Enum): """Target quantization data type.""" + INT8 = "int8" UINT8 = "uint8" INT4 = "int4" @@ -48,14 +52,16 @@ class QuantizationType(Enum): class CalibrationMethod(Enum): """Calibration methods for static quantization.""" - MINMAX = "minmax" # Min-max calibration - ENTROPY = "entropy" # Entropy-based calibration (KL divergence) + + MINMAX = "minmax" # Min-max calibration + ENTROPY = "entropy" # Entropy-based calibration (KL divergence) PERCENTILE = "percentile" # Percentile-based calibration @dataclass class QuantizationConfig: """Configuration for quantization.""" + method: QuantizationMethod = QuantizationMethod.DYNAMIC dtype: QuantizationType = QuantizationType.INT8 calibration_method: CalibrationMethod = CalibrationMethod.MINMAX @@ -69,6 +75,7 @@ class QuantizationConfig: @dataclass class QuantizationResult: """Result of quantization operation.""" + input_path: Path output_path: Path backend: str @@ -80,12 +87,12 @@ class QuantizationResult: # Type alias for calibration data -CalibrationData = Union[ - Iterator[dict[str, np.ndarray]], # Iterator yielding input dicts - Callable[[], Iterator[dict[str, np.ndarray]]], # Factory function - list[dict[str, np.ndarray]], # List of input dicts - list[np.ndarray], # List of input arrays (single input models) -] +CalibrationData = ( + Iterator[dict[str, np.ndarray]] # Iterator yielding input dicts + | Callable[[], Iterator[dict[str, np.ndarray]]] # Factory function + | list[dict[str, np.ndarray]] # List of input dicts + | list[np.ndarray] # List of input arrays (single input models) +) def quantize( @@ -167,11 +174,7 @@ def quantize( if quant_dtype in (QuantizationType.FP16, QuantizationType.BF16): backend = "onnxruntime" elif quant_method == QuantizationMethod.STATIC: - try: - import nncf - backend = "openvino" - except ImportError: - 
backend = "onnxruntime" + backend = "openvino" if importlib.util.find_spec("nncf") is not None else "onnxruntime" else: backend = "onnxruntime" @@ -212,18 +215,16 @@ def _quantize_onnxruntime( """Quantize using ONNX Runtime quantization tools.""" try: from onnxruntime.quantization import ( + QuantFormat, + QuantType, quantize_dynamic, quantize_static, - QuantType, - QuantFormat, - CalibrationDataReader, ) from onnxruntime.quantization.calibrate import CalibrationMethod as ORTCalibMethod - except ImportError: + except ImportError as e: raise ImportError( - "onnxruntime quantization not available. " - "Install with: pip install onnxruntime" - ) + "onnxruntime quantization not available. Install with: pip install onnxruntime" + ) from e # Map dtype to QuantType dtype_map = { @@ -267,9 +268,7 @@ def _quantize_onnxruntime( ort_calib_method = calib_method_map.get(config.calibration_method, ORTCalibMethod.MinMax) # Create calibration data reader - data_reader = _create_ort_calibration_reader( - model_input, calibration_data, num_samples - ) + data_reader = _create_ort_calibration_reader(model_input, calibration_data, num_samples) quantize_static( model_input=str(model_input), @@ -303,13 +302,13 @@ def _quantize_onnxruntime( def _convert_to_fp16_onnx(model_input: Path, model_output: Path) -> None: """Convert ONNX model to FP16.""" try: - from onnxconverter_common import float16 import onnx - except ImportError: + from onnxconverter_common import float16 + except ImportError as e: raise ImportError( "FP16 conversion requires onnxconverter-common. 
" "Install with: pip install onnxconverter-common" - ) + ) from e model = onnx.load(str(model_input)) model_fp16 = float16.convert_float_to_float16(model, keep_io_types=True) @@ -332,6 +331,7 @@ def __init__( # Get input names from model import onnxruntime as ort + sess = ort.InferenceSession(str(model_path), providers=["CPUExecutionProvider"]) self._input_names = [inp.name for inp in sess.get_inputs()] del sess @@ -350,16 +350,15 @@ def _setup_iterator(self): if isinstance(data, list): # Convert list to iterator - if len(data) > 0: - if isinstance(data[0], np.ndarray): - # List of arrays - wrap in dicts - if len(self._input_names) != 1: - raise ValueError( - f"Model has {len(self._input_names)} inputs, " - "but calibration data is a list of arrays. " - "Use list of dicts instead." - ) - data = [{self._input_names[0]: arr} for arr in data] + if len(data) > 0 and isinstance(data[0], np.ndarray): + # List of arrays - wrap in dicts + if len(self._input_names) != 1: + raise ValueError( + f"Model has {len(self._input_names)} inputs, " + "but calibration data is a list of arrays. " + "Use list of dicts instead." + ) + data = [{self._input_names[0]: arr} for arr in data] self._data_iter = iter(data) else: self._data_iter = data @@ -406,11 +405,10 @@ def _quantize_openvino( try: import nncf import openvino as ov - except ImportError: + except ImportError as e: raise ImportError( - "OpenVINO NNCF not available. " - "Install with: pip install openvino nncf" - ) + "OpenVINO NNCF not available. 
Install with: pip install openvino nncf" + ) from e original_size = model_input.stat().st_size / (1024 * 1024) @@ -422,9 +420,8 @@ def _quantize_openvino( if config.dtype == QuantizationType.FP16: # OpenVINO doesn't have direct FP16 quantization, use compress_to_fp16 try: - from openvino.runtime import serialize # Compile with FP16 inference precision hint - model_fp16 = ov.compile_model(model, "CPU", {"INFERENCE_PRECISION_HINT": "f16"}) + ov.compile_model(model, "CPU", {"INFERENCE_PRECISION_HINT": "f16"}) # For saving, we need to serialize the original model # OpenVINO FP16 is handled at compile time, not model level # Fall back to ONNX Runtime for FP16 @@ -447,7 +444,9 @@ def _quantize_openvino( quantized_model = nncf.quantize( model, nncf_dataset, - preset=nncf.QuantizationPreset.MIXED if config.per_channel else nncf.QuantizationPreset.PERFORMANCE, + preset=nncf.QuantizationPreset.MIXED + if config.per_channel + else nncf.QuantizationPreset.PERFORMANCE, target_device=nncf.TargetDevice.CPU, subset_size=num_samples, ) @@ -455,13 +454,13 @@ def _quantize_openvino( # Save the quantized model # Determine output format based on extension output_str = str(model_output) - if output_str.endswith('.onnx'): + if output_str.endswith(".onnx"): # Save as ONNX ov.save_model(quantized_model, output_str) else: # Save as OpenVINO IR - if not output_str.endswith('.xml'): - output_str = output_str + '.xml' + if not output_str.endswith(".xml"): + output_str = output_str + ".xml" ov.save_model(quantized_model, output_str) model_output = Path(output_str) @@ -490,15 +489,13 @@ def _create_nncf_dataset(model, data: CalibrationData, num_samples: int): if callable(data) and not isinstance(data, (list, Iterator)): data = data() - if isinstance(data, list): - if len(data) > 0 and isinstance(data[0], np.ndarray): - # List of arrays - if len(input_names) != 1: - raise ValueError( - f"Model has {len(input_names)} inputs, " - "but calibration data is a list of arrays." 
- ) - data = [{input_names[0]: arr} for arr in data] + if isinstance(data, list) and len(data) > 0 and isinstance(data[0], np.ndarray): + # List of arrays + if len(input_names) != 1: + raise ValueError( + f"Model has {len(input_names)} inputs, but calibration data is a list of arrays." + ) + data = [{input_names[0]: arr} for arr in data] # Convert to list if iterator if not isinstance(data, list): @@ -590,6 +587,7 @@ def quantize_for_tensorrt( # Convenience functions + def quantize_dynamic( model_input: str | Path, model_output: str | Path, diff --git a/tests/conftest.py b/tests/conftest.py index b8ab9de..750f6e0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,22 +1,22 @@ """Pytest configuration and shared fixtures for polyinfer tests.""" -import os import sys -import pytest -import numpy as np from pathlib import Path +import numpy as np +import pytest + # Add src to path for development sys.path.insert(0, str(Path(__file__).parent.parent / "src")) import polyinfer as pi -from polyinfer.backends.registry import get_all_backends, get_backend - +from polyinfer.backends.registry import get_all_backends # ============================================================================= # Test Model Fixtures # ============================================================================= + @pytest.fixture(scope="session") def model_path(): """Get path to test model (YOLOv8n).""" @@ -34,6 +34,7 @@ def model_path(): # Try to download/export try: from ultralytics import YOLO + model = YOLO("yolov8n.pt") export_path = Path(__file__).parent.parent / "yolov8n.onnx" model.export(format="onnx") @@ -50,7 +51,7 @@ def simple_model_path(tmp_path_factory): """Create a simple ONNX model for basic tests.""" try: import onnx - from onnx import helper, TensorProto + from onnx import TensorProto, helper # Create a simple model: Y = X + 1 X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 224, 224]) @@ -58,21 +59,12 @@ def simple_model_path(tmp_path_factory): # 
Constant tensor of ones ones = helper.make_tensor( - "ones", - TensorProto.FLOAT, - [1, 3, 224, 224], - [1.0] * (1 * 3 * 224 * 224) + "ones", TensorProto.FLOAT, [1, 3, 224, 224], [1.0] * (1 * 3 * 224 * 224) ) add_node = helper.make_node("Add", ["X", "ones"], ["Y"]) - graph = helper.make_graph( - [add_node], - "simple_add", - [X], - [Y], - [ones] - ) + graph = helper.make_graph([add_node], "simple_add", [X], [Y], [ones]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) @@ -90,6 +82,7 @@ def simple_model_path(tmp_path_factory): # Input Data Fixtures # ============================================================================= + @pytest.fixture def yolo_input(): """Create input tensor for YOLOv8 (1x3x640x640).""" @@ -113,6 +106,7 @@ def batch_input(): # Backend/Device Discovery Fixtures # ============================================================================= + @pytest.fixture(scope="session") def available_backends(): """Get list of available backends.""" @@ -135,6 +129,7 @@ def all_backends(): # Device-specific Fixtures # ============================================================================= + @pytest.fixture def has_cuda(): """Check if CUDA is available.""" @@ -187,6 +182,7 @@ def has_vulkan(): # Markers # ============================================================================= + def pytest_configure(config): """Register custom markers.""" config.addinivalue_line("markers", "cuda: mark test as requiring CUDA") diff --git a/tests/test_backend_options.py b/tests/test_backend_options.py index 339a931..80c6867 100644 --- a/tests/test_backend_options.py +++ b/tests/test_backend_options.py @@ -4,25 +4,22 @@ are properly validated and passed through to the underlying engines. 
""" -import pytest import numpy as np -from pathlib import Path -from unittest.mock import patch, MagicMock +import pytest import polyinfer as pi -from polyinfer.backends.registry import get_backend - # ============================================================================= # Test Fixtures # ============================================================================= + @pytest.fixture def dummy_onnx_model(tmp_path): """Create a minimal ONNX model for testing.""" try: import onnx - from onnx import helper, TensorProto + from onnx import TensorProto, helper except ImportError: pytest.skip("onnx not installed") @@ -60,6 +57,7 @@ def dummy_input(): # ONNX Runtime Backend Options Tests # ============================================================================= + class TestONNXRuntimeOptions: """Test ONNX Runtime backend options passthrough.""" @@ -210,6 +208,7 @@ def test_directml_options(self, dummy_onnx_model, dummy_input): # Native TensorRT Backend Options Tests # ============================================================================= + class TestNativeTensorRTOptions: """Test native TensorRT backend options.""" @@ -300,7 +299,7 @@ def test_force_rebuild(self, dummy_onnx_model, dummy_input, tmp_path): cache_path = tmp_path / "test_rebuild.engine" # First build - model1 = pi.load( + pi.load( dummy_onnx_model, backend="tensorrt", device="cuda", @@ -309,7 +308,7 @@ def test_force_rebuild(self, dummy_onnx_model, dummy_input, tmp_path): mtime1 = cache_path.stat().st_mtime # Should use cache (no rebuild) - model2 = pi.load( + pi.load( dummy_onnx_model, backend="tensorrt", device="cuda", @@ -319,7 +318,7 @@ def test_force_rebuild(self, dummy_onnx_model, dummy_input, tmp_path): assert mtime1 == mtime2, "Cache should be reused" # Force rebuild - model3 = pi.load( + pi.load( dummy_onnx_model, backend="tensorrt", device="cuda", @@ -353,6 +352,7 @@ def test_profiling_verbosity(self, dummy_onnx_model, dummy_input, tmp_path): # OpenVINO Backend Options Tests # 
============================================================================= + class TestOpenVINOOptions: """Test OpenVINO backend options.""" @@ -410,6 +410,7 @@ def test_caching(self, dummy_onnx_model, dummy_input, tmp_path): # IREE Backend Options Tests # ============================================================================= + class TestIREEOptions: """Test IREE backend options.""" @@ -491,6 +492,7 @@ def test_save_mlir(self, dummy_onnx_model, dummy_input, tmp_path): # Options Validation Tests # ============================================================================= + class TestOptionsValidation: """Test that invalid options are handled properly.""" @@ -523,6 +525,7 @@ def test_invalid_device_raises(self, dummy_onnx_model): # Integration Tests # ============================================================================= + class TestOptionsIntegration: """Integration tests for options across backends.""" diff --git a/tests/test_backends.py b/tests/test_backends.py index 74f0cad..ecd6db4 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -1,6 +1,7 @@ """Tests for backend discovery and availability.""" import pytest + import polyinfer as pi from polyinfer.backends.registry import get_all_backends, get_backend @@ -123,7 +124,7 @@ class TestBackendPriority: def test_backends_have_priority(self): """All backends should have a priority value.""" all_backends = get_all_backends() - for name, backend in all_backends.items(): + for _name, backend in all_backends.items(): assert isinstance(backend.priority, int) assert backend.priority >= 0 diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index dd382cf..abbc30f 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -1,7 +1,7 @@ """Tests for benchmarking functionality.""" import pytest -import numpy as np + import polyinfer as pi @@ -94,7 +94,7 @@ def test_warmup_effect(self, model_path, yolo_input): model = pi.load(model_path, device="cpu") # No warmup - 
result_no_warmup = model.benchmark(yolo_input, warmup=0, iterations=10) + model.benchmark(yolo_input, warmup=0, iterations=10) # With warmup result_warmup = model.benchmark(yolo_input, warmup=10, iterations=10) @@ -115,12 +115,7 @@ def test_compare_basic(self, model_path): def test_compare_returns_results(self, model_path): """compare() should return benchmark results for each backend.""" - results = pi.compare( - model_path, - input_shape=(1, 3, 640, 640), - warmup=2, - iterations=5 - ) + results = pi.compare(model_path, input_shape=(1, 3, 640, 640), warmup=2, iterations=5) for result in results: assert "backend" in result @@ -132,11 +127,7 @@ def test_compare_returns_results(self, model_path): def test_compare_specific_device(self, model_path): """compare() should work with specific device.""" results = pi.compare( - model_path, - input_shape=(1, 3, 640, 640), - device="cpu", - warmup=2, - iterations=5 + model_path, input_shape=(1, 3, 640, 640), device="cpu", warmup=2, iterations=5 ) assert len(results) >= 1 @@ -222,7 +213,7 @@ def test_long_benchmark(self, model_path, yolo_input): result_long = model.benchmark(yolo_input, warmup=10, iterations=100) # Longer benchmark should generally have lower relative std - rel_std_short = result_short["std_ms"] / result_short["mean_ms"] + result_short["std_ms"] / result_short["mean_ms"] rel_std_long = result_long["std_ms"] / result_long["mean_ms"] # Not strictly enforced as it depends on system state @@ -230,7 +221,7 @@ def test_long_benchmark(self, model_path, yolo_input): def test_all_available_backends(self, model_path, yolo_input): """Benchmark all available backends.""" - backends = pi.list_backends() + pi.list_backends() devices = pi.list_devices() results = [] @@ -238,17 +229,21 @@ def test_all_available_backends(self, model_path, yolo_input): try: model = pi.load(model_path, device=device.name) result = model.benchmark(yolo_input, warmup=3, iterations=10) - results.append({ - "device": device.name, - "backend": 
result["backend"], - "fps": result["fps"], - "mean_ms": result["mean_ms"], - }) + results.append( + { + "device": device.name, + "backend": result["backend"], + "fps": result["fps"], + "mean_ms": result["mean_ms"], + } + ) except Exception as e: - results.append({ - "device": device.name, - "error": str(e), - }) + results.append( + { + "device": device.name, + "error": str(e), + } + ) # Should have at least one successful result successful = [r for r in results if "fps" in r] @@ -258,6 +253,8 @@ def test_all_available_backends(self, model_path, yolo_input): print("\n=== Benchmark Results ===") for r in results: if "fps" in r: - print(f"{r['device']:20} {r['backend']:25} {r['mean_ms']:8.2f} ms {r['fps']:8.1f} FPS") + print( + f"{r['device']:20} {r['backend']:25} {r['mean_ms']:8.2f} ms {r['fps']:8.1f} FPS" + ) else: print(f"{r['device']:20} ERROR: {r.get('error', 'unknown')}") diff --git a/tests/test_devices.py b/tests/test_devices.py index 3995385..01e1239 100644 --- a/tests/test_devices.py +++ b/tests/test_devices.py @@ -1,7 +1,8 @@ """Tests for device-specific functionality.""" -import pytest import numpy as np +import pytest + import polyinfer as pi diff --git a/tests/test_inference.py b/tests/test_inference.py index 42833ba..d24b34b 100644 --- a/tests/test_inference.py +++ b/tests/test_inference.py @@ -1,7 +1,8 @@ """Tests for inference correctness and consistency.""" -import pytest import numpy as np +import pytest + import polyinfer as pi diff --git a/tests/test_intel_devices.py b/tests/test_intel_devices.py index be319a8..5510b50 100644 --- a/tests/test_intel_devices.py +++ b/tests/test_intel_devices.py @@ -1,9 +1,14 @@ """Test polyinfer with Intel devices (CPU, iGPU, NPU).""" + +import os import sys + sys.path.insert(0, "src") import numpy as np + import polyinfer as pi +from polyinfer.backends.openvino import OpenVINOBackend # Check what's available print("=" * 60) @@ -14,12 +19,10 @@ print("Available devices:", pi.list_devices()) # Get OpenVINO backend 
directly to see raw device names -from polyinfer.backends.openvino import OpenVINOBackend ov_backend = OpenVINOBackend() print("\nOpenVINO raw devices:", ov_backend.get_available_devices()) # Test model path, use YOLOv8n if available -import os model_path = None for path in ["yolov8n.onnx", "examples/yolov8n.onnx", "../yolov8n.onnx"]: if os.path.exists(path): @@ -30,11 +33,14 @@ print("\nNo test model found. Downloading yolov8n.onnx...") try: from ultralytics import YOLO + model = YOLO("yolov8n.pt") model.export(format="onnx") model_path = "yolov8n.onnx" except ImportError: - print("Please provide a model: pip install ultralytics && yolo export model=yolov8n.pt format=onnx") + print( + "Please provide a model: pip install ultralytics && yolo export model=yolov8n.pt format=onnx" + ) sys.exit(1) print(f"\nUsing model: {model_path}") @@ -64,7 +70,7 @@ # Benchmark bench = model.benchmark(input_data, warmup=5, iterations=20) print(f" Latency: {bench['mean_ms']:.2f} ms ({bench['fps']:.1f} FPS)") - results.append((device, description, bench['mean_ms'], bench['fps'])) + results.append((device, description, bench["mean_ms"], bench["fps"])) except Exception as e: print(f" ERROR: {e}") results.append((device, description, None, None)) diff --git a/tests/test_logging.py b/tests/test_logging.py index bbd838e..6d99ee5 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -27,14 +27,13 @@ def test_logging_exports_available(self): def test_logging_module_import(self): """Test direct import from logging module.""" from polyinfer._logging import ( - get_logger, - set_log_level, + configure_logging, + disable_logging, + enable_logging, get_log_level, get_log_level_name, - enable_logging, - disable_logging, - configure_logging, - LogContext, + get_logger, + set_log_level, ) # All should be callable @@ -237,7 +236,7 @@ def test_logger_hierarchy(self): """Test that child loggers inherit from parent.""" import polyinfer as pi - parent = pi.get_logger() + pi.get_logger() child 
= pi.get_logger("model") # Child's effective level should match parent diff --git a/tests/test_mlir.py b/tests/test_mlir.py index c6197da..be48f7d 100644 --- a/tests/test_mlir.py +++ b/tests/test_mlir.py @@ -1,16 +1,18 @@ """Tests for MLIR emission and compilation.""" -import pytest -import numpy as np -import polyinfer as pi -from pathlib import Path import tempfile +from pathlib import Path + +import numpy as np +import pytest +import polyinfer as pi # ============================================================================= # Fixtures # ============================================================================= + @pytest.fixture(scope="module") def model_path(): """Get path to test ONNX model.""" @@ -40,6 +42,7 @@ def temp_dir(): # MLIR Export Tests # ============================================================================= + class TestMLIRExport: """Tests for export_mlir functionality.""" @@ -57,6 +60,7 @@ def test_export_mlir_default_path(self, model_path, temp_dir): """Test MLIR export with default output path.""" # Copy model to temp dir to test default path behavior import shutil + temp_model = temp_dir / "test_model.onnx" shutil.copy(model_path, temp_model) @@ -125,6 +129,7 @@ def test_export_mlir_file_not_found(self, temp_dir): # MLIR Compilation Tests # ============================================================================= + class TestMLIRCompilation: """Tests for compile_mlir functionality.""" @@ -137,7 +142,9 @@ def mlir_file(self, model_path, temp_dir): def test_compile_mlir_cpu(self, mlir_file, temp_dir): """Test MLIR compilation for CPU.""" - vmfb_path = pi.compile_mlir(mlir_file, device="cpu", output_path=temp_dir / "model_cpu.vmfb") + vmfb_path = pi.compile_mlir( + mlir_file, device="cpu", output_path=temp_dir / "model_cpu.vmfb" + ) assert vmfb_path.exists() assert vmfb_path.suffix == ".vmfb" @@ -145,7 +152,9 @@ def test_compile_mlir_cpu(self, mlir_file, temp_dir): @pytest.mark.vulkan def test_compile_mlir_vulkan(self, mlir_file, 
temp_dir): """Test MLIR compilation for Vulkan.""" - vmfb_path = pi.compile_mlir(mlir_file, device="vulkan", output_path=temp_dir / "model_vulkan.vmfb") + vmfb_path = pi.compile_mlir( + mlir_file, device="vulkan", output_path=temp_dir / "model_vulkan.vmfb" + ) assert vmfb_path.exists() @@ -181,6 +190,7 @@ def test_compile_mlir_file_not_found(self, temp_dir): # End-to-End Workflow Tests # ============================================================================= + class TestMLIRWorkflow: """End-to-end tests for MLIR workflow.""" @@ -246,6 +256,7 @@ def test_mlir_output_consistency(self, model_path, temp_dir): # Backend Method Tests # ============================================================================= + class TestIREEBackendMethods: """Tests for IREEBackend emit_mlir and compile_mlir methods.""" @@ -265,7 +276,9 @@ def test_backend_compile_mlir(self, model_path, temp_dir): mlir = backend.emit_mlir(model_path, temp_dir / "model.mlir") # Then compile - vmfb_path = backend.compile_mlir(mlir.path, device="cpu", output_path=temp_dir / "model.vmfb") + vmfb_path = backend.compile_mlir( + mlir.path, device="cpu", output_path=temp_dir / "model.vmfb" + ) assert vmfb_path.exists() @@ -288,6 +301,7 @@ def test_backend_load_vmfb(self, model_path, temp_dir): # MLIR Content Analysis Tests # ============================================================================= + class TestMLIRContent: """Tests for MLIR content analysis.""" diff --git a/tests/test_quantization.py b/tests/test_quantization.py index a49d8dc..ceb5317 100644 --- a/tests/test_quantization.py +++ b/tests/test_quantization.py @@ -1,9 +1,9 @@ """Tests for quantization functionality.""" -import pytest +import importlib.util + import numpy as np -from pathlib import Path -import tempfile +import pytest import polyinfer as pi @@ -54,17 +54,13 @@ def simple_model(self, tmp_path): """Create a simple ONNX model for testing.""" try: import onnx - from onnx import helper, TensorProto + from onnx import 
TensorProto, helper except ImportError: pytest.skip("ONNX not installed") # Create simple model: output = input * 2 - input_tensor = helper.make_tensor_value_info( - "input", TensorProto.FLOAT, [1, 3, 32, 32] - ) - output_tensor = helper.make_tensor_value_info( - "output", TensorProto.FLOAT, [1, 3, 32, 32] - ) + input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 32, 32]) + output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 32, 32]) const_tensor = helper.make_tensor("const", TensorProto.FLOAT, [1], [2.0]) mul_node = helper.make_node("Mul", ["input", "const"], ["output"], name="mul") graph = helper.make_graph( @@ -143,16 +139,12 @@ def simple_model(self, tmp_path): """Create a simple ONNX model for testing.""" try: import onnx - from onnx import helper, TensorProto + from onnx import TensorProto, helper except ImportError: pytest.skip("ONNX not installed") - input_tensor = helper.make_tensor_value_info( - "input", TensorProto.FLOAT, [1, 3, 32, 32] - ) - output_tensor = helper.make_tensor_value_info( - "output", TensorProto.FLOAT, [1, 3, 32, 32] - ) + input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 32, 32]) + output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 32, 32]) const_tensor = helper.make_tensor("const", TensorProto.FLOAT, [1], [2.0]) mul_node = helper.make_node("Mul", ["input", "const"], ["output"], name="mul") graph = helper.make_graph( @@ -201,11 +193,10 @@ def test_static_quantization_with_dict_list(self, simple_model, tmp_path): output_path = tmp_path / "model_int8.onnx" calibration_data = [ - {"input": np.random.rand(1, 3, 32, 32).astype(np.float32)} - for _ in range(10) + {"input": np.random.rand(1, 3, 32, 32).astype(np.float32)} for _ in range(10) ] - result = pi.quantize_static( + pi.quantize_static( simple_model, output_path, calibration_data=calibration_data, @@ -218,7 +209,7 @@ def test_static_quantization_per_channel(self, 
simple_model, tmp_path, calibrati """Test static quantization with per-channel option.""" output_path = tmp_path / "model_int8.onnx" - result = pi.quantize_static( + pi.quantize_static( simple_model, output_path, calibration_data=calibration_data, @@ -227,11 +218,13 @@ def test_static_quantization_per_channel(self, simple_model, tmp_path, calibrati assert output_path.exists() - def test_static_quantization_entropy_calibration(self, simple_model, tmp_path, calibration_data): + def test_static_quantization_entropy_calibration( + self, simple_model, tmp_path, calibration_data + ): """Test static quantization with entropy calibration.""" output_path = tmp_path / "model_int8.onnx" - result = pi.quantize( + pi.quantize( simple_model, output_path, method="static", @@ -250,16 +243,12 @@ def simple_model(self, tmp_path): """Create a simple ONNX model for testing.""" try: import onnx - from onnx import helper, TensorProto + from onnx import TensorProto, helper except ImportError: pytest.skip("ONNX not installed") - input_tensor = helper.make_tensor_value_info( - "input", TensorProto.FLOAT, [1, 3, 32, 32] - ) - output_tensor = helper.make_tensor_value_info( - "output", TensorProto.FLOAT, [1, 3, 32, 32] - ) + input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 32, 32]) + output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 32, 32]) const_tensor = helper.make_tensor("const", TensorProto.FLOAT, [1], [2.0]) mul_node = helper.make_node("Mul", ["input", "const"], ["output"], name="mul") graph = helper.make_graph( @@ -274,9 +263,7 @@ def simple_model(self, tmp_path): def test_fp16_conversion(self, simple_model, tmp_path): """Test FP16 conversion.""" - try: - import onnxconverter_common - except ImportError: + if importlib.util.find_spec("onnxconverter_common") is None: pytest.skip("onnxconverter-common not installed") output_path = tmp_path / "model_fp16.onnx" @@ -288,9 +275,7 @@ def test_fp16_conversion(self, simple_model, 
tmp_path): def test_fp16_via_quantize(self, simple_model, tmp_path): """Test FP16 conversion via quantize().""" - try: - import onnxconverter_common - except ImportError: + if importlib.util.find_spec("onnxconverter_common") is None: pytest.skip("onnxconverter-common not installed") output_path = tmp_path / "model_fp16.onnx" @@ -306,9 +291,7 @@ def test_fp16_via_quantize(self, simple_model, tmp_path): def test_fp16_model_runs(self, simple_model, tmp_path): """Test that FP16 model can be loaded and run.""" - try: - import onnxconverter_common - except ImportError: + if importlib.util.find_spec("onnxconverter_common") is None: pytest.skip("onnxconverter-common not installed") output_path = tmp_path / "model_fp16.onnx" @@ -329,16 +312,12 @@ def simple_model(self, tmp_path): """Create a simple ONNX model for testing.""" try: import onnx - from onnx import helper, TensorProto + from onnx import TensorProto, helper except ImportError: pytest.skip("ONNX not installed") - input_tensor = helper.make_tensor_value_info( - "input", TensorProto.FLOAT, [1, 3, 32, 32] - ) - output_tensor = helper.make_tensor_value_info( - "output", TensorProto.FLOAT, [1, 3, 32, 32] - ) + input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 32, 32]) + output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 32, 32]) const_tensor = helper.make_tensor("const", TensorProto.FLOAT, [1], [2.0]) mul_node = helper.make_node("Mul", ["input", "const"], ["output"], name="mul") graph = helper.make_graph( @@ -385,16 +364,12 @@ def simple_model(self, tmp_path): """Create a simple ONNX model for testing.""" try: import onnx - from onnx import helper, TensorProto + from onnx import TensorProto, helper except ImportError: pytest.skip("ONNX not installed") - input_tensor = helper.make_tensor_value_info( - "input", TensorProto.FLOAT, [1, 3, 32, 32] - ) - output_tensor = helper.make_tensor_value_info( - "output", TensorProto.FLOAT, [1, 3, 32, 32] - ) + input_tensor 
= helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 32, 32]) + output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 32, 32]) const_tensor = helper.make_tensor("const", TensorProto.FLOAT, [1], [2.0]) mul_node = helper.make_node("Mul", ["input", "const"], ["output"], name="mul") graph = helper.make_graph( @@ -415,10 +390,7 @@ def calibration_data(self): @pytest.mark.openvino def test_openvino_quantization(self, simple_model, tmp_path, calibration_data): """Test OpenVINO NNCF quantization.""" - try: - import nncf - import openvino - except ImportError: + if importlib.util.find_spec("nncf") is None or importlib.util.find_spec("openvino") is None: pytest.skip("OpenVINO/NNCF not installed") output_path = tmp_path / "model_int8.onnx" diff --git a/tests/test_yolov8.py b/tests/test_yolov8.py index 4196e09..a7447ae 100644 --- a/tests/test_yolov8.py +++ b/tests/test_yolov8.py @@ -4,16 +4,18 @@ backend and device combination, checking both correctness and performance. 
""" -import pytest -import numpy as np -import polyinfer as pi from pathlib import Path +import numpy as np +import pytest + +import polyinfer as pi # ============================================================================= # Fixtures # ============================================================================= + @pytest.fixture(scope="module") def yolov8_path(): """Get path to YOLOv8n ONNX model.""" @@ -30,6 +32,7 @@ def yolov8_path(): # Try to export try: from ultralytics import YOLO + model = YOLO("yolov8n.pt") export_path = Path(__file__).parent.parent / "yolov8n.onnx" model.export(format="onnx") @@ -58,6 +61,7 @@ def reference_output(yolov8_path, yolo_input): # Device Discovery # ============================================================================= + def get_all_device_backend_combinations(): """Get all valid (device, backend) combinations.""" combinations = [] @@ -73,6 +77,7 @@ def get_all_device_backend_combinations(): # YOLOv8 Tests by Backend # ============================================================================= + class TestYOLOv8ONNXRuntime: """YOLOv8 tests for ONNX Runtime backend.""" @@ -205,7 +210,7 @@ def test_vulkan(self, yolov8_path, yolo_input): total_elements = 1 * 84 * 8400 nan_count = np.sum(np.isnan(output)) nan_ratio = nan_count / total_elements - assert nan_ratio <= 0.001, f"Too many NaN values: {nan_count} ({nan_ratio*100:.2f}%)" + assert nan_ratio <= 0.001, f"Too many NaN values: {nan_count} ({nan_ratio * 100:.2f}%)" assert not np.any(np.isinf(output)), "Output contains Inf" @@ -242,6 +247,7 @@ def test_cuda(self, yolov8_path, yolo_input, reference_output): # Performance Benchmarks # ============================================================================= + class TestYOLOv8Benchmarks: """Benchmark YOLOv8 across all backends.""" @@ -256,21 +262,25 @@ def test_benchmark_all_devices(self, yolov8_path, yolo_input): try: model = pi.load(yolov8_path, backend=backend_name, device=device) bench = 
model.benchmark(yolo_input, warmup=5, iterations=20) - results.append({ - "device": device, - "backend": backend_name, - "backend_name": model.backend_name, - "mean_ms": bench["mean_ms"], - "fps": bench["fps"], - "status": "success", - }) + results.append( + { + "device": device, + "backend": backend_name, + "backend_name": model.backend_name, + "mean_ms": bench["mean_ms"], + "fps": bench["fps"], + "status": "success", + } + ) except Exception as e: - results.append({ - "device": device, - "backend": backend_name, - "error": str(e), - "status": "error", - }) + results.append( + { + "device": device, + "backend": backend_name, + "error": str(e), + "status": "error", + } + ) # Print results print("\n" + "=" * 80) @@ -283,7 +293,9 @@ def test_benchmark_all_devices(self, yolov8_path, yolo_input): successful.sort(key=lambda r: r["mean_ms"]) for r in successful: - print(f"{r['device']:<15} {r['backend_name']:<25} {r['mean_ms']:>10.2f}ms {r['fps']:>9.1f}") + print( + f"{r['device']:<15} {r['backend_name']:<25} {r['mean_ms']:>10.2f}ms {r['fps']:>9.1f}" + ) print("-" * 80) @@ -304,6 +316,7 @@ def test_benchmark_openvino_devices(self, yolov8_path, yolo_input): pytest.skip("OpenVINO not available") from polyinfer.backends.openvino import OpenVINOBackend + ov_backend = OpenVINOBackend() raw_devices = ov_backend.get_available_devices() @@ -322,13 +335,17 @@ def test_benchmark_openvino_devices(self, yolov8_path, yolo_input): try: model = pi.load(yolov8_path, backend="openvino", device=pi_device) bench = model.benchmark(yolo_input, warmup=5, iterations=20) - results.append({ - "raw_device": raw_device, - "pi_device": pi_device, - "mean_ms": bench["mean_ms"], - "fps": bench["fps"], - }) - print(f" {raw_device} ({pi_device}): {bench['mean_ms']:.2f}ms ({bench['fps']:.1f} FPS)") + results.append( + { + "raw_device": raw_device, + "pi_device": pi_device, + "mean_ms": bench["mean_ms"], + "fps": bench["fps"], + } + ) + print( + f" {raw_device} ({pi_device}): {bench['mean_ms']:.2f}ms 
({bench['fps']:.1f} FPS)" + ) except Exception as e: print(f" {raw_device} ({pi_device}): ERROR - {e}") @@ -348,11 +365,13 @@ def test_benchmark_iree_devices(self, yolov8_path, yolo_input): try: model = pi.load(yolov8_path, backend="iree", device=device) bench = model.benchmark(yolo_input, warmup=5, iterations=20) - results.append({ - "device": device, - "mean_ms": bench["mean_ms"], - "fps": bench["fps"], - }) + results.append( + { + "device": device, + "mean_ms": bench["mean_ms"], + "fps": bench["fps"], + } + ) print(f" {device}: {bench['mean_ms']:.2f}ms ({bench['fps']:.1f} FPS)") except Exception as e: print(f" {device}: ERROR - {e}") @@ -364,6 +383,7 @@ def test_benchmark_iree_devices(self, yolov8_path, yolo_input): # Cross-Backend Consistency # ============================================================================= + class TestYOLOv8Consistency: """Test output consistency across backends.""" @@ -374,11 +394,7 @@ def test_all_backends_same_output_shape(self, yolov8_path, yolo_input): for device_info in pi.list_devices(): for backend_name in device_info.backends: try: - model = pi.load( - yolov8_path, - backend=backend_name, - device=device_info.name - ) + model = pi.load(yolov8_path, backend=backend_name, device=device_info.name) output = model(yolo_input) key = f"{backend_name}-{device_info.name}" shapes[key] = output.shape @@ -447,7 +463,7 @@ def test_iree_vulkan_vs_cpu(self, yolov8_path, yolo_input): total_elements = output_vulkan.size nan_count = np.sum(np.isnan(output_vulkan)) nan_ratio = nan_count / total_elements - assert nan_ratio <= 0.001, f"Too many NaN values: {nan_count} ({nan_ratio*100:.2f}%)" + assert nan_ratio <= 0.001, f"Too many NaN values: {nan_count} ({nan_ratio * 100:.2f}%)" # Compare non-NaN values using correlation valid_mask = ~np.isnan(output_vulkan) @@ -461,6 +477,7 @@ def test_iree_vulkan_vs_cpu(self, yolov8_path, yolo_input): # Stress Tests # ============================================================================= + class 
TestYOLOv8Stress: """Stress tests for YOLOv8.""" @@ -500,7 +517,7 @@ def test_repeated_inference_vulkan(self, yolov8_path, yolo_input): nan_count = np.sum(np.isnan(output)) nan_ratio = nan_count / total_elements assert nan_ratio <= max_nan_ratio, ( - f"Run {i} has too many NaN values: {nan_count} ({nan_ratio*100:.2f}%)" + f"Run {i} has too many NaN values: {nan_count} ({nan_ratio * 100:.2f}%)" ) assert not np.any(np.isinf(output)), f"Run {i} contains Inf" @@ -570,8 +587,12 @@ def test_different_random_inputs(self, yolov8_path): try: model = pi.load(model_path, backend=backend, device=device_info.name) bench = model.benchmark(input_data, warmup=5, iterations=20) - print(f"{device_info.name:15} {model.backend_name:25} {bench['mean_ms']:8.2f}ms {bench['fps']:8.1f} FPS") - results.append((device_info.name, model.backend_name, bench['mean_ms'], bench['fps'])) + print( + f"{device_info.name:15} {model.backend_name:25} {bench['mean_ms']:8.2f}ms {bench['fps']:8.1f} FPS" + ) + results.append( + (device_info.name, model.backend_name, bench["mean_ms"], bench["fps"]) + ) except Exception as e: print(f"{device_info.name:15} {backend:25} ERROR: {str(e)[:40]}") @@ -580,5 +601,5 @@ def test_different_random_inputs(self, yolov8_path): print("Summary (sorted by speed)") print("=" * 60) results.sort(key=lambda x: x[2]) - for device, backend, ms, fps in results: + for _device, backend, ms, fps in results: print(f"{backend:30} {ms:8.2f}ms {fps:8.1f} FPS") From 303c9b2aaac30e4663e91840036459b36dd6de01 Mon Sep 17 00:00:00 2001 From: Athrva Date: Sat, 20 Dec 2025 14:09:28 -0500 Subject: [PATCH 3/8] Fix mypy issues --- src/polyinfer/_logging.py | 6 +-- src/polyinfer/backends/_autoload.py | 6 +-- src/polyinfer/backends/base.py | 6 +-- src/polyinfer/backends/iree/backend.py | 32 +++++------ src/polyinfer/backends/onnxruntime/backend.py | 7 +-- src/polyinfer/backends/openvino/backend.py | 11 ++-- src/polyinfer/backends/tensorrt/backend.py | 18 +++---- src/polyinfer/compare.py | 6 +-- 
src/polyinfer/discovery.py | 53 +++++++++++-------- src/polyinfer/nvidia_setup.py | 12 ++--- src/polyinfer/quantization.py | 32 +++++++---- 11 files changed, 106 insertions(+), 83 deletions(-) diff --git a/src/polyinfer/_logging.py b/src/polyinfer/_logging.py index d6c99ac..5ff28b7 100644 --- a/src/polyinfer/_logging.py +++ b/src/polyinfer/_logging.py @@ -34,7 +34,7 @@ _logger.setLevel(logging.WARNING) # Create console handler with formatting -_handler = logging.StreamHandler(sys.stderr) +_handler: logging.Handler = logging.StreamHandler(sys.stderr) _handler.setLevel(logging.DEBUG) # Handler passes everything, logger filters # Format: [LEVEL] polyinfer.module: message @@ -154,7 +154,7 @@ def configure_logging( level: str | int = "WARNING", format: str = "[%(levelname)s] %(name)s: %(message)s", stream=None, - filename: str = None, + filename: str | None = None, ) -> None: """Configure polyinfer logging with custom settings. @@ -237,7 +237,7 @@ def _log_backend_init(name: str, version: str, devices: list): logger.debug(f" Supported devices: {devices}") -def _log_inference(backend: str, input_shapes: list, output_shapes: list, time_ms: float = None): +def _log_inference(backend: str, input_shapes: list, output_shapes: list, time_ms: float | None = None): """Log inference operation.""" logger = get_logger("inference") if time_ms is not None: diff --git a/src/polyinfer/backends/_autoload.py b/src/polyinfer/backends/_autoload.py index fce79b4..d57fedf 100644 --- a/src/polyinfer/backends/_autoload.py +++ b/src/polyinfer/backends/_autoload.py @@ -227,7 +227,7 @@ def supported_devices(self) -> list[str]: def version(self) -> str: try: self._ensure_loaded() - return self._real_backend.version + return str(self._real_backend.version) except Exception: return "not loaded" @@ -321,7 +321,7 @@ def supported_devices(self) -> list[str]: def version(self) -> str: try: self._ensure_loaded() - return self._real_backend.version + return str(self._real_backend.version) except 
Exception: return "not loaded" @@ -354,5 +354,5 @@ def load(self, model_path: str, device: str = "cpu", **kwargs): # TODO: Narrow exception suppression to specific types once register_backend() # error conditions are documented. - with contextlib.supress(Exception): + with contextlib.suppress(Exception): register_backend("onnxruntime", LazyONNXRuntimeBackend) diff --git a/src/polyinfer/backends/base.py b/src/polyinfer/backends/base.py index a5ca904..02af84e 100644 --- a/src/polyinfer/backends/base.py +++ b/src/polyinfer/backends/base.py @@ -96,14 +96,14 @@ def benchmark( self(*inputs) # Benchmark - times = [] + times_list: list[float] = [] for _ in range(iterations): start = time.perf_counter() self(*inputs) elapsed = (time.perf_counter() - start) * 1000 # ms - times.append(elapsed) + times_list.append(elapsed) - times = np.array(times) + times = np.array(times_list) return { "backend": self.backend_name, "device": self.device, diff --git a/src/polyinfer/backends/iree/backend.py b/src/polyinfer/backends/iree/backend.py index 09ed69b..a25b4b6 100644 --- a/src/polyinfer/backends/iree/backend.py +++ b/src/polyinfer/backends/iree/backend.py @@ -197,6 +197,8 @@ def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]: inputs = tuple(np.ascontiguousarray(inp, dtype=np.float32) for inp in inputs) # Run inference + if self._func is None: + raise RuntimeError("Model function not initialized") outputs = self._func(*inputs) # Convert outputs to numpy @@ -255,7 +257,7 @@ def is_available(self) -> bool: return False # Need compiler tools or CLI tools as fallback - return IREE_COMPILER_AVAILABLE or (_get_iree_import_onnx() and _get_iree_compile()) + return IREE_COMPILER_AVAILABLE or bool(_get_iree_import_onnx() and _get_iree_compile()) def load( self, @@ -284,13 +286,13 @@ def load( _logger.debug(f"Loading model: {model_path}") - model_path = Path(model_path) + model_path_obj = Path(model_path) device_type = device.split(":")[0] if ":" in device else device 
# Determine paths target = DEVICE_TO_TARGET.get(device_type, "llvm-cpu") cache_dir = Path(kwargs.get("cache_dir", ".")) - vmfb_path = cache_dir / f"{model_path.stem}_{target}.vmfb" + vmfb_path = cache_dir / f"{model_path_obj.stem}_{target}.vmfb" _logger.debug(f"Target: {target}, cache path: {vmfb_path}") @@ -304,9 +306,9 @@ def load( if not IREE_COMPILER_AVAILABLE: # Try using CLI tools _logger.debug("Using CLI tools for compilation") - vmfb_path = self._compile_with_cli(model_path, target, vmfb_path, **kwargs) + vmfb_path = self._compile_with_cli(model_path_obj, target, vmfb_path, **kwargs) else: - vmfb_path = self._compile_with_api(model_path, target, vmfb_path, **kwargs) + vmfb_path = self._compile_with_api(model_path_obj, target, vmfb_path, **kwargs) _logger.info(f"Compilation complete: {vmfb_path}") return self._load_vmfb(vmfb_path, device) @@ -346,15 +348,15 @@ def emit_mlir( module @model { func.func @main_graph(... """ - model_path = Path(model_path) + model_path_obj = Path(model_path) - if not model_path.exists(): - raise FileNotFoundError(f"Model not found: {model_path}") + if not model_path_obj.exists(): + raise FileNotFoundError(f"Model not found: {model_path_obj}") # Determine output path - output_path = model_path.with_suffix(".mlir") if output_path is None else Path(output_path) + output_path_obj = model_path_obj.with_suffix(".mlir") if output_path is None else Path(output_path) - output_path.parent.mkdir(parents=True, exist_ok=True) + output_path_obj.parent.mkdir(parents=True, exist_ok=True) # Find IREE import tool iree_import = _get_iree_import_onnx() @@ -366,10 +368,10 @@ def emit_mlir( ) # Convert ONNX to MLIR - _logger.debug(f"Converting ONNX to MLIR: {model_path} -> {output_path}") + _logger.debug(f"Converting ONNX to MLIR: {model_path_obj} -> {output_path_obj}") try: subprocess.run( - [iree_import, str(model_path), "-o", str(output_path)], + [iree_import, str(model_path_obj), "-o", str(output_path_obj)], check=True, capture_output=True, 
text=True, @@ -383,12 +385,12 @@ def emit_mlir( # Load content if requested content = None if load_content: - content = output_path.read_text() + content = output_path_obj.read_text() return MLIROutput( - path=output_path, + path=output_path_obj, content=content, - source_model=model_path, + source_model=model_path_obj, dialect="iree", ) diff --git a/src/polyinfer/backends/onnxruntime/backend.py b/src/polyinfer/backends/onnxruntime/backend.py index a0c89ed..6ba7eec 100644 --- a/src/polyinfer/backends/onnxruntime/backend.py +++ b/src/polyinfer/backends/onnxruntime/backend.py @@ -87,7 +87,8 @@ def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]: outputs = self._session.run(None, input_dict) if len(outputs) == 1: - return outputs[0] + result: np.ndarray = outputs[0] + return result return tuple(outputs) def run(self, inputs: dict[str, np.ndarray]) -> dict[str, np.ndarray]: @@ -188,7 +189,7 @@ def supported_devices(self) -> list[str]: @property def version(self) -> str: if ONNXRUNTIME_AVAILABLE: - return ort.__version__ + return str(ort.__version__) return "not installed" @property @@ -203,7 +204,7 @@ def get_available_providers(self) -> list[str]: """Get list of available execution providers.""" if not ONNXRUNTIME_AVAILABLE: return [] - return ort.get_available_providers() + return list(ort.get_available_providers()) def load( self, diff --git a/src/polyinfer/backends/openvino/backend.py b/src/polyinfer/backends/openvino/backend.py index c0300bc..d88f463 100644 --- a/src/polyinfer/backends/openvino/backend.py +++ b/src/polyinfer/backends/openvino/backend.py @@ -80,11 +80,11 @@ def output_names(self) -> list[str]: @property def input_shapes(self) -> list[tuple]: - return self._input_shapes + return [tuple(s) for s in self._input_shapes] @property def output_shapes(self) -> list[tuple]: - return self._output_shapes + return [tuple(s) for s in self._output_shapes] def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]: 
"""Run inference.""" @@ -103,7 +103,8 @@ def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]: outputs.append(output_tensor.data.copy()) if len(outputs) == 1: - return outputs[0] + result: np.ndarray = outputs[0] + return result return tuple(outputs) def run(self, inputs: dict[str, np.ndarray]) -> dict[str, np.ndarray]: @@ -167,7 +168,7 @@ def supported_devices(self) -> list[str]: @property def version(self) -> str: if OPENVINO_AVAILABLE: - return ov.__version__ + return str(ov.__version__) return "not installed" @property @@ -182,7 +183,7 @@ def get_available_devices(self) -> list[str]: """Get raw OpenVINO device names.""" if not OPENVINO_AVAILABLE: return [] - return self.core.available_devices + return list(self.core.available_devices) def load( self, diff --git a/src/polyinfer/backends/tensorrt/backend.py b/src/polyinfer/backends/tensorrt/backend.py index 65a7f89..fd836be 100644 --- a/src/polyinfer/backends/tensorrt/backend.py +++ b/src/polyinfer/backends/tensorrt/backend.py @@ -76,10 +76,10 @@ def __init__( # For static shapes, pre-allocate GPU buffers # For dynamic shapes, allocate lazily on first inference - self._d_inputs = {} - self._d_outputs = {} - self._h_outputs = {} - self._allocated_shapes = {} # Track allocated buffer shapes + self._d_inputs: dict[str, int] = {} + self._d_outputs: dict[str, int] = {} + self._h_outputs: dict[str, np.ndarray] = {} + self._allocated_shapes: dict[str, tuple[int, ...]] = {} # Track allocated buffer shapes if not self._has_dynamic_shapes: self._allocate_buffers() @@ -108,7 +108,7 @@ def input_shapes(self) -> list[tuple]: def output_shapes(self) -> list[tuple]: return self._output_shapes - def _allocate_buffers(self, input_shapes: dict[str, tuple] = None): + def _allocate_buffers(self, input_shapes: dict[str, tuple] | None = None): """Allocate GPU buffers for inputs and outputs. For dynamic shapes, input_shapes must be provided to determine output shapes. 
@@ -250,7 +250,7 @@ def supported_devices(self) -> list[str]: @property def version(self) -> str: if TENSORRT_AVAILABLE: - return trt.__version__ + return str(trt.__version__) return "not installed" @property @@ -337,9 +337,9 @@ def load( _logger.debug(f"Using CUDA device: {device_id}") # Check for cached engine - model_path = Path(model_path) + model_path_obj = Path(model_path) cache_path = kwargs.get("cache_path") - cache_path = model_path.with_suffix(".engine") if cache_path is None else Path(cache_path) + cache_path = model_path_obj.with_suffix(".engine") if cache_path is None else Path(cache_path) # Try to load cached engine (unless force_rebuild) if cache_path.exists() and not kwargs.get("force_rebuild", False): @@ -348,7 +348,7 @@ def load( # Build engine from ONNX with full options _logger.info("Building TensorRT engine from ONNX (this may take a while)...") - engine = self._build_engine(model_path, **kwargs) + engine = self._build_engine(model_path_obj, **kwargs) # Cache the engine _logger.debug(f"Saving engine to: {cache_path}") diff --git a/src/polyinfer/compare.py b/src/polyinfer/compare.py index 28186b9..1eafc88 100644 --- a/src/polyinfer/compare.py +++ b/src/polyinfer/compare.py @@ -44,14 +44,14 @@ def benchmark( model(*inputs) # Benchmark - times = [] + times_list: list[float] = [] for _ in range(iterations): start = time.perf_counter() model(*inputs) elapsed = (time.perf_counter() - start) * 1000 - times.append(elapsed) + times_list.append(elapsed) - times = np.array(times) + times = np.array(times_list) return { "backend": backend, "device": device, diff --git a/src/polyinfer/discovery.py b/src/polyinfer/discovery.py index a6ff539..1a600fe 100644 --- a/src/polyinfer/discovery.py +++ b/src/polyinfer/discovery.py @@ -90,7 +90,7 @@ def list_devices() -> list[DeviceInfo]: cpu (cpu) - backends: [onnxruntime, openvino] cuda:0 (cuda) - backends: [onnxruntime] """ - devices = {} + devices: dict[str, dict[str, list[str] | str]] = {} # Collect devices from 
all backends for backend_name in _list_backends(available_only=True): @@ -102,18 +102,22 @@ def list_devices() -> list[DeviceInfo]: "backends": [], "type": device.split(":")[0] if ":" in device else device, } - devices[device]["backends"].append(backend_name) + backends_list = devices[device]["backends"] + if isinstance(backends_list, list): + backends_list.append(backend_name) except Exception: continue # Build DeviceInfo list - result = [] + result: list[DeviceInfo] = [] for name, info in sorted(devices.items()): + device_type = info["type"] + backends = info["backends"] result.append( DeviceInfo( name=name, - device_type=info["type"], - backends=info["backends"], + device_type=str(device_type), + backends=list(backends) if isinstance(backends, list) else [backends], ) ) @@ -178,29 +182,34 @@ def system_info() -> dict: } # Backend info + backends_dict = info["backends"] for name in _list_backends(available_only=False): try: backend = _get_backend(name) - info["backends"][name] = { - "available": backend.is_available(), - "version": backend.version, - "devices": backend.supported_devices, - "priority": backend.priority, - } + if isinstance(backends_dict, dict): + backends_dict[name] = { + "available": backend.is_available(), + "version": backend.version, + "devices": backend.supported_devices, + "priority": backend.priority, + } except Exception as e: - info["backends"][name] = { - "available": False, - "error": str(e), - } + if isinstance(backends_dict, dict): + backends_dict[name] = { + "available": False, + "error": str(e), + } # Device info + devices_list = info["devices"] for device in list_devices(): - info["devices"].append( - { - "name": device.name, - "type": device.device_type, - "backends": device.backends, - } - ) + if isinstance(devices_list, list): + devices_list.append( + { + "name": device.name, + "type": device.device_type, + "backends": device.backends, + } + ) return info diff --git a/src/polyinfer/nvidia_setup.py b/src/polyinfer/nvidia_setup.py 
index cc7bc90..e603556 100644 --- a/src/polyinfer/nvidia_setup.py +++ b/src/polyinfer/nvidia_setup.py @@ -439,7 +439,7 @@ def get_nvidia_info() -> dict: Dictionary with information about found NVIDIA packages and libraries. """ site_packages = _get_site_packages() - info = { + info: dict = { "site_packages": str(site_packages), "library_directories": [], "libraries": {}, @@ -502,17 +502,17 @@ def fix_onnxruntime_conflict(prefer: str = "cuda") -> bool: import subprocess try: - import importlib.metadata as metadata + import importlib.metadata as pkg_metadata except ImportError: - import importlib_metadata as metadata + import importlib_metadata as pkg_metadata # type: ignore # Check which variants are installed - installed = [] + installed: list[str] = [] for pkg in ["onnxruntime", "onnxruntime-gpu", "onnxruntime-directml"]: try: - metadata.version(pkg) + pkg_metadata.version(pkg) installed.append(pkg) - except metadata.PackageNotFoundError: + except pkg_metadata.PackageNotFoundError: pass if len(installed) <= 1: diff --git a/src/polyinfer/quantization.py b/src/polyinfer/quantization.py index c57fb78..dca5128 100644 --- a/src/polyinfer/quantization.py +++ b/src/polyinfer/quantization.py @@ -268,6 +268,8 @@ def _quantize_onnxruntime( ort_calib_method = calib_method_map.get(config.calibration_method, ORTCalibMethod.MinMax) # Create calibration data reader + if calibration_data is None: + raise ValueError("calibration_data is required for static quantization") data_reader = _create_ort_calibration_reader(model_input, calibration_data, num_samples) quantize_static( @@ -368,6 +370,9 @@ def get_next(self) -> dict[str, np.ndarray] | None: if self._count >= self.num_samples: return None + if self._data_iter is None: + return None + try: batch = next(self._data_iter) self._count += 1 @@ -375,7 +380,9 @@ def get_next(self) -> dict[str, np.ndarray] | None: # Handle single array input if isinstance(batch, np.ndarray): return {self._input_names[0]: batch} - return batch + # 
Ensure we return the correct type + result: dict[str, np.ndarray] = batch + return result except StopIteration: return None @@ -480,40 +487,43 @@ def _quantize_openvino( def _create_nncf_dataset(model, data: CalibrationData, num_samples: int): """Create NNCF Dataset from calibration data.""" + from typing import Any + import nncf # Get input names input_names = [inp.any_name for inp in model.inputs] - # Normalize data + # Normalize data to a list + normalized_data: Any = data if callable(data) and not isinstance(data, (list, Iterator)): - data = data() + normalized_data = data() - if isinstance(data, list) and len(data) > 0 and isinstance(data[0], np.ndarray): + if isinstance(normalized_data, list) and len(normalized_data) > 0 and isinstance(normalized_data[0], np.ndarray): # List of arrays if len(input_names) != 1: raise ValueError( f"Model has {len(input_names)} inputs, but calibration data is a list of arrays." ) - data = [{input_names[0]: arr} for arr in data] + normalized_data = [{input_names[0]: arr} for arr in normalized_data] # Convert to list if iterator - if not isinstance(data, list): - data = list(data) + if not isinstance(normalized_data, list): + normalized_data = list(normalized_data) # Limit samples - data = data[:num_samples] + data_list: list[Any] = normalized_data[:num_samples] # Transform function for NNCF - def transform_fn(data_item): + def transform_fn(data_item: Any) -> tuple[Any, ...]: if isinstance(data_item, dict): # Return as tuple of arrays in input order return tuple(data_item[name] for name in input_names) elif isinstance(data_item, np.ndarray): return (data_item,) - return data_item + return (data_item,) - return nncf.Dataset(data, transform_fn) + return nncf.Dataset(data_list, transform_fn) def quantize_for_tensorrt( From 830cd2e688bb29991251df52895cf661479f880a Mon Sep 17 00:00:00 2001 From: Athrva Date: Sat, 20 Dec 2025 14:11:18 -0500 Subject: [PATCH 4/8] Format for satisfying ruff --- src/polyinfer/_logging.py | 4 +++- 
src/polyinfer/backends/iree/backend.py | 4 +++- src/polyinfer/backends/tensorrt/backend.py | 4 +++- src/polyinfer/quantization.py | 6 +++++- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/polyinfer/_logging.py b/src/polyinfer/_logging.py index 5ff28b7..d10bc50 100644 --- a/src/polyinfer/_logging.py +++ b/src/polyinfer/_logging.py @@ -237,7 +237,9 @@ def _log_backend_init(name: str, version: str, devices: list): logger.debug(f" Supported devices: {devices}") -def _log_inference(backend: str, input_shapes: list, output_shapes: list, time_ms: float | None = None): +def _log_inference( + backend: str, input_shapes: list, output_shapes: list, time_ms: float | None = None +): """Log inference operation.""" logger = get_logger("inference") if time_ms is not None: diff --git a/src/polyinfer/backends/iree/backend.py b/src/polyinfer/backends/iree/backend.py index a25b4b6..79b70c4 100644 --- a/src/polyinfer/backends/iree/backend.py +++ b/src/polyinfer/backends/iree/backend.py @@ -354,7 +354,9 @@ def emit_mlir( raise FileNotFoundError(f"Model not found: {model_path_obj}") # Determine output path - output_path_obj = model_path_obj.with_suffix(".mlir") if output_path is None else Path(output_path) + output_path_obj = ( + model_path_obj.with_suffix(".mlir") if output_path is None else Path(output_path) + ) output_path_obj.parent.mkdir(parents=True, exist_ok=True) diff --git a/src/polyinfer/backends/tensorrt/backend.py b/src/polyinfer/backends/tensorrt/backend.py index fd836be..16a59b8 100644 --- a/src/polyinfer/backends/tensorrt/backend.py +++ b/src/polyinfer/backends/tensorrt/backend.py @@ -339,7 +339,9 @@ def load( # Check for cached engine model_path_obj = Path(model_path) cache_path = kwargs.get("cache_path") - cache_path = model_path_obj.with_suffix(".engine") if cache_path is None else Path(cache_path) + cache_path = ( + model_path_obj.with_suffix(".engine") if cache_path is None else Path(cache_path) + ) # Try to load cached engine (unless 
force_rebuild) if cache_path.exists() and not kwargs.get("force_rebuild", False): diff --git a/src/polyinfer/quantization.py b/src/polyinfer/quantization.py index dca5128..881a74d 100644 --- a/src/polyinfer/quantization.py +++ b/src/polyinfer/quantization.py @@ -499,7 +499,11 @@ def _create_nncf_dataset(model, data: CalibrationData, num_samples: int): if callable(data) and not isinstance(data, (list, Iterator)): normalized_data = data() - if isinstance(normalized_data, list) and len(normalized_data) > 0 and isinstance(normalized_data[0], np.ndarray): + if ( + isinstance(normalized_data, list) + and len(normalized_data) > 0 + and isinstance(normalized_data[0], np.ndarray) + ): # List of arrays if len(input_names) != 1: raise ValueError( From 4caa6e966799fbfe0589cc22d81e21c535673a44 Mon Sep 17 00:00:00 2001 From: Athrva Date: Sat, 20 Dec 2025 14:14:58 -0500 Subject: [PATCH 5/8] Fix mypy --- src/polyinfer/backends/tensorrt/backend.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/polyinfer/backends/tensorrt/backend.py b/src/polyinfer/backends/tensorrt/backend.py index 16a59b8..b2e37bb 100644 --- a/src/polyinfer/backends/tensorrt/backend.py +++ b/src/polyinfer/backends/tensorrt/backend.py @@ -211,7 +211,8 @@ def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]: cudart.cudaStreamSynchronize(self._stream) if len(outputs) == 1: - return outputs[0] + result: np.ndarray = outputs[0] + return result return tuple(outputs) def __del__(self): From 99763d24fe8f5a3c05a8c626d8954c0ef543ea00 Mon Sep 17 00:00:00 2001 From: Athrva Date: Sat, 20 Dec 2025 14:20:38 -0500 Subject: [PATCH 6/8] Fix intel devices test for CI --- tests/test_intel_devices.py | 215 +++++++++++++++++++++++------------- 1 file changed, 137 insertions(+), 78 deletions(-) diff --git a/tests/test_intel_devices.py b/tests/test_intel_devices.py index 5510b50..186e06a 100644 --- a/tests/test_intel_devices.py +++ b/tests/test_intel_devices.py @@ -1,88 +1,147 @@ 
-"""Test polyinfer with Intel devices (CPU, iGPU, NPU).""" +"""Test polyinfer with Intel devices (CPU, iGPU, NPU). + +These tests require Intel hardware and OpenVINO to be properly configured. +They are marked with intel_gpu or npu markers and will be skipped in CI. +""" import os import sys -sys.path.insert(0, "src") - import numpy as np +import pytest + +sys.path.insert(0, "src") import polyinfer as pi -from polyinfer.backends.openvino import OpenVINOBackend - -# Check what's available -print("=" * 60) -print("PolyInfer: Intel Device Test") -print("=" * 60) - -print("\nAvailable backends:", pi.list_backends()) -print("Available devices:", pi.list_devices()) - -# Get OpenVINO backend directly to see raw device names -ov_backend = OpenVINOBackend() -print("\nOpenVINO raw devices:", ov_backend.get_available_devices()) - -# Test model path, use YOLOv8n if available -model_path = None -for path in ["yolov8n.onnx", "examples/yolov8n.onnx", "../yolov8n.onnx"]: - if os.path.exists(path): - model_path = path - break - -if model_path is None: - print("\nNo test model found. 
Downloading yolov8n.onnx...") - try: - from ultralytics import YOLO - - model = YOLO("yolov8n.pt") - model.export(format="onnx") - model_path = "yolov8n.onnx" - except ImportError: - print( - "Please provide a model: pip install ultralytics && yolo export model=yolov8n.pt format=onnx" - ) - sys.exit(1) - -print(f"\nUsing model: {model_path}") - -# Create test input (YOLOv8n expects 1x3x640x640) -input_data = np.random.rand(1, 3, 640, 640).astype(np.float32) - -# Test each device -devices_to_test = [ - ("cpu", "CPU (Intel Core Ultra 9)"), - ("intel-gpu", "Intel iGPU"), - ("intel-gpu:0", "Intel iGPU (explicit)"), - ("npu", "Intel NPU (AI Boost)"), + +# Check if OpenVINO is available +try: + from polyinfer.backends.openvino import OpenVINOBackend + + OPENVINO_AVAILABLE = True + ov_backend = OpenVINOBackend() + AVAILABLE_DEVICES = ov_backend.get_available_devices() +except ImportError: + OPENVINO_AVAILABLE = False + AVAILABLE_DEVICES = [] + + +def _get_test_model(): + """Get a test model path, or None if not available.""" + for path in ["yolov8n.onnx", "examples/yolov8n.onnx", "../yolov8n.onnx", "tests/yolov8n.onnx"]: + if os.path.exists(path): + return path + return None + + +# Skip all tests if no model available or OpenVINO not installed +pytestmark = [ + pytest.mark.skipif(not OPENVINO_AVAILABLE, reason="OpenVINO not available"), ] -print("\n" + "=" * 60) -print("Running benchmarks...") -print("=" * 60) - -results = [] -for device, description in devices_to_test: - try: - print(f"\n[{device}] {description}") - model = pi.load(model_path, backend="openvino", device=device) - print(f" Backend: {model.backend_name}") - - # Benchmark - bench = model.benchmark(input_data, warmup=5, iterations=20) - print(f" Latency: {bench['mean_ms']:.2f} ms ({bench['fps']:.1f} FPS)") - results.append((device, description, bench["mean_ms"], bench["fps"])) - except Exception as e: - print(f" ERROR: {e}") - results.append((device, description, None, None)) - -# Summary -print("\n" + "=" * 
60) -print("Summary") -print("=" * 60) -print(f"{'Device':<20} {'Description':<30} {'Latency':>10} {'FPS':>10}") -print("-" * 70) -for device, desc, latency, fps in results: - if latency: - print(f"{device:<20} {desc:<30} {latency:>8.2f}ms {fps:>9.1f}") + +@pytest.fixture +def test_model(): + """Fixture providing a test model path.""" + model_path = _get_test_model() + if model_path is None: + pytest.skip("No test model available (yolov8n.onnx)") + return model_path + + +@pytest.fixture +def test_input(): + """Fixture providing test input data for YOLOv8n (1x3x640x640).""" + return np.random.rand(1, 3, 640, 640).astype(np.float32) + + +class TestIntelCPU: + """Tests for Intel CPU inference.""" + + def test_cpu_inference(self, test_model, test_input): + """Test inference on CPU.""" + model = pi.load(test_model, backend="openvino", device="cpu") + assert model.backend_name == "openvino" + output = model(test_input) + assert output is not None + + def test_cpu_benchmark(self, test_model, test_input): + """Test benchmarking on CPU.""" + model = pi.load(test_model, backend="openvino", device="cpu") + bench = model.benchmark(test_input, warmup=2, iterations=5) + assert "mean_ms" in bench + assert "fps" in bench + assert bench["mean_ms"] > 0 + + +@pytest.mark.intel_gpu +class TestIntelGPU: + """Tests for Intel iGPU inference.""" + + @pytest.fixture(autouse=True) + def check_gpu_available(self): + """Skip if Intel GPU not available.""" + if "GPU" not in AVAILABLE_DEVICES and "GPU.0" not in AVAILABLE_DEVICES: + pytest.skip("Intel GPU not available") + + def test_igpu_inference(self, test_model, test_input): + """Test inference on Intel iGPU.""" + model = pi.load(test_model, backend="openvino", device="intel-gpu") + assert model.backend_name == "openvino" + output = model(test_input) + assert output is not None + + def test_igpu_benchmark(self, test_model, test_input): + """Test benchmarking on Intel iGPU.""" + model = pi.load(test_model, backend="openvino", 
device="intel-gpu") + bench = model.benchmark(test_input, warmup=2, iterations=5) + assert "mean_ms" in bench + assert "fps" in bench + assert bench["mean_ms"] > 0 + + +@pytest.mark.npu +class TestIntelNPU: + """Tests for Intel NPU (AI Boost) inference.""" + + @pytest.fixture(autouse=True) + def check_npu_available(self): + """Skip if Intel NPU not available.""" + if "NPU" not in AVAILABLE_DEVICES: + pytest.skip("Intel NPU not available") + + def test_npu_inference(self, test_model, test_input): + """Test inference on Intel NPU.""" + model = pi.load(test_model, backend="openvino", device="npu") + assert model.backend_name == "openvino" + output = model(test_input) + assert output is not None + + def test_npu_benchmark(self, test_model, test_input): + """Test benchmarking on Intel NPU.""" + model = pi.load(test_model, backend="openvino", device="npu") + bench = model.benchmark(test_input, warmup=2, iterations=5) + assert "mean_ms" in bench + assert "fps" in bench + assert bench["mean_ms"] > 0 + + +if __name__ == "__main__": + # When run as a script, print device info + print("=" * 60) + print("PolyInfer: Intel Device Test") + print("=" * 60) + + print("\nAvailable backends:", pi.list_backends()) + print("Available devices:", pi.list_devices()) + + if OPENVINO_AVAILABLE: + print("\nOpenVINO raw devices:", AVAILABLE_DEVICES) + else: + print("\nOpenVINO not available") + + model_path = _get_test_model() + if model_path: + print(f"\nTest model found: {model_path}") else: - print(f"{device:<20} {desc:<30} {'FAILED':>10} {'-':>10}") + print("\nNo test model found. 
Please provide yolov8n.onnx") From dcf385b10cb2bf7e7bd47056f2c503045ced359f Mon Sep 17 00:00:00 2001 From: Athrva Date: Sat, 20 Dec 2025 14:30:11 -0500 Subject: [PATCH 7/8] Add onnxruntime dep in dev dependencies --- pyproject.toml | 2 ++ tests/test_backends.py | 9 ++++++++- tests/test_mlir.py | 6 ++++++ tests/test_quantization.py | 21 +++++++++++++++++++++ 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 533672c..86d2133 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,6 +151,8 @@ dev = [ "pytest-benchmark>=4.0", "ruff>=0.1", "mypy>=1.0", + # Include onnxruntime so basic tests can run + "onnxruntime>=1.17", ] [project.urls] diff --git a/tests/test_backends.py b/tests/test_backends.py index ecd6db4..fd7c025 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -5,6 +5,10 @@ import polyinfer as pi from polyinfer.backends.registry import get_all_backends, get_backend +# Check if any backend is available +_BACKENDS = pi.list_backends() +_HAS_ANY_BACKEND = len(_BACKENDS) > 0 + class TestBackendDiscovery: """Test backend discovery functionality.""" @@ -14,6 +18,7 @@ def test_list_backends_returns_list(self): backends = pi.list_backends() assert isinstance(backends, list) + @pytest.mark.skipif(not _HAS_ANY_BACKEND, reason="No backends installed") def test_list_backends_not_empty(self): """At least one backend should be available.""" backends = pi.list_backends() @@ -24,8 +29,9 @@ def test_list_devices_returns_list(self): devices = pi.list_devices() assert isinstance(devices, list) + @pytest.mark.skipif(not _HAS_ANY_BACKEND, reason="No backends installed") def test_cpu_always_available(self): - """CPU device should always be available.""" + """CPU device should always be available when backends are installed.""" devices = pi.list_devices() device_names = [d.name for d in devices] assert "cpu" in device_names, "CPU device not found" @@ -128,6 +134,7 @@ def test_backends_have_priority(self): assert 
isinstance(backend.priority, int) assert backend.priority >= 0 + @pytest.mark.skipif(not _HAS_ANY_BACKEND, reason="No backends installed") def test_select_backend_for_cpu(self): """Auto-selection should work for CPU.""" from polyinfer.discovery import select_backend diff --git a/tests/test_mlir.py b/tests/test_mlir.py index be48f7d..9e11eb0 100644 --- a/tests/test_mlir.py +++ b/tests/test_mlir.py @@ -8,6 +8,12 @@ import polyinfer as pi +# Check if IREE is available +IREE_AVAILABLE = pi.is_available("iree") + +# Skip all tests in this module if IREE is not available +pytestmark = pytest.mark.skipif(not IREE_AVAILABLE, reason="IREE backend not available") + # ============================================================================= # Fixtures # ============================================================================= diff --git a/tests/test_quantization.py b/tests/test_quantization.py index ceb5317..7d78122 100644 --- a/tests/test_quantization.py +++ b/tests/test_quantization.py @@ -7,6 +7,17 @@ import polyinfer as pi +# Check if onnxruntime quantization is available +try: + from onnxruntime.quantization import quantize_dynamic as _ # noqa: F401 + + ONNXRUNTIME_QUANT_AVAILABLE = True +except ImportError: + ONNXRUNTIME_QUANT_AVAILABLE = False + +# Check if any backend can load models +_HAS_ANY_BACKEND = len(pi.list_backends()) > 0 + class TestQuantizationAPI: """Test quantization API availability and basic functionality.""" @@ -46,6 +57,9 @@ def test_calibration_method_values(self): assert pi.CalibrationMethod.PERCENTILE.value == "percentile" +@pytest.mark.skipif( + not ONNXRUNTIME_QUANT_AVAILABLE, reason="onnxruntime quantization not installed" +) class TestDynamicQuantization: """Test dynamic quantization (no calibration needed).""" @@ -116,6 +130,7 @@ def test_quantize_function_dynamic(self, simple_model, tmp_path): assert output_path.exists() assert result.method == "dynamic" + @pytest.mark.skipif(not _HAS_ANY_BACKEND, reason="No backends installed") def 
test_quantized_model_loads(self, simple_model, tmp_path): """Test that quantized model can be loaded and run.""" output_path = tmp_path / "model_int8.onnx" @@ -131,6 +146,9 @@ def test_quantized_model_loads(self, simple_model, tmp_path): assert output.shape == (1, 3, 32, 32) +@pytest.mark.skipif( + not ONNXRUNTIME_QUANT_AVAILABLE, reason="onnxruntime quantization not installed" +) class TestStaticQuantization: """Test static quantization (requires calibration).""" @@ -304,6 +322,9 @@ def test_fp16_model_runs(self, simple_model, tmp_path): assert output is not None +@pytest.mark.skipif( + not ONNXRUNTIME_QUANT_AVAILABLE, reason="onnxruntime quantization not installed" +) class TestQuantizationResult: """Test QuantizationResult dataclass.""" From b06707ba24de0c3f4d258227146dae3c18232e1f Mon Sep 17 00:00:00 2001 From: Athrva Date: Sat, 20 Dec 2025 14:33:33 -0500 Subject: [PATCH 8/8] Fix opset version in test --- tests/test_backend_options.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_backend_options.py b/tests/test_backend_options.py index 80c6867..fb445a1 100644 --- a/tests/test_backend_options.py +++ b/tests/test_backend_options.py @@ -40,7 +40,8 @@ def dummy_onnx_model(tmp_path): [const_tensor], ) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 17)]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + model.ir_version = 8 # Use IR version 8 for broader compatibility model_path = tmp_path / "test_model.onnx" onnx.save(model, str(model_path))