From 58a34f15760dfee844ea7007a2bf043ab6d8d47d Mon Sep 17 00:00:00 2001
From: Athrva <athrva98@gmail.com>
Date: Sat, 20 Dec 2025 11:40:19 -0500
Subject: [PATCH 1/8] Add GitHub Actions CI/CD pipeline

---
 .github/workflows/ci.yml      | 231 ++++++++++++++++++++++++++++++++++
 .github/workflows/release.yml | 104 +++++++++++++++
 2 files changed, 335 insertions(+)
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 .github/workflows/release.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..cbf7a94
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,231 @@
+# GitHub Actions CI/CD Pipeline for PolyInfer
+# Cross-platform testing on Windows, Linux, and macOS
+# Uses FREE GitHub Actions plan (GitHub-hosted runners, CPU-only)
+
+name: CI
+
+on:
+  push:
+    branches: [master, main]
+  pull_request:
+    branches: [master, main]
+  workflow_dispatch:  # Allow manual trigger
+
+# Cancel in-progress runs for the same branch
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  PYTHONDONTWRITEBYTECODE: 1
+  PYTHONUNBUFFERED: 1
+
+jobs:
+  # ============================================
+  # Code Quality Checks (Fast, runs first)
+  # ============================================
+  lint:
+    name: Lint & Type Check
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: "pip"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install ruff mypy
+
+      - name: Run ruff linter
+        run: ruff check src/ tests/
+
+      - name: Run ruff formatter check
+        run: ruff format --check src/ tests/
+
+      - name: Install package for type checking
+        run: pip install -e .
+
+      - name: Run mypy type checker
+        run: mypy src/polyinfer/
+
+  # ============================================
+  # Cross-Platform Tests (CPU-only)
+  # ============================================
+  test:
+    name: Test - Python ${{ matrix.python-version }} on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 15
+    needs: lint  # Only run tests if linting passes
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, windows-latest]
+        python-version: ["3.10", "3.11", "3.12"]
+        include:
+          # Only test one Python version on macOS to save minutes (10x cost)
+          - os: macos-latest
+            python-version: "3.11"
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: "pip"
+
+      - name: Install package with dev dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+
+      - name: List available backends
+        run: python -c "import polyinfer as pi; print('Available backends:', pi.list_backends()); print('Available devices:', pi.list_devices())"
+
+      - name: Run CPU-only tests
+        run: |
+          pytest tests/ -v --tb=short -m "not (cuda or tensorrt or directml or intel_gpu or npu or vulkan or benchmark or slow)"
+
+  # ============================================
+  # Backend-Specific Tests (Optional CPU backends)
+  # ============================================
+  test-backends:
+    name: Test - ${{ matrix.backend }} Backend
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    needs: lint
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - backend: openvino
+            install: ".[openvino,dev]"
+            marker: "openvino"
+          - backend: iree
+            install: ".[iree,dev]"
+            marker: "iree"
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: "pip"
+
+      - name: Install package with ${{ matrix.backend }} backend
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e "${{ matrix.install }}"
+
+      - name: List available backends
+        run: python -c "import polyinfer as pi; print('Available backends:', pi.list_backends()); print('Available devices:', pi.list_devices())"
+
+      - name: Run ${{ matrix.backend }} tests
+        run: |
+          pytest tests/ -v --tb=short -m "${{ matrix.marker }}"
+        continue-on-error: true  # Backend tests are optional on CPU runners
+
+  # ============================================
+  # Build & Package Verification
+  # ============================================
+  build:
+    name: Build Package
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    needs: lint
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: "pip"
+
+      - name: Install build tools
+        run: |
+          python -m pip install --upgrade pip
+          pip install build twine
+
+      - name: Build package
+        run: python -m build
+
+      - name: Check package with twine
+        run: twine check dist/*
+
+      - name: Upload build artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
+          retention-days: 7
+
+  # ============================================
+  # Windows DirectML Test (Windows-specific)
+  # ============================================
+  test-directml:
+    name: Test - DirectML (Windows)
+    runs-on: windows-latest
+    timeout-minutes: 15
+    needs: lint
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: "pip"
+
+      - name: Install package with DirectML backend
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[amd,dev]"
+
+      - name: List available backends
+        run: python -c "import polyinfer as pi; print('Available backends:', pi.list_backends()); print('Available devices:', pi.list_devices())"
+
+      - name: Run DirectML-compatible tests
+        run: |
+          pytest tests/ -v --tb=short -m "not (cuda or tensorrt or intel_gpu or npu or vulkan or benchmark or slow)"
+        continue-on-error: true  # DirectML may not work on GitHub runners without GPU
+
+  # ============================================
+  # Summary Job (Required status check)
+  # ============================================
+  ci-success:
+    name: CI Success
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    needs: [lint, test, build]
+    if: always()
+    steps:
+      - name: Check all jobs passed
+        run: |
+          if [[ "${{ needs.lint.result }}" != "success" ]]; then
+            echo "Lint job failed"
+            exit 1
+          fi
+          if [[ "${{ needs.test.result }}" != "success" ]]; then
+            echo "Test job failed"
+            exit 1
+          fi
+          if [[ "${{ needs.build.result }}" != "success" ]]; then
+            echo "Build job failed"
+            exit 1
+          fi
+          echo "All required jobs passed!"
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..76d5013
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,104 @@
+# GitHub Actions Release Pipeline for PolyInfer
+# Publishes to PyPI when a GitHub release is created
+# Uses trusted publishing (OIDC) - no API tokens needed
+
+name: Release
+
+on:
+  release:
+    types: [published]
+
+env:
+  PYTHONDONTWRITEBYTECODE: 1
+  PYTHONUNBUFFERED: 1
+
+jobs:
+  # ============================================
+  # Run Tests Before Publishing
+  # ============================================
+  test:
+    name: Test - Python ${{ matrix.python-version }} on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 15
+    strategy:
+      fail-fast: true  # Fail fast for releases
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: ["3.11"]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: "pip"
+
+      - name: Install package with dev dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+
+      - name: Run CPU-only tests
+        run: |
+          pytest tests/ -v --tb=short -m "not (cuda or tensorrt or directml or intel_gpu or npu or vulkan or benchmark or slow)"
+
+  # ============================================
+  # Build Package
+  # ============================================
+  build:
+    name: Build Package
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    needs: test
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: "pip"
+
+      - name: Install build tools
+        run: |
+          python -m pip install --upgrade pip
+          pip install build twine
+
+      - name: Build package
+        run: python -m build
+
+      - name: Check package with twine
+        run: twine check dist/*
+
+      - name: Upload build artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
+
+  # ============================================
+  # Publish to PyPI
+  # ============================================
+  publish:
+    name: Publish to PyPI
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    needs: build
+    environment: pypi  # Must match the environment name in PyPI trusted publishing
+    permissions:
+      id-token: write  # Required for trusted publishing
+
+    steps:
+      - name: Download build artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist/
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        # No API token needed - uses OIDC trusted publishing

From 68a5add198fb123b0c9531c5b9bf43018fb81de4 Mon Sep 17 00:00:00 2001
From: Athrva <athrva98@gmail.com>
Date: Sat, 20 Dec 2025 13:35:54 -0500
Subject: [PATCH 2/8] Fix linting issues

---
 src/polyinfer/__init__.py                     |  51 ++++----
 src/polyinfer/_logging.py                     |   9 +-
 src/polyinfer/backends/__init__.py            |   6 +-
 src/polyinfer/backends/_autoload.py           |  25 ++--
 src/polyinfer/backends/base.py                |  13 +-
 src/polyinfer/backends/iree/__init__.py       |   4 +-
 src/polyinfer/backends/iree/backend.py        |  59 ++++-----
 src/polyinfer/backends/onnxruntime/backend.py |  48 +++++---
 src/polyinfer/backends/openvino/backend.py    |  21 ++--
 src/polyinfer/backends/registry.py            |  14 +--
 src/polyinfer/backends/tensorrt/backend.py    |  41 +++----
 src/polyinfer/cli.py                          |  21 +++-
 src/polyinfer/compare.py                      |  12 +-
 src/polyinfer/config.py                       |   2 +-
 src/polyinfer/discovery.py                    |  20 +--
 src/polyinfer/mlir.py                         |   6 +-
 src/polyinfer/model.py                        |  13 +-
 src/polyinfer/nvidia_setup.py                 |  15 ++-
 src/polyinfer/quantization.py                 | 114 +++++++++---------
 tests/conftest.py                             |  30 ++---
 tests/test_backend_options.py                 |  21 ++--
 tests/test_backends.py                        |   3 +-
 tests/test_benchmark.py                       |  49 ++++----
 tests/test_devices.py                         |   3 +-
 tests/test_inference.py                       |   3 +-
 tests/test_intel_devices.py                   |  14 ++-
 tests/test_logging.py                         |  13 +-
 tests/test_mlir.py                            |  28 +++--
 tests/test_quantization.py                    |  86 +++++--------
 tests/test_yolov8.py                          | 103 +++++++++-------
 30 files changed, 437 insertions(+), 410 deletions(-)

diff --git a/src/polyinfer/__init__.py b/src/polyinfer/__init__.py
index 0104c0f..a5f728d 100644
--- a/src/polyinfer/__init__.py
+++ b/src/polyinfer/__init__.py
@@ -28,40 +28,39 @@
 
 # Auto-setup NVIDIA libraries BEFORE importing anything else
 # This ensures CUDA, cuDNN, TensorRT DLLs are findable
-from polyinfer import nvidia_setup as _nvidia_setup
-from polyinfer.nvidia_setup import fix_onnxruntime_conflict, get_nvidia_info, setup_tensorrt_paths
-
-from polyinfer.model import load, Model
+from polyinfer import nvidia_setup as _nvidia_setup  # noqa: F401
+from polyinfer._logging import (
+    LogContext,
+    configure_logging,
+    disable_logging,
+    enable_logging,
+    get_log_level,
+    get_log_level_name,
+    get_logger,
+    set_log_level,
+)
+from polyinfer.compare import benchmark, compare
+from polyinfer.config import InferenceConfig
 from polyinfer.discovery import (
-    list_backends,
-    list_devices,
     get_backend,
     is_available,
+    list_backends,
+    list_devices,
 )
-from polyinfer.config import InferenceConfig
-from polyinfer.compare import compare, benchmark
-from polyinfer.mlir import export_mlir, compile_mlir, MLIROutput
+from polyinfer.mlir import MLIROutput, compile_mlir, export_mlir
+from polyinfer.model import Model, load
+from polyinfer.nvidia_setup import fix_onnxruntime_conflict, get_nvidia_info, setup_tensorrt_paths
 from polyinfer.quantization import (
-    quantize,
-    quantize_dynamic,
-    quantize_static,
-    convert_to_fp16,
-    quantize_for_tensorrt,
-    QuantizationResult,
+    CalibrationMethod,
     QuantizationConfig,
     QuantizationMethod,
+    QuantizationResult,
     QuantizationType,
-    CalibrationMethod,
-)
-from polyinfer._logging import (
-    get_logger,
-    set_log_level,
-    get_log_level,
-    get_log_level_name,
-    enable_logging,
-    disable_logging,
-    configure_logging,
-    LogContext,
+    convert_to_fp16,
+    quantize,
+    quantize_dynamic,
+    quantize_for_tensorrt,
+    quantize_static,
 )
 
 __all__ = [
diff --git a/src/polyinfer/_logging.py b/src/polyinfer/_logging.py
index 4fdbd24..d6c99ac 100644
--- a/src/polyinfer/_logging.py
+++ b/src/polyinfer/_logging.py
@@ -26,7 +26,6 @@
 
 import logging
 import sys
-from typing import Union
 
 # Create the polyinfer logger hierarchy
 _logger = logging.getLogger("polyinfer")
@@ -85,7 +84,7 @@ def get_logger(name: str = "") -> logging.Logger:
     return _logger
 
 
-def set_log_level(level: Union[str, int]) -> None:
+def set_log_level(level: str | int) -> None:
     """Set the global polyinfer log level.
 
     Args:
@@ -128,7 +127,7 @@ def get_log_level_name() -> str:
     return logging.getLevelName(level)
 
 
-def enable_logging(level: Union[str, int] = "INFO") -> None:
+def enable_logging(level: str | int = "INFO") -> None:
     """Enable logging with specified level.
 
     Convenience function to quickly enable verbose logging.
@@ -152,7 +151,7 @@ def disable_logging() -> None:
 
 
 def configure_logging(
-    level: Union[str, int] = "WARNING",
+    level: str | int = "WARNING",
     format: str = "[%(levelname)s] %(name)s: %(message)s",
     stream=None,
     filename: str = None,
@@ -205,7 +204,7 @@ class LogContext:
         >>> # Back to previous level
     """
 
-    def __init__(self, level: Union[str, int]):
+    def __init__(self, level: str | int):
         self.new_level = level
         self.old_level = None
 
diff --git a/src/polyinfer/backends/__init__.py b/src/polyinfer/backends/__init__.py
index afbd419..1ffd8b5 100644
--- a/src/polyinfer/backends/__init__.py
+++ b/src/polyinfer/backends/__init__.py
@@ -2,11 +2,11 @@
 
 from polyinfer.backends.base import Backend, CompiledModel
 from polyinfer.backends.registry import (
-    register_backend,
+    BackendInfo,
     get_backend,
-    list_backends,
     get_backends_for_device,
-    BackendInfo,
+    list_backends,
+    register_backend,
 )
 
 __all__ = [
diff --git a/src/polyinfer/backends/_autoload.py b/src/polyinfer/backends/_autoload.py
index 488874d..fce79b4 100644
--- a/src/polyinfer/backends/_autoload.py
+++ b/src/polyinfer/backends/_autoload.py
@@ -1,7 +1,9 @@
 """Auto-load and register available backends."""
 
-import sys
+import contextlib
 import logging
+import sys
+
 from polyinfer.backends.registry import register_backend
 
 # Use logging module directly to avoid circular imports
@@ -69,6 +71,7 @@ def _should_use_lazy_onnxruntime() -> bool:
     # Check if onnxruntime-gpu is installed (vs plain onnxruntime)
     try:
         import importlib.metadata as metadata
+
         metadata.version("onnxruntime-gpu")
         return True  # onnxruntime-gpu installed, use lazy loading
     except Exception:
@@ -202,13 +205,14 @@ def _ensure_loaded(cls):
             cls._import_attempted = True
             try:
                 from polyinfer.backends.iree.backend import IREEBackend
+
                 cls._real_backend = IREEBackend()
             except ImportError as e:
                 cls._import_error = RuntimeError(
                     f"IREE not available: {e}. "
                     "Install with: pip install iree-base-runtime iree-base-compiler"
                 )
-                raise cls._import_error
+                raise cls._import_error from e
 
         @property
         def name(self) -> str:
@@ -234,6 +238,7 @@ def priority(self) -> int:
         def is_available(self) -> bool:
             try:
                 import importlib.metadata as metadata
+
                 metadata.version("iree-base-runtime")
                 return True
             except Exception:
@@ -245,10 +250,8 @@ def load(self, model_path: str, device: str = "cpu", **kwargs):
             self._ensure_loaded()
             return self._real_backend.load(model_path, device, **kwargs)
 
-    try:
+    with contextlib.suppress(Exception):
         register_backend("iree", LazyIREEBackend)
-    except Exception:
-        pass
 
 
 def _register_lazy_onnxruntime():
@@ -282,13 +285,14 @@ def _ensure_loaded(cls):
             cls._import_attempted = True
             try:
                 from polyinfer.backends.onnxruntime.backend import ONNXRuntimeBackend
+
                 cls._real_backend = ONNXRuntimeBackend()
             except ImportError as e:
                 cls._import_error = RuntimeError(
                     f"ONNX Runtime not available: {e}. "
                     "Install with: pip install onnxruntime or onnxruntime-gpu"
                 )
-                raise cls._import_error
+                raise cls._import_error from e
 
         @property
         def name(self) -> str:
@@ -301,6 +305,7 @@ def supported_devices(self) -> list[str]:
             devices = ["cpu"]
             try:
                 import importlib.metadata as metadata
+
                 # If onnxruntime-gpu is installed, CUDA devices are likely available
                 metadata.version("onnxruntime-gpu")
                 devices.append("cuda")
@@ -328,12 +333,14 @@ def is_available(self) -> bool:
             # Check if onnxruntime package exists without importing it
             try:
                 import importlib.metadata as metadata
+
                 metadata.version("onnxruntime")
                 return True
             except Exception:
                 pass
             try:
                 import importlib.metadata as metadata
+
                 metadata.version("onnxruntime-gpu")
                 return True
             except Exception:
@@ -345,7 +352,7 @@ def load(self, model_path: str, device: str = "cpu", **kwargs):
             self._ensure_loaded()
             return self._real_backend.load(model_path, device, **kwargs)
 
-    try:
+    # TODO: Narrow exception suppression to specific types once register_backend()
+    #   error conditions are documented.
+    with contextlib.suppress(Exception):
         register_backend("onnxruntime", LazyONNXRuntimeBackend)
-    except Exception:
-        pass  # Registration failed, skip silently
diff --git a/src/polyinfer/backends/base.py b/src/polyinfer/backends/base.py
index 75ee11f..a5ca904 100644
--- a/src/polyinfer/backends/base.py
+++ b/src/polyinfer/backends/base.py
@@ -1,9 +1,10 @@
 """Base classes for all backends."""
 
+import time
 from abc import ABC, abstractmethod
-from typing import Any, Union
+from typing import Any
+
 import numpy as np
-import time
 
 
 class CompiledModel(ABC):
@@ -45,7 +46,7 @@ def output_shapes(self) -> list[tuple]:
         return []
 
     @abstractmethod
-    def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, ...]]:
+    def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]:
         """Run inference on input tensors.
 
         Args:
@@ -56,9 +57,7 @@ def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, .
         """
         ...
 
-    def run(
-        self, inputs: dict[str, np.ndarray]
-    ) -> dict[str, np.ndarray]:
+    def run(self, inputs: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
         """Run inference with named inputs/outputs.
 
         Args:
@@ -74,7 +73,7 @@ def run(
         if isinstance(outputs, np.ndarray):
             outputs = (outputs,)
 
-        return dict(zip(self.output_names, outputs))
+        return dict(zip(self.output_names, outputs, strict=False))
 
     def benchmark(
         self,
diff --git a/src/polyinfer/backends/iree/__init__.py b/src/polyinfer/backends/iree/__init__.py
index c649a62..c973aa3 100644
--- a/src/polyinfer/backends/iree/__init__.py
+++ b/src/polyinfer/backends/iree/__init__.py
@@ -12,11 +12,11 @@
 """
 
 from polyinfer.backends.iree.backend import (
+    DEVICE_TO_DRIVER,
+    DEVICE_TO_TARGET,
     IREEBackend,
     IREEModel,
     MLIROutput,
-    DEVICE_TO_TARGET,
-    DEVICE_TO_DRIVER,
 )
 
 __all__ = [
diff --git a/src/polyinfer/backends/iree/backend.py b/src/polyinfer/backends/iree/backend.py
index 0d7fa07..09ed69b 100644
--- a/src/polyinfer/backends/iree/backend.py
+++ b/src/polyinfer/backends/iree/backend.py
@@ -1,16 +1,16 @@
 """IREE backend implementation."""
 
-from pathlib import Path
-from typing import Union
-import numpy as np
+import shutil
 import subprocess
-import tempfile
 import sys
-import shutil
+import tempfile
 from dataclasses import dataclass
+from pathlib import Path
+
+import numpy as np
 
-from polyinfer.backends.base import Backend, CompiledModel
 from polyinfer._logging import get_logger
+from polyinfer.backends.base import Backend, CompiledModel
 
 _logger = get_logger("backends.iree")
 
@@ -25,6 +25,7 @@ class MLIROutput:
         source_model: Path to the source ONNX model
         dialect: The MLIR dialect used (e.g., 'iree')
     """
+
     path: Path
     content: str | None = None
     source_model: Path | None = None
@@ -54,12 +55,13 @@ def save(self, output_path: str | Path) -> Path:
 
         return output_path
 
+
 # Check if IREE is available
 try:
     import iree.runtime as iree_rt
 
     IREE_RUNTIME_AVAILABLE = True
-    _logger.debug(f"IREE Runtime available")
+    _logger.debug("IREE Runtime available")
 except ImportError:
     IREE_RUNTIME_AVAILABLE = False
     iree_rt = None
@@ -155,10 +157,7 @@ def __init__(
         driver = DEVICE_TO_DRIVER.get(device_type, "local-task")
 
         # Load the module using the simpler BoundModule API
-        self._module = iree_rt.load_vm_flatbuffer_file(
-            str(vmfb_path),
-            driver=driver
-        )
+        self._module = iree_rt.load_vm_flatbuffer_file(str(vmfb_path), driver=driver)
 
         # Find the main inference function
         self._func = None
@@ -192,7 +191,7 @@ def input_names(self) -> list[str]:
     def output_names(self) -> list[str]:
         return self._output_names
 
-    def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, ...]]:
+    def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]:
         """Run inference."""
         # Ensure inputs are contiguous float32
         inputs = tuple(np.ascontiguousarray(inp, dtype=np.float32) for inp in inputs)
@@ -225,14 +224,14 @@ def supported_devices(self) -> list[str]:
 
         # Check for Vulkan
         try:
-            config = iree_rt.Config(driver_name="vulkan")
+            iree_rt.Config(driver_name="vulkan")
             devices.append("vulkan")
         except Exception:
             pass
 
         # Check for CUDA
         try:
-            config = iree_rt.Config(driver_name="cuda")
+            iree_rt.Config(driver_name="cuda")
             devices.append("cuda")
         except Exception:
             pass
@@ -255,13 +254,8 @@ def is_available(self) -> bool:
         if not IREE_RUNTIME_AVAILABLE:
             return False
 
-        # Also need compiler tools to be useful
-        if not IREE_COMPILER_AVAILABLE:
-            # Check for CLI tools as fallback
-            if not _get_iree_import_onnx() or not _get_iree_compile():
-                return False
-
-        return True
+        # Need compiler tools or CLI tools as fallback; bool() keeps the declared return type
+        return bool(IREE_COMPILER_AVAILABLE or (_get_iree_import_onnx() and _get_iree_compile()))
 
     def load(
         self,
@@ -358,10 +352,7 @@ def emit_mlir(
             raise FileNotFoundError(f"Model not found: {model_path}")
 
         # Determine output path
-        if output_path is None:
-            output_path = model_path.with_suffix(".mlir")
-        else:
-            output_path = Path(output_path)
+        output_path = model_path.with_suffix(".mlir") if output_path is None else Path(output_path)
 
         output_path.parent.mkdir(parents=True, exist_ok=True)
 
@@ -377,7 +368,7 @@ def emit_mlir(
         # Convert ONNX to MLIR
         _logger.debug(f"Converting ONNX to MLIR: {model_path} -> {output_path}")
         try:
-            result = subprocess.run(
+            subprocess.run(
                 [iree_import, str(model_path), "-o", str(output_path)],
                 check=True,
                 capture_output=True,
@@ -387,7 +378,7 @@ def emit_mlir(
         except subprocess.CalledProcessError as e:
             error_msg = e.stderr if e.stderr else str(e)
             _logger.error(f"ONNX to MLIR conversion failed: {error_msg}")
-            raise RuntimeError(f"ONNX to MLIR conversion failed: {error_msg}")
+            raise RuntimeError(f"ONNX to MLIR conversion failed: {error_msg}") from e
 
         # Load content if requested
         content = None
@@ -459,7 +450,8 @@ def compile_mlir(
             str(mlir_path),
             f"--iree-hal-target-backends={target}",
             f"--iree-opt-level=O{opt_level}",
-            "-o", str(output_path),
+            "-o",
+            str(output_path),
         ]
 
         # Add target-specific flags
@@ -470,7 +462,7 @@ def compile_mlir(
             subprocess.run(cmd, check=True, capture_output=True, text=True)
         except subprocess.CalledProcessError as e:
             error_msg = e.stderr if e.stderr else str(e)
-            raise RuntimeError(f"MLIR compilation failed: {error_msg}")
+            raise RuntimeError(f"MLIR compilation failed: {error_msg}") from e
 
         return output_path
 
@@ -523,7 +515,7 @@ def _compile_with_cli(
 
         try:
             # ONNX -> MLIR
-            result = subprocess.run(
+            subprocess.run(
                 [iree_import, str(onnx_path), "-o", str(mlir_path)],
                 check=True,
                 capture_output=True,
@@ -537,19 +529,20 @@ def _compile_with_cli(
                 str(mlir_path),
                 f"--iree-hal-target-backends={target}",
                 f"--iree-opt-level=O{opt_level}",
-                "-o", str(vmfb_path),
+                "-o",
+                str(vmfb_path),
             ]
 
             # Add target-specific flags
             if target == "llvm-cpu":
                 cmd.append("--iree-llvmcpu-target-cpu=host")
 
-            result = subprocess.run(cmd, check=True, capture_output=True, text=True)
+            subprocess.run(cmd, check=True, capture_output=True, text=True)
             return vmfb_path
 
         except subprocess.CalledProcessError as e:
             error_msg = e.stderr if e.stderr else str(e)
-            raise RuntimeError(f"IREE compilation failed: {error_msg}")
+            raise RuntimeError(f"IREE compilation failed: {error_msg}") from e
 
         finally:
             if mlir_path.exists():
diff --git a/src/polyinfer/backends/onnxruntime/backend.py b/src/polyinfer/backends/onnxruntime/backend.py
index fbca8bb..a0c89ed 100644
--- a/src/polyinfer/backends/onnxruntime/backend.py
+++ b/src/polyinfer/backends/onnxruntime/backend.py
@@ -1,10 +1,9 @@
 """ONNX Runtime backend implementation."""
 
-from typing import Union
 import numpy as np
 
-from polyinfer.backends.base import Backend, CompiledModel
 from polyinfer._logging import get_logger
+from polyinfer.backends.base import Backend, CompiledModel
 
 _logger = get_logger("backends.onnxruntime")
 
@@ -79,10 +78,10 @@ def provider(self) -> str:
         """Return the active execution provider."""
         return self._provider
 
-    def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, ...]]:
+    def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]:
         """Run inference."""
         # Build input dict
-        input_dict = {name: arr for name, arr in zip(self._input_names, inputs)}
+        input_dict = {name: arr for name, arr in zip(self._input_names, inputs, strict=False)}
 
         # Run inference
         outputs = self._session.run(None, input_dict)
@@ -94,7 +93,7 @@ def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, .
     def run(self, inputs: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
         """Run inference with named inputs/outputs."""
         outputs = self._session.run(None, inputs)
-        return dict(zip(self._output_names, outputs))
+        return dict(zip(self._output_names, outputs, strict=False))
 
 
 def _verify_tensorrt_ep_works() -> bool:
@@ -112,6 +111,7 @@ def _verify_tensorrt_ep_works() -> bool:
     if sys.platform == "win32":
         # On Windows, check if nvinfer DLLs are findable
         import ctypes
+
         try:
             ctypes.CDLL("nvinfer_10.dll")
             return True
@@ -130,7 +130,7 @@ def _verify_tensorrt_ep_works() -> bool:
         # First check if already loaded (from our preload)
         try:
             # Try to find the symbol in already-loaded libraries
-            ctypes.CDLL(None).nvinfer_version
+            _ = ctypes.CDLL(None).nvinfer_version
             return True
         except (OSError, AttributeError):
             pass
@@ -276,6 +276,7 @@ def load(
         if device_type == "tensorrt":
             _logger.debug("Setting up TensorRT paths for TensorRT EP")
             from polyinfer.nvidia_setup import setup_tensorrt_paths
+
             setup_tensorrt_paths()
 
         # Get providers for device
@@ -312,7 +313,9 @@ def load(
                     if "cudnn_conv_algo_search" in kwargs:
                         opts["cudnn_conv_algo_search"] = kwargs["cudnn_conv_algo_search"]
                     if "do_copy_in_default_stream" in kwargs:
-                        opts["do_copy_in_default_stream"] = str(int(kwargs["do_copy_in_default_stream"]))
+                        opts["do_copy_in_default_stream"] = str(
+                            int(kwargs["do_copy_in_default_stream"])
+                        )
 
                 elif provider == "TensorrtExecutionProvider":
                     opts["device_id"] = str(device_id)
@@ -324,7 +327,9 @@ def load(
                     opts["trt_engine_cache_path"] = kwargs.get("cache_dir", "./trt_cache")
                     # Optimization
                     if "builder_optimization_level" in kwargs:
-                        opts["trt_builder_optimization_level"] = str(kwargs["builder_optimization_level"])
+                        opts["trt_builder_optimization_level"] = str(
+                            kwargs["builder_optimization_level"]
+                        )
                     if "timing_cache_path" in kwargs:
                         opts["trt_timing_cache_path"] = kwargs["timing_cache_path"]
                         opts["trt_timing_cache_enable"] = "True"
@@ -336,7 +341,9 @@ def load(
                     if "min_subgraph_size" in kwargs:
                         opts["trt_min_subgraph_size"] = str(kwargs["min_subgraph_size"])
                     if "max_partition_iterations" in kwargs:
-                        opts["trt_max_partition_iterations"] = str(kwargs["max_partition_iterations"])
+                        opts["trt_max_partition_iterations"] = str(
+                            kwargs["max_partition_iterations"]
+                        )
                     # DLA
                     if kwargs.get("dla_enable", False):
                         opts["trt_dla_enable"] = "True"
@@ -345,10 +352,7 @@ def load(
                     if kwargs.get("force_sequential_engine_build", False):
                         opts["trt_force_sequential_engine_build"] = "True"
 
-                elif provider == "DmlExecutionProvider":
-                    opts["device_id"] = str(device_id)
-
-                elif provider == "ROCMExecutionProvider":
+                elif provider == "DmlExecutionProvider" or provider == "ROCMExecutionProvider":
                     opts["device_id"] = str(device_id)
 
                 provider_options.append(opts)
@@ -398,14 +402,22 @@ def load(
                 if "TensorrtExecutionProvider" in providers:
                     # Try falling back to CUDA EP
                     fallback_providers = [p for p in providers if p != "TensorrtExecutionProvider"]
-                    fallback_options = [
-                        opt for i, opt in enumerate(provider_options)
-                        if providers[i] != "TensorrtExecutionProvider"
-                    ] if provider_options else None
+                    fallback_options = (
+                        [
+                            opt
+                            for i, opt in enumerate(provider_options)
+                            if providers[i] != "TensorrtExecutionProvider"
+                        ]
+                        if provider_options
+                        else None
+                    )
 
                     if fallback_providers:
-                        _logger.warning(f"TensorRT EP failed, falling back to {fallback_providers[0]}")
+                        _logger.warning(
+                            f"TensorRT EP failed, falling back to {fallback_providers[0]}"
+                        )
                         import warnings
+
                         warnings.warn(
                             f"TensorRT EP failed ({error_msg[:100]}...), "
                             f"falling back to {fallback_providers[0]}",
diff --git a/src/polyinfer/backends/openvino/backend.py b/src/polyinfer/backends/openvino/backend.py
index d1caccd..c0300bc 100644
--- a/src/polyinfer/backends/openvino/backend.py
+++ b/src/polyinfer/backends/openvino/backend.py
@@ -1,17 +1,18 @@
 """OpenVINO backend implementation."""
 
-from typing import Union
 import numpy as np
 
-from polyinfer.backends.base import Backend, CompiledModel
 from polyinfer._logging import get_logger
+from polyinfer.backends.base import Backend, CompiledModel
 
 _logger = get_logger("backends.openvino")
 
 # Check if OpenVINO is available
 try:
     import openvino as ov
-    from openvino import Core, CompiledModel as OVCompiledModel, Tensor as OVTensor
+    from openvino import CompiledModel as OVCompiledModel
+    from openvino import Core
+    from openvino import Tensor as OVTensor
 
     OPENVINO_AVAILABLE = True
     _logger.debug(f"OpenVINO {ov.__version__} available")
@@ -24,10 +25,10 @@
 
 # Performance hint mapping
 PERF_HINTS = {
-    0: "LATENCY",       # Optimize for low latency
-    1: "THROUGHPUT",    # Optimize for throughput
-    2: "LATENCY",       # Default to latency
-    3: "LATENCY",       # Max optimization = latency focused
+    0: "LATENCY",  # Optimize for low latency
+    1: "THROUGHPUT",  # Optimize for throughput
+    2: "LATENCY",  # Default to latency
+    3: "LATENCY",  # Max optimization = latency focused
 }
 
 
@@ -85,7 +86,7 @@ def input_shapes(self) -> list[tuple]:
     def output_shapes(self) -> list[tuple]:
         return self._output_shapes
 
-    def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, ...]]:
+    def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]:
         """Run inference."""
         # Set inputs (must wrap in OVTensor)
         for i, data in enumerate(inputs):
@@ -155,7 +156,9 @@ def supported_devices(self) -> list[str]:
             if dev == "CPU":
                 devices.append("cpu")
             elif dev.startswith("GPU"):
-                devices.append(f"intel-gpu:{dev.replace('GPU.', '')}" if "." in dev else "intel-gpu")
+                devices.append(
+                    f"intel-gpu:{dev.replace('GPU.', '')}" if "." in dev else "intel-gpu"
+                )
             elif dev == "NPU":
                 devices.append("npu")
 
diff --git a/src/polyinfer/backends/registry.py b/src/polyinfer/backends/registry.py
index bf41dc0..62895da 100644
--- a/src/polyinfer/backends/registry.py
+++ b/src/polyinfer/backends/registry.py
@@ -1,9 +1,9 @@
 """Backend registry for managing available inference backends."""
 
 from dataclasses import dataclass
-from typing import Type
-from polyinfer.backends.base import Backend
+
 from polyinfer._logging import get_logger
+from polyinfer.backends.base import Backend
 
 _logger = get_logger("backends.registry")
 
@@ -13,7 +13,7 @@ class BackendInfo:
     """Information about a registered backend."""
 
     name: str
-    backend_class: Type[Backend]
+    backend_class: type[Backend]
     instance: Backend | None = None
     available: bool | None = None  # Lazily computed
 
@@ -34,7 +34,7 @@ def is_available(self) -> bool:
 _backends: dict[str, BackendInfo] = {}
 
 
-def register_backend(name: str, backend_class: Type[Backend]) -> None:
+def register_backend(name: str, backend_class: type[Backend]) -> None:
     """Register a backend class.
 
     Args:
@@ -67,8 +67,7 @@ def get_backend(name: str) -> Backend:
     if not info.is_available():
         _logger.error(f"Backend '{name}' is not available")
         raise RuntimeError(
-            f"Backend '{name}' is not available. "
-            f"Install it with: pip install polyinfer[{name}]"
+            f"Backend '{name}' is not available. Install it with: pip install polyinfer[{name}]"
         )
 
     _logger.debug(f"Retrieved backend: {name}")
@@ -129,8 +128,7 @@ def get_best_backend(device: str) -> Backend:
     if not backends:
         available = list_backends()
         raise RuntimeError(
-            f"No backend available for device '{device}'. "
-            f"Available backends: {available}"
+            f"No backend available for device '{device}'. Available backends: {available}"
         )
     return backends[0]
 
diff --git a/src/polyinfer/backends/tensorrt/backend.py b/src/polyinfer/backends/tensorrt/backend.py
index cc1d919..65a7f89 100644
--- a/src/polyinfer/backends/tensorrt/backend.py
+++ b/src/polyinfer/backends/tensorrt/backend.py
@@ -1,11 +1,11 @@
 """Native TensorRT backend implementation."""
 
 from pathlib import Path
-from typing import Union
+
 import numpy as np
 
-from polyinfer.backends.base import Backend, CompiledModel
 from polyinfer._logging import get_logger
+from polyinfer.backends.base import Backend, CompiledModel
 
 _logger = get_logger("backends.tensorrt")
 
@@ -120,10 +120,7 @@ def _allocate_buffers(self, input_shapes: dict[str, tuple] = None):
 
         # Allocate input buffers
         for name in self._input_names:
-            if input_shapes:
-                shape = input_shapes[name]
-            else:
-                shape = self._bindings[name]["shape"]
+            shape = input_shapes[name] if input_shapes else self._bindings[name]["shape"]
 
             dtype = self._bindings[name]["dtype"]
             size = int(np.prod(shape)) * np.dtype(dtype).itemsize
@@ -168,18 +165,18 @@ def _allocate_buffers(self, input_shapes: dict[str, tuple] = None):
             self._h_outputs[name] = np.empty(shape, dtype=dtype)
             self._allocated_shapes[name] = shape
 
-    def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, ...]]:
+    def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]:
         """Run inference."""
         # For dynamic shapes, ensure buffers are allocated for current input shapes
         if self._has_dynamic_shapes:
             input_shapes = {
                 name: tuple(data.shape)
-                for name, data in zip(self._input_names, inputs)
+                for name, data in zip(self._input_names, inputs, strict=False)
             }
             self._allocate_buffers(input_shapes)
 
         # Copy inputs to GPU
-        for name, data in zip(self._input_names, inputs):
+        for name, data in zip(self._input_names, inputs, strict=False):
             data = np.ascontiguousarray(data)
             cudart.cudaMemcpyAsync(
                 self._d_inputs[name],
@@ -342,10 +339,7 @@ def load(
         # Check for cached engine
         model_path = Path(model_path)
         cache_path = kwargs.get("cache_path")
-        if cache_path is None:
-            cache_path = model_path.with_suffix(".engine")
-        else:
-            cache_path = Path(cache_path)
+        cache_path = model_path.with_suffix(".engine") if cache_path is None else Path(cache_path)
 
         # Try to load cached engine (unless force_rebuild)
         if cache_path.exists() and not kwargs.get("force_rebuild", False):
@@ -402,15 +396,13 @@ def _build_engine(
         if kwargs.get("int8", False):
             config.set_flag(trt.BuilderFlag.INT8)
             _logger.debug("INT8 precision enabled")
-        if kwargs.get("bf16", False):
-            if hasattr(trt.BuilderFlag, "BF16"):
-                config.set_flag(trt.BuilderFlag.BF16)
-        if kwargs.get("fp8", False):
-            if hasattr(trt.BuilderFlag, "FP8"):
-                config.set_flag(trt.BuilderFlag.FP8)
-        if not kwargs.get("tf32", True):  # TF32 enabled by default on Ampere+
-            if hasattr(trt.BuilderFlag, "TF32"):
-                config.clear_flag(trt.BuilderFlag.TF32)
+        if kwargs.get("bf16", False) and hasattr(trt.BuilderFlag, "BF16"):
+            config.set_flag(trt.BuilderFlag.BF16)
+        if kwargs.get("fp8", False) and hasattr(trt.BuilderFlag, "FP8"):
+            config.set_flag(trt.BuilderFlag.FP8)
+        if not kwargs.get("tf32", True) and hasattr(trt.BuilderFlag, "TF32"):
+            # TF32 enabled by default on Ampere+
+            config.clear_flag(trt.BuilderFlag.TF32)
         if kwargs.get("strict_types", False):
             if hasattr(trt.BuilderFlag, "STRICT_TYPES"):
                 config.set_flag(trt.BuilderFlag.STRICT_TYPES)
@@ -428,9 +420,8 @@ def _build_engine(
             config.avg_timing_iterations = avg_timing
 
         # === Sparsity (Ampere+) ===
-        if kwargs.get("sparsity", False):
-            if hasattr(trt.BuilderFlag, "SPARSE_WEIGHTS"):
-                config.set_flag(trt.BuilderFlag.SPARSE_WEIGHTS)
+        if kwargs.get("sparsity", False) and hasattr(trt.BuilderFlag, "SPARSE_WEIGHTS"):
+            config.set_flag(trt.BuilderFlag.SPARSE_WEIGHTS)
 
         # === Timing cache ===
         timing_cache_path = kwargs.get("timing_cache_path")
diff --git a/src/polyinfer/cli.py b/src/polyinfer/cli.py
index 2883757..bb15bf9 100644
--- a/src/polyinfer/cli.py
+++ b/src/polyinfer/cli.py
@@ -6,9 +6,10 @@
 
 def cmd_info(args):
     """Show system information."""
-    import polyinfer as pi
     import json
 
+    import polyinfer as pi
+
     info = pi.discovery.system_info()
 
     if args.json:
@@ -36,9 +37,10 @@ def cmd_info(args):
 
 def cmd_benchmark(args):
     """Benchmark a model."""
-    import polyinfer as pi
     import numpy as np
 
+    import polyinfer as pi
+
     # Parse input shape
     input_shape = tuple(int(x) for x in args.input_shape.split(","))
 
@@ -74,9 +76,10 @@ def cmd_benchmark(args):
 
 def cmd_run(args):
     """Run inference on a model."""
-    import polyinfer as pi
     import numpy as np
 
+    import polyinfer as pi
+
     # Load model
     model = pi.load(args.model, device=args.device, backend=args.backend)
     print(f"Loaded: {model}")
@@ -119,16 +122,22 @@ def main():
     bench_parser.add_argument("model", help="Path to ONNX model")
     bench_parser.add_argument("--device", "-d", default="cpu", help="Target device")
     bench_parser.add_argument("--backend", "-b", help="Specific backend to use")
-    bench_parser.add_argument("--input-shape", "-s", default="1,3,224,224", help="Input shape (comma-separated)")
+    bench_parser.add_argument(
+        "--input-shape", "-s", default="1,3,224,224", help="Input shape (comma-separated)"
+    )
     bench_parser.add_argument("--warmup", "-w", type=int, default=10, help="Warmup iterations")
-    bench_parser.add_argument("--iterations", "-n", type=int, default=100, help="Benchmark iterations")
+    bench_parser.add_argument(
+        "--iterations", "-n", type=int, default=100, help="Benchmark iterations"
+    )
 
     # Run command
     run_parser = subparsers.add_parser("run", help="Run inference")
     run_parser.add_argument("model", help="Path to ONNX model")
     run_parser.add_argument("--device", "-d", default="cpu", help="Target device")
     run_parser.add_argument("--backend", "-b", help="Specific backend to use")
-    run_parser.add_argument("--input-shape", "-s", default="1,3,224,224", help="Input shape (comma-separated)")
+    run_parser.add_argument(
+        "--input-shape", "-s", default="1,3,224,224", help="Input shape (comma-separated)"
+    )
 
     args = parser.parse_args()
 
diff --git a/src/polyinfer/compare.py b/src/polyinfer/compare.py
index 5f65a75..28186b9 100644
--- a/src/polyinfer/compare.py
+++ b/src/polyinfer/compare.py
@@ -1,11 +1,12 @@
 """Benchmarking and comparison utilities for PolyInfer."""
 
+import time
 from pathlib import Path
 from typing import Any
+
 import numpy as np
-import time
 
-from polyinfer.discovery import list_backends, get_backend
+from polyinfer.discovery import get_backend, list_backends
 
 
 def benchmark(
@@ -113,6 +114,7 @@ def compare(
         if input_shape is None:
             # Try to get shape from model
             import onnx
+
             model = onnx.load(str(model_path))
             input_info = model.graph.input[0]
             shape = []
@@ -181,7 +183,7 @@ def compare(
         fastest = successful[0]["mean_ms"]
         for r in successful:
             marker = " <-- FASTEST" if r["mean_ms"] == fastest else ""
-            slowdown = f" ({r['mean_ms']/fastest:.2f}x)" if r["mean_ms"] != fastest else ""
+            slowdown = f" ({r['mean_ms'] / fastest:.2f}x)" if r["mean_ms"] != fastest else ""
             print(
                 f"{r['backend']:25s}: {r['mean_ms']:6.2f} ms ({r['fps']:5.1f} FPS){slowdown}{marker}"
             )
@@ -213,9 +215,9 @@ def compare_all_devices(
 
     for device_info in list_devices():
         device = device_info.name
-        print(f"\n{'='*60}")
+        print(f"\n{'=' * 60}")
         print(f"Device: {device}")
-        print(f"{'='*60}")
+        print(f"{'=' * 60}")
 
         device_results = []
         for backend_name in device_info.backends:
diff --git a/src/polyinfer/config.py b/src/polyinfer/config.py
index c01723f..3fec6d8 100644
--- a/src/polyinfer/config.py
+++ b/src/polyinfer/config.py
@@ -1,7 +1,7 @@
 """Configuration classes for PolyInfer."""
 
 from dataclasses import dataclass, field
-from typing import Literal, Any
+from typing import Any, Literal
 
 
 @dataclass
diff --git a/src/polyinfer/discovery.py b/src/polyinfer/discovery.py
index 49d9769..a6ff539 100644
--- a/src/polyinfer/discovery.py
+++ b/src/polyinfer/discovery.py
@@ -2,13 +2,17 @@
 
 from dataclasses import dataclass
 
+from polyinfer.backends.base import Backend
 from polyinfer.backends.registry import (
-    list_backends as _list_backends,
     get_backend as _get_backend,
+)
+from polyinfer.backends.registry import (
     get_backends_for_device,
     get_best_backend,
 )
-from polyinfer.backends.base import Backend
+from polyinfer.backends.registry import (
+    list_backends as _list_backends,
+)
 
 
 @dataclass
@@ -191,10 +195,12 @@ def system_info() -> dict:
 
     # Device info
     for device in list_devices():
-        info["devices"].append({
-            "name": device.name,
-            "type": device.device_type,
-            "backends": device.backends,
-        })
+        info["devices"].append(
+            {
+                "name": device.name,
+                "type": device.device_type,
+                "backends": device.backends,
+            }
+        )
 
     return info
diff --git a/src/polyinfer/mlir.py b/src/polyinfer/mlir.py
index d87b809..711779b 100644
--- a/src/polyinfer/mlir.py
+++ b/src/polyinfer/mlir.py
@@ -145,15 +145,13 @@ def compile_mlir(
     )
 
 
-# Re-export MLIROutput for convenience
 def __getattr__(name: str):
+    """Lazy import for MLIROutput to avoid loading IREE at module import time."""
     if name == "MLIROutput":
         from polyinfer.backends.iree.backend import MLIROutput
+
         return MLIROutput
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
 
-# For static type checkers
-from polyinfer.backends.iree.backend import MLIROutput
-
 __all__ = ["export_mlir", "compile_mlir", "MLIROutput"]
diff --git a/src/polyinfer/model.py b/src/polyinfer/model.py
index ff26856..7a083a4 100644
--- a/src/polyinfer/model.py
+++ b/src/polyinfer/model.py
@@ -1,13 +1,13 @@
 """Unified model loading and inference for PolyInfer."""
 
 from pathlib import Path
-from typing import Union
+
 import numpy as np
 
+from polyinfer._logging import get_logger
 from polyinfer.backends.base import CompiledModel
-from polyinfer.discovery import select_backend, get_backend
 from polyinfer.config import InferenceConfig
-from polyinfer._logging import get_logger
+from polyinfer.discovery import get_backend, select_backend
 
 _logger = get_logger("model")
 
@@ -73,7 +73,9 @@ def __init__(
             _logger.debug(f"Auto-selecting backend for device: {device}")
             self._backend = select_backend(device)
 
-        _logger.debug(f"Selected backend: {self._backend.name} (priority: {self._backend.priority})")
+        _logger.debug(
+            f"Selected backend: {self._backend.name} (priority: {self._backend.priority})"
+        )
 
         # Load the model
         _logger.debug(f"Loading with device: {device}")
@@ -122,6 +124,7 @@ def _normalize_backend(backend: str | None, device: str) -> tuple[str | None, st
             # Check if native tensorrt backend is available
             try:
                 from polyinfer.backends.registry import _backends
+
                 if "tensorrt" in _backends and _backends["tensorrt"].is_available():
                     return "tensorrt", device  # Use native
             except Exception:
@@ -162,7 +165,7 @@ def output_shapes(self) -> list[tuple]:
         """Return output tensor shapes."""
         return self._model.output_shapes
 
-    def __call__(self, *inputs: np.ndarray) -> Union[np.ndarray, tuple[np.ndarray, ...]]:
+    def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]:
         """Run inference.
 
         Args:
diff --git a/src/polyinfer/nvidia_setup.py b/src/polyinfer/nvidia_setup.py
index 6d026f7..cc7bc90 100644
--- a/src/polyinfer/nvidia_setup.py
+++ b/src/polyinfer/nvidia_setup.py
@@ -6,11 +6,11 @@
 The setup happens automatically when polyinfer is imported.
 """
 
+import logging
 import os
 import sys
 import warnings
 from pathlib import Path
-import logging
 
 # Create logger directly since _logging may not be imported yet
 _logger = logging.getLogger("polyinfer.nvidia_setup")
@@ -76,10 +76,9 @@ def _find_nvidia_dll_dirs() -> list[Path]:
     nvidia_root = site_packages / "nvidia"
     if nvidia_root.exists():
         for subdir in nvidia_root.rglob("bin"):
-            if subdir.is_dir() and subdir not in dll_dirs:
-                # Check if it contains DLLs
-                if any(subdir.glob("*.dll")):
-                    dll_dirs.append(subdir)
+            # Check if it's a directory not already added that contains DLLs
+            if subdir.is_dir() and subdir not in dll_dirs and any(subdir.glob("*.dll")):
+                dll_dirs.append(subdir)
 
     # TensorRT root
     tensorrt_root = site_packages / "tensorrt_libs"
@@ -194,9 +193,8 @@ def _find_tensorrt_lib_dirs() -> list[Path]:
 
         for sys_path in system_tensorrt_paths:
             p = Path(sys_path)
-            if p.exists() and p not in tensorrt_dirs:
-                if any(p.glob("libnvinfer.so*")):
-                    tensorrt_dirs.append(p)
+            if p.exists() and p not in tensorrt_dirs and any(p.glob("libnvinfer.so*")):
+                tensorrt_dirs.append(p)
 
     return tensorrt_dirs
 
@@ -380,6 +378,7 @@ def _check_onnxruntime_conflicts():
     if len(installed) > 1 and sys.platform == "win32":
         try:
             import onnxruntime as ort
+
             providers = ort.get_available_providers()
 
             has_cuda = "CUDAExecutionProvider" in providers
diff --git a/src/polyinfer/quantization.py b/src/polyinfer/quantization.py
index 56e387c..c57fb78 100644
--- a/src/polyinfer/quantization.py
+++ b/src/polyinfer/quantization.py
@@ -24,21 +24,25 @@ def data_reader():
     pi.quantize("model.onnx", "model_fp16.onnx", dtype="fp16")
 """
 
+import importlib.util
+from collections.abc import Callable, Iterator
 from dataclasses import dataclass
 from enum import Enum
 from pathlib import Path
-from typing import Callable, Iterator, Union, Any
+
 import numpy as np
 
 
 class QuantizationMethod(Enum):
     """Quantization methods available."""
-    DYNAMIC = "dynamic"      # Dynamic INT8 (no calibration needed)
-    STATIC = "static"        # Static INT8 (requires calibration data)
+
+    DYNAMIC = "dynamic"  # Dynamic INT8 (no calibration needed)
+    STATIC = "static"  # Static INT8 (requires calibration data)
 
 
 class QuantizationType(Enum):
     """Target quantization data type."""
+
     INT8 = "int8"
     UINT8 = "uint8"
     INT4 = "int4"
@@ -48,14 +52,16 @@ class QuantizationType(Enum):
 
 class CalibrationMethod(Enum):
     """Calibration methods for static quantization."""
-    MINMAX = "minmax"        # Min-max calibration
-    ENTROPY = "entropy"      # Entropy-based calibration (KL divergence)
+
+    MINMAX = "minmax"  # Min-max calibration
+    ENTROPY = "entropy"  # Entropy-based calibration (KL divergence)
     PERCENTILE = "percentile"  # Percentile-based calibration
 
 
 @dataclass
 class QuantizationConfig:
     """Configuration for quantization."""
+
     method: QuantizationMethod = QuantizationMethod.DYNAMIC
     dtype: QuantizationType = QuantizationType.INT8
     calibration_method: CalibrationMethod = CalibrationMethod.MINMAX
@@ -69,6 +75,7 @@ class QuantizationConfig:
 @dataclass
 class QuantizationResult:
     """Result of quantization operation."""
+
     input_path: Path
     output_path: Path
     backend: str
@@ -80,12 +87,12 @@ class QuantizationResult:
 
 
 # Type alias for calibration data
-CalibrationData = Union[
-    Iterator[dict[str, np.ndarray]],  # Iterator yielding input dicts
-    Callable[[], Iterator[dict[str, np.ndarray]]],  # Factory function
-    list[dict[str, np.ndarray]],  # List of input dicts
-    list[np.ndarray],  # List of input arrays (single input models)
-]
+CalibrationData = (
+    Iterator[dict[str, np.ndarray]]  # Iterator yielding input dicts
+    | Callable[[], Iterator[dict[str, np.ndarray]]]  # Factory function
+    | list[dict[str, np.ndarray]]  # List of input dicts
+    | list[np.ndarray]  # List of input arrays (single input models)
+)
 
 
 def quantize(
@@ -167,11 +174,7 @@ def quantize(
         if quant_dtype in (QuantizationType.FP16, QuantizationType.BF16):
             backend = "onnxruntime"
         elif quant_method == QuantizationMethod.STATIC:
-            try:
-                import nncf
-                backend = "openvino"
-            except ImportError:
-                backend = "onnxruntime"
+            backend = "openvino" if importlib.util.find_spec("nncf") is not None else "onnxruntime"
         else:
             backend = "onnxruntime"
 
@@ -212,18 +215,16 @@ def _quantize_onnxruntime(
     """Quantize using ONNX Runtime quantization tools."""
     try:
         from onnxruntime.quantization import (
+            QuantFormat,
+            QuantType,
             quantize_dynamic,
             quantize_static,
-            QuantType,
-            QuantFormat,
-            CalibrationDataReader,
         )
         from onnxruntime.quantization.calibrate import CalibrationMethod as ORTCalibMethod
-    except ImportError:
+    except ImportError as e:
         raise ImportError(
-            "onnxruntime quantization not available. "
-            "Install with: pip install onnxruntime"
-        )
+            "onnxruntime quantization not available. Install with: pip install onnxruntime"
+        ) from e
 
     # Map dtype to QuantType
     dtype_map = {
@@ -267,9 +268,7 @@ def _quantize_onnxruntime(
         ort_calib_method = calib_method_map.get(config.calibration_method, ORTCalibMethod.MinMax)
 
         # Create calibration data reader
-        data_reader = _create_ort_calibration_reader(
-            model_input, calibration_data, num_samples
-        )
+        data_reader = _create_ort_calibration_reader(model_input, calibration_data, num_samples)
 
         quantize_static(
             model_input=str(model_input),
@@ -303,13 +302,13 @@ def _quantize_onnxruntime(
 def _convert_to_fp16_onnx(model_input: Path, model_output: Path) -> None:
     """Convert ONNX model to FP16."""
     try:
-        from onnxconverter_common import float16
         import onnx
-    except ImportError:
+        from onnxconverter_common import float16
+    except ImportError as e:
         raise ImportError(
             "FP16 conversion requires onnxconverter-common. "
             "Install with: pip install onnxconverter-common"
-        )
+        ) from e
 
     model = onnx.load(str(model_input))
     model_fp16 = float16.convert_float_to_float16(model, keep_io_types=True)
@@ -332,6 +331,7 @@ def __init__(
 
         # Get input names from model
         import onnxruntime as ort
+
         sess = ort.InferenceSession(str(model_path), providers=["CPUExecutionProvider"])
         self._input_names = [inp.name for inp in sess.get_inputs()]
         del sess
@@ -350,16 +350,15 @@ def _setup_iterator(self):
 
         if isinstance(data, list):
             # Convert list to iterator
-            if len(data) > 0:
-                if isinstance(data[0], np.ndarray):
-                    # List of arrays - wrap in dicts
-                    if len(self._input_names) != 1:
-                        raise ValueError(
-                            f"Model has {len(self._input_names)} inputs, "
-                            "but calibration data is a list of arrays. "
-                            "Use list of dicts instead."
-                        )
-                    data = [{self._input_names[0]: arr} for arr in data]
+            if len(data) > 0 and isinstance(data[0], np.ndarray):
+                # List of arrays - wrap in dicts
+                if len(self._input_names) != 1:
+                    raise ValueError(
+                        f"Model has {len(self._input_names)} inputs, "
+                        "but calibration data is a list of arrays. "
+                        "Use list of dicts instead."
+                    )
+                data = [{self._input_names[0]: arr} for arr in data]
             self._data_iter = iter(data)
         else:
             self._data_iter = data
@@ -406,11 +405,10 @@ def _quantize_openvino(
     try:
         import nncf
         import openvino as ov
-    except ImportError:
+    except ImportError as e:
         raise ImportError(
-            "OpenVINO NNCF not available. "
-            "Install with: pip install openvino nncf"
-        )
+            "OpenVINO NNCF not available. Install with: pip install openvino nncf"
+        ) from e
 
     original_size = model_input.stat().st_size / (1024 * 1024)
 
@@ -422,9 +420,8 @@ def _quantize_openvino(
     if config.dtype == QuantizationType.FP16:
         # OpenVINO doesn't have direct FP16 quantization, use compress_to_fp16
         try:
-            from openvino.runtime import serialize
             # Compile with FP16 inference precision hint
-            model_fp16 = ov.compile_model(model, "CPU", {"INFERENCE_PRECISION_HINT": "f16"})
+            ov.compile_model(model, "CPU", {"INFERENCE_PRECISION_HINT": "f16"})
             # For saving, we need to serialize the original model
             # OpenVINO FP16 is handled at compile time, not model level
             # Fall back to ONNX Runtime for FP16
@@ -447,7 +444,9 @@ def _quantize_openvino(
     quantized_model = nncf.quantize(
         model,
         nncf_dataset,
-        preset=nncf.QuantizationPreset.MIXED if config.per_channel else nncf.QuantizationPreset.PERFORMANCE,
+        preset=nncf.QuantizationPreset.MIXED
+        if config.per_channel
+        else nncf.QuantizationPreset.PERFORMANCE,
         target_device=nncf.TargetDevice.CPU,
         subset_size=num_samples,
     )
@@ -455,13 +454,13 @@ def _quantize_openvino(
     # Save the quantized model
     # Determine output format based on extension
     output_str = str(model_output)
-    if output_str.endswith('.onnx'):
+    if output_str.endswith(".onnx"):
         # Save as ONNX
         ov.save_model(quantized_model, output_str)
     else:
         # Save as OpenVINO IR
-        if not output_str.endswith('.xml'):
-            output_str = output_str + '.xml'
+        if not output_str.endswith(".xml"):
+            output_str = output_str + ".xml"
         ov.save_model(quantized_model, output_str)
         model_output = Path(output_str)
 
@@ -490,15 +489,13 @@ def _create_nncf_dataset(model, data: CalibrationData, num_samples: int):
     if callable(data) and not isinstance(data, (list, Iterator)):
         data = data()
 
-    if isinstance(data, list):
-        if len(data) > 0 and isinstance(data[0], np.ndarray):
-            # List of arrays
-            if len(input_names) != 1:
-                raise ValueError(
-                    f"Model has {len(input_names)} inputs, "
-                    "but calibration data is a list of arrays."
-                )
-            data = [{input_names[0]: arr} for arr in data]
+    if isinstance(data, list) and len(data) > 0 and isinstance(data[0], np.ndarray):
+        # List of arrays
+        if len(input_names) != 1:
+            raise ValueError(
+                f"Model has {len(input_names)} inputs, but calibration data is a list of arrays."
+            )
+        data = [{input_names[0]: arr} for arr in data]
 
     # Convert to list if iterator
     if not isinstance(data, list):
@@ -590,6 +587,7 @@ def quantize_for_tensorrt(
 
 # Convenience functions
 
+
 def quantize_dynamic(
     model_input: str | Path,
     model_output: str | Path,
diff --git a/tests/conftest.py b/tests/conftest.py
index b8ab9de..750f6e0 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,22 +1,22 @@
 """Pytest configuration and shared fixtures for polyinfer tests."""
 
-import os
 import sys
-import pytest
-import numpy as np
 from pathlib import Path
 
+import numpy as np
+import pytest
+
 # Add src to path for development
 sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
 
 import polyinfer as pi
-from polyinfer.backends.registry import get_all_backends, get_backend
-
+from polyinfer.backends.registry import get_all_backends
 
 # =============================================================================
 # Test Model Fixtures
 # =============================================================================
 
+
 @pytest.fixture(scope="session")
 def model_path():
     """Get path to test model (YOLOv8n)."""
@@ -34,6 +34,7 @@ def model_path():
     # Try to download/export
     try:
         from ultralytics import YOLO
+
         model = YOLO("yolov8n.pt")
         export_path = Path(__file__).parent.parent / "yolov8n.onnx"
         model.export(format="onnx")
@@ -50,7 +51,7 @@ def simple_model_path(tmp_path_factory):
     """Create a simple ONNX model for basic tests."""
     try:
         import onnx
-        from onnx import helper, TensorProto
+        from onnx import TensorProto, helper
 
         # Create a simple model: Y = X + 1
         X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 224, 224])
@@ -58,21 +59,12 @@ def simple_model_path(tmp_path_factory):
 
         # Constant tensor of ones
         ones = helper.make_tensor(
-            "ones",
-            TensorProto.FLOAT,
-            [1, 3, 224, 224],
-            [1.0] * (1 * 3 * 224 * 224)
+            "ones", TensorProto.FLOAT, [1, 3, 224, 224], [1.0] * (1 * 3 * 224 * 224)
         )
 
         add_node = helper.make_node("Add", ["X", "ones"], ["Y"])
 
-        graph = helper.make_graph(
-            [add_node],
-            "simple_add",
-            [X],
-            [Y],
-            [ones]
-        )
+        graph = helper.make_graph([add_node], "simple_add", [X], [Y], [ones])
 
         model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
 
@@ -90,6 +82,7 @@ def simple_model_path(tmp_path_factory):
 # Input Data Fixtures
 # =============================================================================
 
+
 @pytest.fixture
 def yolo_input():
     """Create input tensor for YOLOv8 (1x3x640x640)."""
@@ -113,6 +106,7 @@ def batch_input():
 # Backend/Device Discovery Fixtures
 # =============================================================================
 
+
 @pytest.fixture(scope="session")
 def available_backends():
     """Get list of available backends."""
@@ -135,6 +129,7 @@ def all_backends():
 # Device-specific Fixtures
 # =============================================================================
 
+
 @pytest.fixture
 def has_cuda():
     """Check if CUDA is available."""
@@ -187,6 +182,7 @@ def has_vulkan():
 # Markers
 # =============================================================================
 
+
 def pytest_configure(config):
     """Register custom markers."""
     config.addinivalue_line("markers", "cuda: mark test as requiring CUDA")
diff --git a/tests/test_backend_options.py b/tests/test_backend_options.py
index 339a931..80c6867 100644
--- a/tests/test_backend_options.py
+++ b/tests/test_backend_options.py
@@ -4,25 +4,22 @@
 are properly validated and passed through to the underlying engines.
 """
 
-import pytest
 import numpy as np
-from pathlib import Path
-from unittest.mock import patch, MagicMock
+import pytest
 
 import polyinfer as pi
-from polyinfer.backends.registry import get_backend
-
 
 # =============================================================================
 # Test Fixtures
 # =============================================================================
 
+
 @pytest.fixture
 def dummy_onnx_model(tmp_path):
     """Create a minimal ONNX model for testing."""
     try:
         import onnx
-        from onnx import helper, TensorProto
+        from onnx import TensorProto, helper
     except ImportError:
         pytest.skip("onnx not installed")
 
@@ -60,6 +57,7 @@ def dummy_input():
 # ONNX Runtime Backend Options Tests
 # =============================================================================
 
+
 class TestONNXRuntimeOptions:
     """Test ONNX Runtime backend options passthrough."""
 
@@ -210,6 +208,7 @@ def test_directml_options(self, dummy_onnx_model, dummy_input):
 # Native TensorRT Backend Options Tests
 # =============================================================================
 
+
 class TestNativeTensorRTOptions:
     """Test native TensorRT backend options."""
 
@@ -300,7 +299,7 @@ def test_force_rebuild(self, dummy_onnx_model, dummy_input, tmp_path):
         cache_path = tmp_path / "test_rebuild.engine"
 
         # First build
-        model1 = pi.load(
+        pi.load(
             dummy_onnx_model,
             backend="tensorrt",
             device="cuda",
@@ -309,7 +308,7 @@ def test_force_rebuild(self, dummy_onnx_model, dummy_input, tmp_path):
         mtime1 = cache_path.stat().st_mtime
 
         # Should use cache (no rebuild)
-        model2 = pi.load(
+        pi.load(
             dummy_onnx_model,
             backend="tensorrt",
             device="cuda",
@@ -319,7 +318,7 @@ def test_force_rebuild(self, dummy_onnx_model, dummy_input, tmp_path):
         assert mtime1 == mtime2, "Cache should be reused"
 
         # Force rebuild
-        model3 = pi.load(
+        pi.load(
             dummy_onnx_model,
             backend="tensorrt",
             device="cuda",
@@ -353,6 +352,7 @@ def test_profiling_verbosity(self, dummy_onnx_model, dummy_input, tmp_path):
 # OpenVINO Backend Options Tests
 # =============================================================================
 
+
 class TestOpenVINOOptions:
     """Test OpenVINO backend options."""
 
@@ -410,6 +410,7 @@ def test_caching(self, dummy_onnx_model, dummy_input, tmp_path):
 # IREE Backend Options Tests
 # =============================================================================
 
+
 class TestIREEOptions:
     """Test IREE backend options."""
 
@@ -491,6 +492,7 @@ def test_save_mlir(self, dummy_onnx_model, dummy_input, tmp_path):
 # Options Validation Tests
 # =============================================================================
 
+
 class TestOptionsValidation:
     """Test that invalid options are handled properly."""
 
@@ -523,6 +525,7 @@ def test_invalid_device_raises(self, dummy_onnx_model):
 # Integration Tests
 # =============================================================================
 
+
 class TestOptionsIntegration:
     """Integration tests for options across backends."""
 
diff --git a/tests/test_backends.py b/tests/test_backends.py
index 74f0cad..ecd6db4 100644
--- a/tests/test_backends.py
+++ b/tests/test_backends.py
@@ -1,6 +1,7 @@
 """Tests for backend discovery and availability."""
 
 import pytest
+
 import polyinfer as pi
 from polyinfer.backends.registry import get_all_backends, get_backend
 
@@ -123,7 +124,7 @@ class TestBackendPriority:
     def test_backends_have_priority(self):
         """All backends should have a priority value."""
         all_backends = get_all_backends()
-        for name, backend in all_backends.items():
+        for _name, backend in all_backends.items():
             assert isinstance(backend.priority, int)
             assert backend.priority >= 0
 
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index dd382cf..abbc30f 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -1,7 +1,7 @@
 """Tests for benchmarking functionality."""
 
 import pytest
-import numpy as np
+
 import polyinfer as pi
 
 
@@ -94,7 +94,7 @@ def test_warmup_effect(self, model_path, yolo_input):
         model = pi.load(model_path, device="cpu")
 
         # No warmup
-        result_no_warmup = model.benchmark(yolo_input, warmup=0, iterations=10)
+        model.benchmark(yolo_input, warmup=0, iterations=10)
 
         # With warmup
         result_warmup = model.benchmark(yolo_input, warmup=10, iterations=10)
@@ -115,12 +115,7 @@ def test_compare_basic(self, model_path):
 
     def test_compare_returns_results(self, model_path):
         """compare() should return benchmark results for each backend."""
-        results = pi.compare(
-            model_path,
-            input_shape=(1, 3, 640, 640),
-            warmup=2,
-            iterations=5
-        )
+        results = pi.compare(model_path, input_shape=(1, 3, 640, 640), warmup=2, iterations=5)
 
         for result in results:
             assert "backend" in result
@@ -132,11 +127,7 @@ def test_compare_returns_results(self, model_path):
     def test_compare_specific_device(self, model_path):
         """compare() should work with specific device."""
         results = pi.compare(
-            model_path,
-            input_shape=(1, 3, 640, 640),
-            device="cpu",
-            warmup=2,
-            iterations=5
+            model_path, input_shape=(1, 3, 640, 640), device="cpu", warmup=2, iterations=5
         )
 
         assert len(results) >= 1
@@ -222,7 +213,7 @@ def test_long_benchmark(self, model_path, yolo_input):
         result_long = model.benchmark(yolo_input, warmup=10, iterations=100)
 
         # Longer benchmark should generally have lower relative std
-        rel_std_short = result_short["std_ms"] / result_short["mean_ms"]
+        result_short["std_ms"] / result_short["mean_ms"]
         rel_std_long = result_long["std_ms"] / result_long["mean_ms"]
 
         # Not strictly enforced as it depends on system state
@@ -230,7 +221,7 @@ def test_long_benchmark(self, model_path, yolo_input):
 
     def test_all_available_backends(self, model_path, yolo_input):
         """Benchmark all available backends."""
-        backends = pi.list_backends()
+        pi.list_backends()
         devices = pi.list_devices()
 
         results = []
@@ -238,17 +229,21 @@ def test_all_available_backends(self, model_path, yolo_input):
             try:
                 model = pi.load(model_path, device=device.name)
                 result = model.benchmark(yolo_input, warmup=3, iterations=10)
-                results.append({
-                    "device": device.name,
-                    "backend": result["backend"],
-                    "fps": result["fps"],
-                    "mean_ms": result["mean_ms"],
-                })
+                results.append(
+                    {
+                        "device": device.name,
+                        "backend": result["backend"],
+                        "fps": result["fps"],
+                        "mean_ms": result["mean_ms"],
+                    }
+                )
             except Exception as e:
-                results.append({
-                    "device": device.name,
-                    "error": str(e),
-                })
+                results.append(
+                    {
+                        "device": device.name,
+                        "error": str(e),
+                    }
+                )
 
         # Should have at least one successful result
         successful = [r for r in results if "fps" in r]
@@ -258,6 +253,8 @@ def test_all_available_backends(self, model_path, yolo_input):
         print("\n=== Benchmark Results ===")
         for r in results:
             if "fps" in r:
-                print(f"{r['device']:20} {r['backend']:25} {r['mean_ms']:8.2f} ms  {r['fps']:8.1f} FPS")
+                print(
+                    f"{r['device']:20} {r['backend']:25} {r['mean_ms']:8.2f} ms  {r['fps']:8.1f} FPS"
+                )
             else:
                 print(f"{r['device']:20} ERROR: {r.get('error', 'unknown')}")
diff --git a/tests/test_devices.py b/tests/test_devices.py
index 3995385..01e1239 100644
--- a/tests/test_devices.py
+++ b/tests/test_devices.py
@@ -1,7 +1,8 @@
 """Tests for device-specific functionality."""
 
-import pytest
 import numpy as np
+import pytest
+
 import polyinfer as pi
 
 
diff --git a/tests/test_inference.py b/tests/test_inference.py
index 42833ba..d24b34b 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -1,7 +1,8 @@
 """Tests for inference correctness and consistency."""
 
-import pytest
 import numpy as np
+import pytest
+
 import polyinfer as pi
 
 
diff --git a/tests/test_intel_devices.py b/tests/test_intel_devices.py
index be319a8..5510b50 100644
--- a/tests/test_intel_devices.py
+++ b/tests/test_intel_devices.py
@@ -1,9 +1,14 @@
 """Test polyinfer with Intel devices (CPU, iGPU, NPU)."""
+
+import os
 import sys
+
 sys.path.insert(0, "src")
 
 import numpy as np
+
 import polyinfer as pi
+from polyinfer.backends.openvino import OpenVINOBackend
 
 # Check what's available
 print("=" * 60)
@@ -14,12 +19,10 @@
 print("Available devices:", pi.list_devices())
 
 # Get OpenVINO backend directly to see raw device names
-from polyinfer.backends.openvino import OpenVINOBackend
 ov_backend = OpenVINOBackend()
 print("\nOpenVINO raw devices:", ov_backend.get_available_devices())
 
 # Test model path, use YOLOv8n if available
-import os
 model_path = None
 for path in ["yolov8n.onnx", "examples/yolov8n.onnx", "../yolov8n.onnx"]:
     if os.path.exists(path):
@@ -30,11 +33,14 @@
     print("\nNo test model found. Downloading yolov8n.onnx...")
     try:
         from ultralytics import YOLO
+
         model = YOLO("yolov8n.pt")
         model.export(format="onnx")
         model_path = "yolov8n.onnx"
     except ImportError:
-        print("Please provide a model: pip install ultralytics && yolo export model=yolov8n.pt format=onnx")
+        print(
+            "Please provide a model: pip install ultralytics && yolo export model=yolov8n.pt format=onnx"
+        )
         sys.exit(1)
 
 print(f"\nUsing model: {model_path}")
@@ -64,7 +70,7 @@
         # Benchmark
         bench = model.benchmark(input_data, warmup=5, iterations=20)
         print(f"  Latency: {bench['mean_ms']:.2f} ms ({bench['fps']:.1f} FPS)")
-        results.append((device, description, bench['mean_ms'], bench['fps']))
+        results.append((device, description, bench["mean_ms"], bench["fps"]))
     except Exception as e:
         print(f"  ERROR: {e}")
         results.append((device, description, None, None))
diff --git a/tests/test_logging.py b/tests/test_logging.py
index bbd838e..6d99ee5 100644
--- a/tests/test_logging.py
+++ b/tests/test_logging.py
@@ -27,14 +27,13 @@ def test_logging_exports_available(self):
     def test_logging_module_import(self):
         """Test direct import from logging module."""
         from polyinfer._logging import (
-            get_logger,
-            set_log_level,
+            configure_logging,
+            disable_logging,
+            enable_logging,
             get_log_level,
             get_log_level_name,
-            enable_logging,
-            disable_logging,
-            configure_logging,
-            LogContext,
+            get_logger,
+            set_log_level,
         )
 
         # All should be callable
@@ -237,7 +236,7 @@ def test_logger_hierarchy(self):
         """Test that child loggers inherit from parent."""
         import polyinfer as pi
 
-        parent = pi.get_logger()
+        pi.get_logger()
         child = pi.get_logger("model")
 
         # Child's effective level should match parent
diff --git a/tests/test_mlir.py b/tests/test_mlir.py
index c6197da..be48f7d 100644
--- a/tests/test_mlir.py
+++ b/tests/test_mlir.py
@@ -1,16 +1,18 @@
 """Tests for MLIR emission and compilation."""
 
-import pytest
-import numpy as np
-import polyinfer as pi
-from pathlib import Path
 import tempfile
+from pathlib import Path
+
+import numpy as np
+import pytest
 
+import polyinfer as pi
 
 # =============================================================================
 # Fixtures
 # =============================================================================
 
+
 @pytest.fixture(scope="module")
 def model_path():
     """Get path to test ONNX model."""
@@ -40,6 +42,7 @@ def temp_dir():
 # MLIR Export Tests
 # =============================================================================
 
+
 class TestMLIRExport:
     """Tests for export_mlir functionality."""
 
@@ -57,6 +60,7 @@ def test_export_mlir_default_path(self, model_path, temp_dir):
         """Test MLIR export with default output path."""
         # Copy model to temp dir to test default path behavior
         import shutil
+
         temp_model = temp_dir / "test_model.onnx"
         shutil.copy(model_path, temp_model)
 
@@ -125,6 +129,7 @@ def test_export_mlir_file_not_found(self, temp_dir):
 # MLIR Compilation Tests
 # =============================================================================
 
+
 class TestMLIRCompilation:
     """Tests for compile_mlir functionality."""
 
@@ -137,7 +142,9 @@ def mlir_file(self, model_path, temp_dir):
 
     def test_compile_mlir_cpu(self, mlir_file, temp_dir):
         """Test MLIR compilation for CPU."""
-        vmfb_path = pi.compile_mlir(mlir_file, device="cpu", output_path=temp_dir / "model_cpu.vmfb")
+        vmfb_path = pi.compile_mlir(
+            mlir_file, device="cpu", output_path=temp_dir / "model_cpu.vmfb"
+        )
 
         assert vmfb_path.exists()
         assert vmfb_path.suffix == ".vmfb"
@@ -145,7 +152,9 @@ def test_compile_mlir_cpu(self, mlir_file, temp_dir):
     @pytest.mark.vulkan
     def test_compile_mlir_vulkan(self, mlir_file, temp_dir):
         """Test MLIR compilation for Vulkan."""
-        vmfb_path = pi.compile_mlir(mlir_file, device="vulkan", output_path=temp_dir / "model_vulkan.vmfb")
+        vmfb_path = pi.compile_mlir(
+            mlir_file, device="vulkan", output_path=temp_dir / "model_vulkan.vmfb"
+        )
 
         assert vmfb_path.exists()
 
@@ -181,6 +190,7 @@ def test_compile_mlir_file_not_found(self, temp_dir):
 # End-to-End Workflow Tests
 # =============================================================================
 
+
 class TestMLIRWorkflow:
     """End-to-end tests for MLIR workflow."""
 
@@ -246,6 +256,7 @@ def test_mlir_output_consistency(self, model_path, temp_dir):
 # Backend Method Tests
 # =============================================================================
 
+
 class TestIREEBackendMethods:
     """Tests for IREEBackend emit_mlir and compile_mlir methods."""
 
@@ -265,7 +276,9 @@ def test_backend_compile_mlir(self, model_path, temp_dir):
         mlir = backend.emit_mlir(model_path, temp_dir / "model.mlir")
 
         # Then compile
-        vmfb_path = backend.compile_mlir(mlir.path, device="cpu", output_path=temp_dir / "model.vmfb")
+        vmfb_path = backend.compile_mlir(
+            mlir.path, device="cpu", output_path=temp_dir / "model.vmfb"
+        )
 
         assert vmfb_path.exists()
 
@@ -288,6 +301,7 @@ def test_backend_load_vmfb(self, model_path, temp_dir):
 # MLIR Content Analysis Tests
 # =============================================================================
 
+
 class TestMLIRContent:
     """Tests for MLIR content analysis."""
 
diff --git a/tests/test_quantization.py b/tests/test_quantization.py
index a49d8dc..ceb5317 100644
--- a/tests/test_quantization.py
+++ b/tests/test_quantization.py
@@ -1,9 +1,9 @@
 """Tests for quantization functionality."""
 
-import pytest
+import importlib.util
+
 import numpy as np
-from pathlib import Path
-import tempfile
+import pytest
 
 import polyinfer as pi
 
@@ -54,17 +54,13 @@ def simple_model(self, tmp_path):
         """Create a simple ONNX model for testing."""
         try:
             import onnx
-            from onnx import helper, TensorProto
+            from onnx import TensorProto, helper
         except ImportError:
             pytest.skip("ONNX not installed")
 
         # Create simple model: output = input * 2
-        input_tensor = helper.make_tensor_value_info(
-            "input", TensorProto.FLOAT, [1, 3, 32, 32]
-        )
-        output_tensor = helper.make_tensor_value_info(
-            "output", TensorProto.FLOAT, [1, 3, 32, 32]
-        )
+        input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 32, 32])
+        output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 32, 32])
         const_tensor = helper.make_tensor("const", TensorProto.FLOAT, [1], [2.0])
         mul_node = helper.make_node("Mul", ["input", "const"], ["output"], name="mul")
         graph = helper.make_graph(
@@ -143,16 +139,12 @@ def simple_model(self, tmp_path):
         """Create a simple ONNX model for testing."""
         try:
             import onnx
-            from onnx import helper, TensorProto
+            from onnx import TensorProto, helper
         except ImportError:
             pytest.skip("ONNX not installed")
 
-        input_tensor = helper.make_tensor_value_info(
-            "input", TensorProto.FLOAT, [1, 3, 32, 32]
-        )
-        output_tensor = helper.make_tensor_value_info(
-            "output", TensorProto.FLOAT, [1, 3, 32, 32]
-        )
+        input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 32, 32])
+        output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 32, 32])
         const_tensor = helper.make_tensor("const", TensorProto.FLOAT, [1], [2.0])
         mul_node = helper.make_node("Mul", ["input", "const"], ["output"], name="mul")
         graph = helper.make_graph(
@@ -201,11 +193,10 @@ def test_static_quantization_with_dict_list(self, simple_model, tmp_path):
         output_path = tmp_path / "model_int8.onnx"
 
         calibration_data = [
-            {"input": np.random.rand(1, 3, 32, 32).astype(np.float32)}
-            for _ in range(10)
+            {"input": np.random.rand(1, 3, 32, 32).astype(np.float32)} for _ in range(10)
         ]
 
-        result = pi.quantize_static(
+        pi.quantize_static(
             simple_model,
             output_path,
             calibration_data=calibration_data,
@@ -218,7 +209,7 @@ def test_static_quantization_per_channel(self, simple_model, tmp_path, calibrati
         """Test static quantization with per-channel option."""
         output_path = tmp_path / "model_int8.onnx"
 
-        result = pi.quantize_static(
+        pi.quantize_static(
             simple_model,
             output_path,
             calibration_data=calibration_data,
@@ -227,11 +218,13 @@ def test_static_quantization_per_channel(self, simple_model, tmp_path, calibrati
 
         assert output_path.exists()
 
-    def test_static_quantization_entropy_calibration(self, simple_model, tmp_path, calibration_data):
+    def test_static_quantization_entropy_calibration(
+        self, simple_model, tmp_path, calibration_data
+    ):
         """Test static quantization with entropy calibration."""
         output_path = tmp_path / "model_int8.onnx"
 
-        result = pi.quantize(
+        pi.quantize(
             simple_model,
             output_path,
             method="static",
@@ -250,16 +243,12 @@ def simple_model(self, tmp_path):
         """Create a simple ONNX model for testing."""
         try:
             import onnx
-            from onnx import helper, TensorProto
+            from onnx import TensorProto, helper
         except ImportError:
             pytest.skip("ONNX not installed")
 
-        input_tensor = helper.make_tensor_value_info(
-            "input", TensorProto.FLOAT, [1, 3, 32, 32]
-        )
-        output_tensor = helper.make_tensor_value_info(
-            "output", TensorProto.FLOAT, [1, 3, 32, 32]
-        )
+        input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 32, 32])
+        output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 32, 32])
         const_tensor = helper.make_tensor("const", TensorProto.FLOAT, [1], [2.0])
         mul_node = helper.make_node("Mul", ["input", "const"], ["output"], name="mul")
         graph = helper.make_graph(
@@ -274,9 +263,7 @@ def simple_model(self, tmp_path):
 
     def test_fp16_conversion(self, simple_model, tmp_path):
         """Test FP16 conversion."""
-        try:
-            import onnxconverter_common
-        except ImportError:
+        if importlib.util.find_spec("onnxconverter_common") is None:
             pytest.skip("onnxconverter-common not installed")
 
         output_path = tmp_path / "model_fp16.onnx"
@@ -288,9 +275,7 @@ def test_fp16_conversion(self, simple_model, tmp_path):
 
     def test_fp16_via_quantize(self, simple_model, tmp_path):
         """Test FP16 conversion via quantize()."""
-        try:
-            import onnxconverter_common
-        except ImportError:
+        if importlib.util.find_spec("onnxconverter_common") is None:
             pytest.skip("onnxconverter-common not installed")
 
         output_path = tmp_path / "model_fp16.onnx"
@@ -306,9 +291,7 @@ def test_fp16_via_quantize(self, simple_model, tmp_path):
 
     def test_fp16_model_runs(self, simple_model, tmp_path):
         """Test that FP16 model can be loaded and run."""
-        try:
-            import onnxconverter_common
-        except ImportError:
+        if importlib.util.find_spec("onnxconverter_common") is None:
             pytest.skip("onnxconverter-common not installed")
 
         output_path = tmp_path / "model_fp16.onnx"
@@ -329,16 +312,12 @@ def simple_model(self, tmp_path):
         """Create a simple ONNX model for testing."""
         try:
             import onnx
-            from onnx import helper, TensorProto
+            from onnx import TensorProto, helper
         except ImportError:
             pytest.skip("ONNX not installed")
 
-        input_tensor = helper.make_tensor_value_info(
-            "input", TensorProto.FLOAT, [1, 3, 32, 32]
-        )
-        output_tensor = helper.make_tensor_value_info(
-            "output", TensorProto.FLOAT, [1, 3, 32, 32]
-        )
+        input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 32, 32])
+        output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 32, 32])
         const_tensor = helper.make_tensor("const", TensorProto.FLOAT, [1], [2.0])
         mul_node = helper.make_node("Mul", ["input", "const"], ["output"], name="mul")
         graph = helper.make_graph(
@@ -385,16 +364,12 @@ def simple_model(self, tmp_path):
         """Create a simple ONNX model for testing."""
         try:
             import onnx
-            from onnx import helper, TensorProto
+            from onnx import TensorProto, helper
         except ImportError:
             pytest.skip("ONNX not installed")
 
-        input_tensor = helper.make_tensor_value_info(
-            "input", TensorProto.FLOAT, [1, 3, 32, 32]
-        )
-        output_tensor = helper.make_tensor_value_info(
-            "output", TensorProto.FLOAT, [1, 3, 32, 32]
-        )
+        input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 32, 32])
+        output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 32, 32])
         const_tensor = helper.make_tensor("const", TensorProto.FLOAT, [1], [2.0])
         mul_node = helper.make_node("Mul", ["input", "const"], ["output"], name="mul")
         graph = helper.make_graph(
@@ -415,10 +390,7 @@ def calibration_data(self):
     @pytest.mark.openvino
     def test_openvino_quantization(self, simple_model, tmp_path, calibration_data):
         """Test OpenVINO NNCF quantization."""
-        try:
-            import nncf
-            import openvino
-        except ImportError:
+        if importlib.util.find_spec("nncf") is None or importlib.util.find_spec("openvino") is None:
             pytest.skip("OpenVINO/NNCF not installed")
 
         output_path = tmp_path / "model_int8.onnx"
diff --git a/tests/test_yolov8.py b/tests/test_yolov8.py
index 4196e09..a7447ae 100644
--- a/tests/test_yolov8.py
+++ b/tests/test_yolov8.py
@@ -4,16 +4,18 @@
 backend and device combination, checking both correctness and performance.
 """
 
-import pytest
-import numpy as np
-import polyinfer as pi
 from pathlib import Path
 
+import numpy as np
+import pytest
+
+import polyinfer as pi
 
 # =============================================================================
 # Fixtures
 # =============================================================================
 
+
 @pytest.fixture(scope="module")
 def yolov8_path():
     """Get path to YOLOv8n ONNX model."""
@@ -30,6 +32,7 @@ def yolov8_path():
     # Try to export
     try:
         from ultralytics import YOLO
+
         model = YOLO("yolov8n.pt")
         export_path = Path(__file__).parent.parent / "yolov8n.onnx"
         model.export(format="onnx")
@@ -58,6 +61,7 @@ def reference_output(yolov8_path, yolo_input):
 # Device Discovery
 # =============================================================================
 
+
 def get_all_device_backend_combinations():
     """Get all valid (device, backend) combinations."""
     combinations = []
@@ -73,6 +77,7 @@ def get_all_device_backend_combinations():
 # YOLOv8 Tests by Backend
 # =============================================================================
 
+
 class TestYOLOv8ONNXRuntime:
     """YOLOv8 tests for ONNX Runtime backend."""
 
@@ -205,7 +210,7 @@ def test_vulkan(self, yolov8_path, yolo_input):
         total_elements = 1 * 84 * 8400
         nan_count = np.sum(np.isnan(output))
         nan_ratio = nan_count / total_elements
-        assert nan_ratio <= 0.001, f"Too many NaN values: {nan_count} ({nan_ratio*100:.2f}%)"
+        assert nan_ratio <= 0.001, f"Too many NaN values: {nan_count} ({nan_ratio * 100:.2f}%)"
 
         assert not np.any(np.isinf(output)), "Output contains Inf"
 
@@ -242,6 +247,7 @@ def test_cuda(self, yolov8_path, yolo_input, reference_output):
 # Performance Benchmarks
 # =============================================================================
 
+
 class TestYOLOv8Benchmarks:
     """Benchmark YOLOv8 across all backends."""
 
@@ -256,21 +262,25 @@ def test_benchmark_all_devices(self, yolov8_path, yolo_input):
                 try:
                     model = pi.load(yolov8_path, backend=backend_name, device=device)
                     bench = model.benchmark(yolo_input, warmup=5, iterations=20)
-                    results.append({
-                        "device": device,
-                        "backend": backend_name,
-                        "backend_name": model.backend_name,
-                        "mean_ms": bench["mean_ms"],
-                        "fps": bench["fps"],
-                        "status": "success",
-                    })
+                    results.append(
+                        {
+                            "device": device,
+                            "backend": backend_name,
+                            "backend_name": model.backend_name,
+                            "mean_ms": bench["mean_ms"],
+                            "fps": bench["fps"],
+                            "status": "success",
+                        }
+                    )
                 except Exception as e:
-                    results.append({
-                        "device": device,
-                        "backend": backend_name,
-                        "error": str(e),
-                        "status": "error",
-                    })
+                    results.append(
+                        {
+                            "device": device,
+                            "backend": backend_name,
+                            "error": str(e),
+                            "status": "error",
+                        }
+                    )
 
         # Print results
         print("\n" + "=" * 80)
@@ -283,7 +293,9 @@ def test_benchmark_all_devices(self, yolov8_path, yolo_input):
         successful.sort(key=lambda r: r["mean_ms"])
 
         for r in successful:
-            print(f"{r['device']:<15} {r['backend_name']:<25} {r['mean_ms']:>10.2f}ms {r['fps']:>9.1f}")
+            print(
+                f"{r['device']:<15} {r['backend_name']:<25} {r['mean_ms']:>10.2f}ms {r['fps']:>9.1f}"
+            )
 
         print("-" * 80)
 
@@ -304,6 +316,7 @@ def test_benchmark_openvino_devices(self, yolov8_path, yolo_input):
             pytest.skip("OpenVINO not available")
 
         from polyinfer.backends.openvino import OpenVINOBackend
+
         ov_backend = OpenVINOBackend()
         raw_devices = ov_backend.get_available_devices()
 
@@ -322,13 +335,17 @@ def test_benchmark_openvino_devices(self, yolov8_path, yolo_input):
             try:
                 model = pi.load(yolov8_path, backend="openvino", device=pi_device)
                 bench = model.benchmark(yolo_input, warmup=5, iterations=20)
-                results.append({
-                    "raw_device": raw_device,
-                    "pi_device": pi_device,
-                    "mean_ms": bench["mean_ms"],
-                    "fps": bench["fps"],
-                })
-                print(f"  {raw_device} ({pi_device}): {bench['mean_ms']:.2f}ms ({bench['fps']:.1f} FPS)")
+                results.append(
+                    {
+                        "raw_device": raw_device,
+                        "pi_device": pi_device,
+                        "mean_ms": bench["mean_ms"],
+                        "fps": bench["fps"],
+                    }
+                )
+                print(
+                    f"  {raw_device} ({pi_device}): {bench['mean_ms']:.2f}ms ({bench['fps']:.1f} FPS)"
+                )
             except Exception as e:
                 print(f"  {raw_device} ({pi_device}): ERROR - {e}")
 
@@ -348,11 +365,13 @@ def test_benchmark_iree_devices(self, yolov8_path, yolo_input):
             try:
                 model = pi.load(yolov8_path, backend="iree", device=device)
                 bench = model.benchmark(yolo_input, warmup=5, iterations=20)
-                results.append({
-                    "device": device,
-                    "mean_ms": bench["mean_ms"],
-                    "fps": bench["fps"],
-                })
+                results.append(
+                    {
+                        "device": device,
+                        "mean_ms": bench["mean_ms"],
+                        "fps": bench["fps"],
+                    }
+                )
                 print(f"  {device}: {bench['mean_ms']:.2f}ms ({bench['fps']:.1f} FPS)")
             except Exception as e:
                 print(f"  {device}: ERROR - {e}")
@@ -364,6 +383,7 @@ def test_benchmark_iree_devices(self, yolov8_path, yolo_input):
 # Cross-Backend Consistency
 # =============================================================================
 
+
 class TestYOLOv8Consistency:
     """Test output consistency across backends."""
 
@@ -374,11 +394,7 @@ def test_all_backends_same_output_shape(self, yolov8_path, yolo_input):
         for device_info in pi.list_devices():
             for backend_name in device_info.backends:
                 try:
-                    model = pi.load(
-                        yolov8_path,
-                        backend=backend_name,
-                        device=device_info.name
-                    )
+                    model = pi.load(yolov8_path, backend=backend_name, device=device_info.name)
                     output = model(yolo_input)
                     key = f"{backend_name}-{device_info.name}"
                     shapes[key] = output.shape
@@ -447,7 +463,7 @@ def test_iree_vulkan_vs_cpu(self, yolov8_path, yolo_input):
         total_elements = output_vulkan.size
         nan_count = np.sum(np.isnan(output_vulkan))
         nan_ratio = nan_count / total_elements
-        assert nan_ratio <= 0.001, f"Too many NaN values: {nan_count} ({nan_ratio*100:.2f}%)"
+        assert nan_ratio <= 0.001, f"Too many NaN values: {nan_count} ({nan_ratio * 100:.2f}%)"
 
         # Compare non-NaN values using correlation
         valid_mask = ~np.isnan(output_vulkan)
@@ -461,6 +477,7 @@ def test_iree_vulkan_vs_cpu(self, yolov8_path, yolo_input):
 # Stress Tests
 # =============================================================================
 
+
 class TestYOLOv8Stress:
     """Stress tests for YOLOv8."""
 
@@ -500,7 +517,7 @@ def test_repeated_inference_vulkan(self, yolov8_path, yolo_input):
             nan_count = np.sum(np.isnan(output))
             nan_ratio = nan_count / total_elements
             assert nan_ratio <= max_nan_ratio, (
-                f"Run {i} has too many NaN values: {nan_count} ({nan_ratio*100:.2f}%)"
+                f"Run {i} has too many NaN values: {nan_count} ({nan_ratio * 100:.2f}%)"
             )
 
             assert not np.any(np.isinf(output)), f"Run {i} contains Inf"
@@ -570,8 +587,12 @@ def test_different_random_inputs(self, yolov8_path):
             try:
                 model = pi.load(model_path, backend=backend, device=device_info.name)
                 bench = model.benchmark(input_data, warmup=5, iterations=20)
-                print(f"{device_info.name:15} {model.backend_name:25} {bench['mean_ms']:8.2f}ms {bench['fps']:8.1f} FPS")
-                results.append((device_info.name, model.backend_name, bench['mean_ms'], bench['fps']))
+                print(
+                    f"{device_info.name:15} {model.backend_name:25} {bench['mean_ms']:8.2f}ms {bench['fps']:8.1f} FPS"
+                )
+                results.append(
+                    (device_info.name, model.backend_name, bench["mean_ms"], bench["fps"])
+                )
             except Exception as e:
                 print(f"{device_info.name:15} {backend:25} ERROR: {str(e)[:40]}")
 
@@ -580,5 +601,5 @@ def test_different_random_inputs(self, yolov8_path):
     print("Summary (sorted by speed)")
     print("=" * 60)
     results.sort(key=lambda x: x[2])
-    for device, backend, ms, fps in results:
+    for _device, backend, ms, fps in results:
         print(f"{backend:30} {ms:8.2f}ms {fps:8.1f} FPS")

From 303c9b2aaac30e4663e91840036459b36dd6de01 Mon Sep 17 00:00:00 2001
From: Athrva <athrva98@gmail.com>
Date: Sat, 20 Dec 2025 14:09:28 -0500
Subject: [PATCH 3/8] Fix mypy issues

---
 src/polyinfer/_logging.py                     |  6 +--
 src/polyinfer/backends/_autoload.py           |  6 +--
 src/polyinfer/backends/base.py                |  6 +--
 src/polyinfer/backends/iree/backend.py        | 32 +++++------
 src/polyinfer/backends/onnxruntime/backend.py |  7 +--
 src/polyinfer/backends/openvino/backend.py    | 11 ++--
 src/polyinfer/backends/tensorrt/backend.py    | 18 +++----
 src/polyinfer/compare.py                      |  6 +--
 src/polyinfer/discovery.py                    | 53 +++++++++++--------
 src/polyinfer/nvidia_setup.py                 | 12 ++---
 src/polyinfer/quantization.py                 | 32 +++++++----
 11 files changed, 106 insertions(+), 83 deletions(-)

diff --git a/src/polyinfer/_logging.py b/src/polyinfer/_logging.py
index d6c99ac..5ff28b7 100644
--- a/src/polyinfer/_logging.py
+++ b/src/polyinfer/_logging.py
@@ -34,7 +34,7 @@
 _logger.setLevel(logging.WARNING)
 
 # Create console handler with formatting
-_handler = logging.StreamHandler(sys.stderr)
+_handler: logging.Handler = logging.StreamHandler(sys.stderr)
 _handler.setLevel(logging.DEBUG)  # Handler passes everything, logger filters
 
 # Format: [LEVEL] polyinfer.module: message
@@ -154,7 +154,7 @@ def configure_logging(
     level: str | int = "WARNING",
     format: str = "[%(levelname)s] %(name)s: %(message)s",
     stream=None,
-    filename: str = None,
+    filename: str | None = None,
 ) -> None:
     """Configure polyinfer logging with custom settings.
 
@@ -237,7 +237,7 @@ def _log_backend_init(name: str, version: str, devices: list):
     logger.debug(f"  Supported devices: {devices}")
 
 
-def _log_inference(backend: str, input_shapes: list, output_shapes: list, time_ms: float = None):
+def _log_inference(backend: str, input_shapes: list, output_shapes: list, time_ms: float | None = None):
     """Log inference operation."""
     logger = get_logger("inference")
     if time_ms is not None:
diff --git a/src/polyinfer/backends/_autoload.py b/src/polyinfer/backends/_autoload.py
index fce79b4..d57fedf 100644
--- a/src/polyinfer/backends/_autoload.py
+++ b/src/polyinfer/backends/_autoload.py
@@ -227,7 +227,7 @@ def supported_devices(self) -> list[str]:
         def version(self) -> str:
             try:
                 self._ensure_loaded()
-                return self._real_backend.version
+                return str(self._real_backend.version)
             except Exception:
                 return "not loaded"
 
@@ -321,7 +321,7 @@ def supported_devices(self) -> list[str]:
         def version(self) -> str:
             try:
                 self._ensure_loaded()
-                return self._real_backend.version
+                return str(self._real_backend.version)
             except Exception:
                 return "not loaded"
 
@@ -354,5 +354,5 @@ def load(self, model_path: str, device: str = "cpu", **kwargs):
 
     # TODO: Narrow exception suppression to specific types once register_backend()
     #   error conditions are documented.
-    with contextlib.supress(Exception):
+    with contextlib.suppress(Exception):
         register_backend("onnxruntime", LazyONNXRuntimeBackend)
diff --git a/src/polyinfer/backends/base.py b/src/polyinfer/backends/base.py
index a5ca904..02af84e 100644
--- a/src/polyinfer/backends/base.py
+++ b/src/polyinfer/backends/base.py
@@ -96,14 +96,14 @@ def benchmark(
             self(*inputs)
 
         # Benchmark
-        times = []
+        times_list: list[float] = []
         for _ in range(iterations):
             start = time.perf_counter()
             self(*inputs)
             elapsed = (time.perf_counter() - start) * 1000  # ms
-            times.append(elapsed)
+            times_list.append(elapsed)
 
-        times = np.array(times)
+        times = np.array(times_list)
         return {
             "backend": self.backend_name,
             "device": self.device,
diff --git a/src/polyinfer/backends/iree/backend.py b/src/polyinfer/backends/iree/backend.py
index 09ed69b..a25b4b6 100644
--- a/src/polyinfer/backends/iree/backend.py
+++ b/src/polyinfer/backends/iree/backend.py
@@ -197,6 +197,8 @@ def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]:
         inputs = tuple(np.ascontiguousarray(inp, dtype=np.float32) for inp in inputs)
 
         # Run inference
+        if self._func is None:
+            raise RuntimeError("Model function not initialized")
         outputs = self._func(*inputs)
 
         # Convert outputs to numpy
@@ -255,7 +257,7 @@ def is_available(self) -> bool:
             return False
 
         # Need compiler tools or CLI tools as fallback
-        return IREE_COMPILER_AVAILABLE or (_get_iree_import_onnx() and _get_iree_compile())
+        return IREE_COMPILER_AVAILABLE or bool(_get_iree_import_onnx() and _get_iree_compile())
 
     def load(
         self,
@@ -284,13 +286,13 @@ def load(
 
         _logger.debug(f"Loading model: {model_path}")
 
-        model_path = Path(model_path)
+        model_path_obj = Path(model_path)
         device_type = device.split(":")[0] if ":" in device else device
 
         # Determine paths
         target = DEVICE_TO_TARGET.get(device_type, "llvm-cpu")
         cache_dir = Path(kwargs.get("cache_dir", "."))
-        vmfb_path = cache_dir / f"{model_path.stem}_{target}.vmfb"
+        vmfb_path = cache_dir / f"{model_path_obj.stem}_{target}.vmfb"
 
         _logger.debug(f"Target: {target}, cache path: {vmfb_path}")
 
@@ -304,9 +306,9 @@ def load(
         if not IREE_COMPILER_AVAILABLE:
             # Try using CLI tools
             _logger.debug("Using CLI tools for compilation")
-            vmfb_path = self._compile_with_cli(model_path, target, vmfb_path, **kwargs)
+            vmfb_path = self._compile_with_cli(model_path_obj, target, vmfb_path, **kwargs)
         else:
-            vmfb_path = self._compile_with_api(model_path, target, vmfb_path, **kwargs)
+            vmfb_path = self._compile_with_api(model_path_obj, target, vmfb_path, **kwargs)
 
         _logger.info(f"Compilation complete: {vmfb_path}")
         return self._load_vmfb(vmfb_path, device)
@@ -346,15 +348,15 @@ def emit_mlir(
             module @model {
               func.func @main_graph(...
         """
-        model_path = Path(model_path)
+        model_path_obj = Path(model_path)
 
-        if not model_path.exists():
-            raise FileNotFoundError(f"Model not found: {model_path}")
+        if not model_path_obj.exists():
+            raise FileNotFoundError(f"Model not found: {model_path_obj}")
 
         # Determine output path
-        output_path = model_path.with_suffix(".mlir") if output_path is None else Path(output_path)
+        output_path_obj = model_path_obj.with_suffix(".mlir") if output_path is None else Path(output_path)
 
-        output_path.parent.mkdir(parents=True, exist_ok=True)
+        output_path_obj.parent.mkdir(parents=True, exist_ok=True)
 
         # Find IREE import tool
         iree_import = _get_iree_import_onnx()
@@ -366,10 +368,10 @@ def emit_mlir(
             )
 
         # Convert ONNX to MLIR
-        _logger.debug(f"Converting ONNX to MLIR: {model_path} -> {output_path}")
+        _logger.debug(f"Converting ONNX to MLIR: {model_path_obj} -> {output_path_obj}")
         try:
             subprocess.run(
-                [iree_import, str(model_path), "-o", str(output_path)],
+                [iree_import, str(model_path_obj), "-o", str(output_path_obj)],
                 check=True,
                 capture_output=True,
                 text=True,
@@ -383,12 +385,12 @@ def emit_mlir(
         # Load content if requested
         content = None
         if load_content:
-            content = output_path.read_text()
+            content = output_path_obj.read_text()
 
         return MLIROutput(
-            path=output_path,
+            path=output_path_obj,
             content=content,
-            source_model=model_path,
+            source_model=model_path_obj,
             dialect="iree",
         )
 
diff --git a/src/polyinfer/backends/onnxruntime/backend.py b/src/polyinfer/backends/onnxruntime/backend.py
index a0c89ed..6ba7eec 100644
--- a/src/polyinfer/backends/onnxruntime/backend.py
+++ b/src/polyinfer/backends/onnxruntime/backend.py
@@ -87,7 +87,8 @@ def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]:
         outputs = self._session.run(None, input_dict)
 
         if len(outputs) == 1:
-            return outputs[0]
+            result: np.ndarray = outputs[0]
+            return result
         return tuple(outputs)
 
     def run(self, inputs: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
@@ -188,7 +189,7 @@ def supported_devices(self) -> list[str]:
     @property
     def version(self) -> str:
         if ONNXRUNTIME_AVAILABLE:
-            return ort.__version__
+            return str(ort.__version__)
         return "not installed"
 
     @property
@@ -203,7 +204,7 @@ def get_available_providers(self) -> list[str]:
         """Get list of available execution providers."""
         if not ONNXRUNTIME_AVAILABLE:
             return []
-        return ort.get_available_providers()
+        return list(ort.get_available_providers())
 
     def load(
         self,
diff --git a/src/polyinfer/backends/openvino/backend.py b/src/polyinfer/backends/openvino/backend.py
index c0300bc..d88f463 100644
--- a/src/polyinfer/backends/openvino/backend.py
+++ b/src/polyinfer/backends/openvino/backend.py
@@ -80,11 +80,11 @@ def output_names(self) -> list[str]:
 
     @property
     def input_shapes(self) -> list[tuple]:
-        return self._input_shapes
+        return [tuple(s) for s in self._input_shapes]
 
     @property
     def output_shapes(self) -> list[tuple]:
-        return self._output_shapes
+        return [tuple(s) for s in self._output_shapes]
 
     def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]:
         """Run inference."""
@@ -103,7 +103,8 @@ def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]:
             outputs.append(output_tensor.data.copy())
 
         if len(outputs) == 1:
-            return outputs[0]
+            result: np.ndarray = outputs[0]
+            return result
         return tuple(outputs)
 
     def run(self, inputs: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
@@ -167,7 +168,7 @@ def supported_devices(self) -> list[str]:
     @property
     def version(self) -> str:
         if OPENVINO_AVAILABLE:
-            return ov.__version__
+            return str(ov.__version__)
         return "not installed"
 
     @property
@@ -182,7 +183,7 @@ def get_available_devices(self) -> list[str]:
         """Get raw OpenVINO device names."""
         if not OPENVINO_AVAILABLE:
             return []
-        return self.core.available_devices
+        return list(self.core.available_devices)
 
     def load(
         self,
diff --git a/src/polyinfer/backends/tensorrt/backend.py b/src/polyinfer/backends/tensorrt/backend.py
index 65a7f89..fd836be 100644
--- a/src/polyinfer/backends/tensorrt/backend.py
+++ b/src/polyinfer/backends/tensorrt/backend.py
@@ -76,10 +76,10 @@ def __init__(
 
         # For static shapes, pre-allocate GPU buffers
         # For dynamic shapes, allocate lazily on first inference
-        self._d_inputs = {}
-        self._d_outputs = {}
-        self._h_outputs = {}
-        self._allocated_shapes = {}  # Track allocated buffer shapes
+        self._d_inputs: dict[str, int] = {}
+        self._d_outputs: dict[str, int] = {}
+        self._h_outputs: dict[str, np.ndarray] = {}
+        self._allocated_shapes: dict[str, tuple[int, ...]] = {}  # Track allocated buffer shapes
 
         if not self._has_dynamic_shapes:
             self._allocate_buffers()
@@ -108,7 +108,7 @@ def input_shapes(self) -> list[tuple]:
     def output_shapes(self) -> list[tuple]:
         return self._output_shapes
 
-    def _allocate_buffers(self, input_shapes: dict[str, tuple] = None):
+    def _allocate_buffers(self, input_shapes: dict[str, tuple] | None = None):
         """Allocate GPU buffers for inputs and outputs.
 
         For dynamic shapes, input_shapes must be provided to determine output shapes.
@@ -250,7 +250,7 @@ def supported_devices(self) -> list[str]:
     @property
     def version(self) -> str:
         if TENSORRT_AVAILABLE:
-            return trt.__version__
+            return str(trt.__version__)
         return "not installed"
 
     @property
@@ -337,9 +337,9 @@ def load(
         _logger.debug(f"Using CUDA device: {device_id}")
 
         # Check for cached engine
-        model_path = Path(model_path)
+        model_path_obj = Path(model_path)
         cache_path = kwargs.get("cache_path")
-        cache_path = model_path.with_suffix(".engine") if cache_path is None else Path(cache_path)
+        cache_path = model_path_obj.with_suffix(".engine") if cache_path is None else Path(cache_path)
 
         # Try to load cached engine (unless force_rebuild)
         if cache_path.exists() and not kwargs.get("force_rebuild", False):
@@ -348,7 +348,7 @@ def load(
 
         # Build engine from ONNX with full options
         _logger.info("Building TensorRT engine from ONNX (this may take a while)...")
-        engine = self._build_engine(model_path, **kwargs)
+        engine = self._build_engine(model_path_obj, **kwargs)
 
         # Cache the engine
         _logger.debug(f"Saving engine to: {cache_path}")
diff --git a/src/polyinfer/compare.py b/src/polyinfer/compare.py
index 28186b9..1eafc88 100644
--- a/src/polyinfer/compare.py
+++ b/src/polyinfer/compare.py
@@ -44,14 +44,14 @@ def benchmark(
             model(*inputs)
 
         # Benchmark
-        times = []
+        times_list: list[float] = []
         for _ in range(iterations):
             start = time.perf_counter()
             model(*inputs)
             elapsed = (time.perf_counter() - start) * 1000
-            times.append(elapsed)
+            times_list.append(elapsed)
 
-        times = np.array(times)
+        times = np.array(times_list)
         return {
             "backend": backend,
             "device": device,
diff --git a/src/polyinfer/discovery.py b/src/polyinfer/discovery.py
index a6ff539..1a600fe 100644
--- a/src/polyinfer/discovery.py
+++ b/src/polyinfer/discovery.py
@@ -90,7 +90,7 @@ def list_devices() -> list[DeviceInfo]:
         cpu (cpu) - backends: [onnxruntime, openvino]
         cuda:0 (cuda) - backends: [onnxruntime]
     """
-    devices = {}
+    devices: dict[str, dict[str, list[str] | str]] = {}
 
     # Collect devices from all backends
     for backend_name in _list_backends(available_only=True):
@@ -102,18 +102,22 @@ def list_devices() -> list[DeviceInfo]:
                         "backends": [],
                         "type": device.split(":")[0] if ":" in device else device,
                     }
-                devices[device]["backends"].append(backend_name)
+                backends_list = devices[device]["backends"]
+                if isinstance(backends_list, list):
+                    backends_list.append(backend_name)
         except Exception:
             continue
 
     # Build DeviceInfo list
-    result = []
+    result: list[DeviceInfo] = []
     for name, info in sorted(devices.items()):
+        device_type = info["type"]
+        backends = info["backends"]
         result.append(
             DeviceInfo(
                 name=name,
-                device_type=info["type"],
-                backends=info["backends"],
+                device_type=str(device_type),
+                backends=list(backends) if isinstance(backends, list) else [backends],
             )
         )
 
@@ -178,29 +182,34 @@ def system_info() -> dict:
     }
 
     # Backend info
+    backends_dict = info["backends"]
     for name in _list_backends(available_only=False):
         try:
             backend = _get_backend(name)
-            info["backends"][name] = {
-                "available": backend.is_available(),
-                "version": backend.version,
-                "devices": backend.supported_devices,
-                "priority": backend.priority,
-            }
+            if isinstance(backends_dict, dict):
+                backends_dict[name] = {
+                    "available": backend.is_available(),
+                    "version": backend.version,
+                    "devices": backend.supported_devices,
+                    "priority": backend.priority,
+                }
         except Exception as e:
-            info["backends"][name] = {
-                "available": False,
-                "error": str(e),
-            }
+            if isinstance(backends_dict, dict):
+                backends_dict[name] = {
+                    "available": False,
+                    "error": str(e),
+                }
 
     # Device info
+    devices_list = info["devices"]
     for device in list_devices():
-        info["devices"].append(
-            {
-                "name": device.name,
-                "type": device.device_type,
-                "backends": device.backends,
-            }
-        )
+        if isinstance(devices_list, list):
+            devices_list.append(
+                {
+                    "name": device.name,
+                    "type": device.device_type,
+                    "backends": device.backends,
+                }
+            )
 
     return info
diff --git a/src/polyinfer/nvidia_setup.py b/src/polyinfer/nvidia_setup.py
index cc7bc90..e603556 100644
--- a/src/polyinfer/nvidia_setup.py
+++ b/src/polyinfer/nvidia_setup.py
@@ -439,7 +439,7 @@ def get_nvidia_info() -> dict:
         Dictionary with information about found NVIDIA packages and libraries.
     """
     site_packages = _get_site_packages()
-    info = {
+    info: dict = {
         "site_packages": str(site_packages),
         "library_directories": [],
         "libraries": {},
@@ -502,17 +502,17 @@ def fix_onnxruntime_conflict(prefer: str = "cuda") -> bool:
     import subprocess
 
     try:
-        import importlib.metadata as metadata
+        import importlib.metadata as pkg_metadata
     except ImportError:
-        import importlib_metadata as metadata
+        import importlib_metadata as pkg_metadata  # type: ignore
 
     # Check which variants are installed
-    installed = []
+    installed: list[str] = []
     for pkg in ["onnxruntime", "onnxruntime-gpu", "onnxruntime-directml"]:
         try:
-            metadata.version(pkg)
+            pkg_metadata.version(pkg)
             installed.append(pkg)
-        except metadata.PackageNotFoundError:
+        except pkg_metadata.PackageNotFoundError:
             pass
 
     if len(installed) <= 1:
diff --git a/src/polyinfer/quantization.py b/src/polyinfer/quantization.py
index c57fb78..dca5128 100644
--- a/src/polyinfer/quantization.py
+++ b/src/polyinfer/quantization.py
@@ -268,6 +268,8 @@ def _quantize_onnxruntime(
         ort_calib_method = calib_method_map.get(config.calibration_method, ORTCalibMethod.MinMax)
 
         # Create calibration data reader
+        if calibration_data is None:
+            raise ValueError("calibration_data is required for static quantization")
         data_reader = _create_ort_calibration_reader(model_input, calibration_data, num_samples)
 
         quantize_static(
@@ -368,6 +370,9 @@ def get_next(self) -> dict[str, np.ndarray] | None:
         if self._count >= self.num_samples:
             return None
 
+        if self._data_iter is None:
+            return None
+
         try:
             batch = next(self._data_iter)
             self._count += 1
@@ -375,7 +380,9 @@ def get_next(self) -> dict[str, np.ndarray] | None:
             # Handle single array input
             if isinstance(batch, np.ndarray):
                 return {self._input_names[0]: batch}
-            return batch
+            # Ensure we return the correct type
+            result: dict[str, np.ndarray] = batch
+            return result
         except StopIteration:
             return None
 
@@ -480,40 +487,43 @@ def _quantize_openvino(
 
 def _create_nncf_dataset(model, data: CalibrationData, num_samples: int):
     """Create NNCF Dataset from calibration data."""
+    from typing import Any
+
     import nncf
 
     # Get input names
     input_names = [inp.any_name for inp in model.inputs]
 
-    # Normalize data
+    # Normalize data to a list
+    normalized_data: Any = data
     if callable(data) and not isinstance(data, (list, Iterator)):
-        data = data()
+        normalized_data = data()
 
-    if isinstance(data, list) and len(data) > 0 and isinstance(data[0], np.ndarray):
+    if isinstance(normalized_data, list) and len(normalized_data) > 0 and isinstance(normalized_data[0], np.ndarray):
         # List of arrays
         if len(input_names) != 1:
             raise ValueError(
                 f"Model has {len(input_names)} inputs, but calibration data is a list of arrays."
             )
-        data = [{input_names[0]: arr} for arr in data]
+        normalized_data = [{input_names[0]: arr} for arr in normalized_data]
 
     # Convert to list if iterator
-    if not isinstance(data, list):
-        data = list(data)
+    if not isinstance(normalized_data, list):
+        normalized_data = list(normalized_data)
 
     # Limit samples
-    data = data[:num_samples]
+    data_list: list[Any] = normalized_data[:num_samples]
 
     # Transform function for NNCF
-    def transform_fn(data_item):
+    def transform_fn(data_item: Any) -> tuple[Any, ...]:
         if isinstance(data_item, dict):
             # Return as tuple of arrays in input order
             return tuple(data_item[name] for name in input_names)
         elif isinstance(data_item, np.ndarray):
             return (data_item,)
-        return data_item
+        return (data_item,)
 
-    return nncf.Dataset(data, transform_fn)
+    return nncf.Dataset(data_list, transform_fn)
 
 
 def quantize_for_tensorrt(

From 830cd2e688bb29991251df52895cf661479f880a Mon Sep 17 00:00:00 2001
From: Athrva <athrva98@gmail.com>
Date: Sat, 20 Dec 2025 14:11:18 -0500
Subject: [PATCH 4/8] Reformat long lines to satisfy ruff

---
 src/polyinfer/_logging.py                  | 4 +++-
 src/polyinfer/backends/iree/backend.py     | 4 +++-
 src/polyinfer/backends/tensorrt/backend.py | 4 +++-
 src/polyinfer/quantization.py              | 6 +++++-
 4 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/polyinfer/_logging.py b/src/polyinfer/_logging.py
index 5ff28b7..d10bc50 100644
--- a/src/polyinfer/_logging.py
+++ b/src/polyinfer/_logging.py
@@ -237,7 +237,9 @@ def _log_backend_init(name: str, version: str, devices: list):
     logger.debug(f"  Supported devices: {devices}")
 
 
-def _log_inference(backend: str, input_shapes: list, output_shapes: list, time_ms: float | None = None):
+def _log_inference(
+    backend: str, input_shapes: list, output_shapes: list, time_ms: float | None = None
+):
     """Log inference operation."""
     logger = get_logger("inference")
     if time_ms is not None:
diff --git a/src/polyinfer/backends/iree/backend.py b/src/polyinfer/backends/iree/backend.py
index a25b4b6..79b70c4 100644
--- a/src/polyinfer/backends/iree/backend.py
+++ b/src/polyinfer/backends/iree/backend.py
@@ -354,7 +354,9 @@ def emit_mlir(
             raise FileNotFoundError(f"Model not found: {model_path_obj}")
 
         # Determine output path
-        output_path_obj = model_path_obj.with_suffix(".mlir") if output_path is None else Path(output_path)
+        output_path_obj = (
+            model_path_obj.with_suffix(".mlir") if output_path is None else Path(output_path)
+        )
 
         output_path_obj.parent.mkdir(parents=True, exist_ok=True)
 
diff --git a/src/polyinfer/backends/tensorrt/backend.py b/src/polyinfer/backends/tensorrt/backend.py
index fd836be..16a59b8 100644
--- a/src/polyinfer/backends/tensorrt/backend.py
+++ b/src/polyinfer/backends/tensorrt/backend.py
@@ -339,7 +339,9 @@ def load(
         # Check for cached engine
         model_path_obj = Path(model_path)
         cache_path = kwargs.get("cache_path")
-        cache_path = model_path_obj.with_suffix(".engine") if cache_path is None else Path(cache_path)
+        cache_path = (
+            model_path_obj.with_suffix(".engine") if cache_path is None else Path(cache_path)
+        )
 
         # Try to load cached engine (unless force_rebuild)
         if cache_path.exists() and not kwargs.get("force_rebuild", False):
diff --git a/src/polyinfer/quantization.py b/src/polyinfer/quantization.py
index dca5128..881a74d 100644
--- a/src/polyinfer/quantization.py
+++ b/src/polyinfer/quantization.py
@@ -499,7 +499,11 @@ def _create_nncf_dataset(model, data: CalibrationData, num_samples: int):
     if callable(data) and not isinstance(data, (list, Iterator)):
         normalized_data = data()
 
-    if isinstance(normalized_data, list) and len(normalized_data) > 0 and isinstance(normalized_data[0], np.ndarray):
+    if (
+        isinstance(normalized_data, list)
+        and len(normalized_data) > 0
+        and isinstance(normalized_data[0], np.ndarray)
+    ):
         # List of arrays
         if len(input_names) != 1:
             raise ValueError(

From 4caa6e966799fbfe0589cc22d81e21c535673a44 Mon Sep 17 00:00:00 2001
From: Athrva <athrva98@gmail.com>
Date: Sat, 20 Dec 2025 14:14:58 -0500
Subject: [PATCH 5/8] Fix mypy error in TensorRT backend __call__ return

---
 src/polyinfer/backends/tensorrt/backend.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/polyinfer/backends/tensorrt/backend.py b/src/polyinfer/backends/tensorrt/backend.py
index 16a59b8..b2e37bb 100644
--- a/src/polyinfer/backends/tensorrt/backend.py
+++ b/src/polyinfer/backends/tensorrt/backend.py
@@ -211,7 +211,8 @@ def __call__(self, *inputs: np.ndarray) -> np.ndarray | tuple[np.ndarray, ...]:
         cudart.cudaStreamSynchronize(self._stream)
 
         if len(outputs) == 1:
-            return outputs[0]
+            result: np.ndarray = outputs[0]
+            return result
         return tuple(outputs)
 
     def __del__(self):

From 99763d24fe8f5a3c05a8c626d8954c0ef543ea00 Mon Sep 17 00:00:00 2001
From: Athrva <athrva98@gmail.com>
Date: Sat, 20 Dec 2025 14:20:38 -0500
Subject: [PATCH 6/8] Fix intel devices test for CI

---
 tests/test_intel_devices.py | 215 +++++++++++++++++++++++-------------
 1 file changed, 137 insertions(+), 78 deletions(-)

diff --git a/tests/test_intel_devices.py b/tests/test_intel_devices.py
index 5510b50..186e06a 100644
--- a/tests/test_intel_devices.py
+++ b/tests/test_intel_devices.py
@@ -1,88 +1,147 @@
-"""Test polyinfer with Intel devices (CPU, iGPU, NPU)."""
+"""Test polyinfer with Intel devices (CPU, iGPU, NPU).
+
+All tests require OpenVINO and a yolov8n.onnx model, and are skipped when either is missing.
+The iGPU and NPU tests also carry the intel_gpu / npu markers and skip without that hardware.
+"""
 
 import os
 import sys
 
-sys.path.insert(0, "src")
-
 import numpy as np
+import pytest
+
+sys.path.insert(0, "src")
 
 import polyinfer as pi
-from polyinfer.backends.openvino import OpenVINOBackend
-
-# Check what's available
-print("=" * 60)
-print("PolyInfer: Intel Device Test")
-print("=" * 60)
-
-print("\nAvailable backends:", pi.list_backends())
-print("Available devices:", pi.list_devices())
-
-# Get OpenVINO backend directly to see raw device names
-ov_backend = OpenVINOBackend()
-print("\nOpenVINO raw devices:", ov_backend.get_available_devices())
-
-# Test model path, use YOLOv8n if available
-model_path = None
-for path in ["yolov8n.onnx", "examples/yolov8n.onnx", "../yolov8n.onnx"]:
-    if os.path.exists(path):
-        model_path = path
-        break
-
-if model_path is None:
-    print("\nNo test model found. Downloading yolov8n.onnx...")
-    try:
-        from ultralytics import YOLO
-
-        model = YOLO("yolov8n.pt")
-        model.export(format="onnx")
-        model_path = "yolov8n.onnx"
-    except ImportError:
-        print(
-            "Please provide a model: pip install ultralytics && yolo export model=yolov8n.pt format=onnx"
-        )
-        sys.exit(1)
-
-print(f"\nUsing model: {model_path}")
-
-# Create test input (YOLOv8n expects 1x3x640x640)
-input_data = np.random.rand(1, 3, 640, 640).astype(np.float32)
-
-# Test each device
-devices_to_test = [
-    ("cpu", "CPU (Intel Core Ultra 9)"),
-    ("intel-gpu", "Intel iGPU"),
-    ("intel-gpu:0", "Intel iGPU (explicit)"),
-    ("npu", "Intel NPU (AI Boost)"),
+
+# Check if OpenVINO is available
+try:
+    from polyinfer.backends.openvino import OpenVINOBackend
+
+    OPENVINO_AVAILABLE = True
+    ov_backend = OpenVINOBackend()
+    AVAILABLE_DEVICES = ov_backend.get_available_devices()
+except ImportError:
+    OPENVINO_AVAILABLE = False
+    AVAILABLE_DEVICES = []
+
+
+def _get_test_model():
+    """Get a test model path, or None if not available."""
+    for path in ["yolov8n.onnx", "examples/yolov8n.onnx", "../yolov8n.onnx", "tests/yolov8n.onnx"]:
+        if os.path.exists(path):
+            return path
+    return None
+
+
+# Skip all tests if OpenVINO is not installed (a missing model is handled by test_model)
+pytestmark = [
+    pytest.mark.skipif(not OPENVINO_AVAILABLE, reason="OpenVINO not available"),
 ]
 
-print("\n" + "=" * 60)
-print("Running benchmarks...")
-print("=" * 60)
-
-results = []
-for device, description in devices_to_test:
-    try:
-        print(f"\n[{device}] {description}")
-        model = pi.load(model_path, backend="openvino", device=device)
-        print(f"  Backend: {model.backend_name}")
-
-        # Benchmark
-        bench = model.benchmark(input_data, warmup=5, iterations=20)
-        print(f"  Latency: {bench['mean_ms']:.2f} ms ({bench['fps']:.1f} FPS)")
-        results.append((device, description, bench["mean_ms"], bench["fps"]))
-    except Exception as e:
-        print(f"  ERROR: {e}")
-        results.append((device, description, None, None))
-
-# Summary
-print("\n" + "=" * 60)
-print("Summary")
-print("=" * 60)
-print(f"{'Device':<20} {'Description':<30} {'Latency':>10} {'FPS':>10}")
-print("-" * 70)
-for device, desc, latency, fps in results:
-    if latency:
-        print(f"{device:<20} {desc:<30} {latency:>8.2f}ms {fps:>9.1f}")
+
+@pytest.fixture
+def test_model():
+    """Fixture providing a test model path."""
+    model_path = _get_test_model()
+    if model_path is None:
+        pytest.skip("No test model available (yolov8n.onnx)")
+    return model_path
+
+
+@pytest.fixture
+def test_input():
+    """Fixture providing test input data for YOLOv8n (1x3x640x640)."""
+    return np.random.rand(1, 3, 640, 640).astype(np.float32)
+
+
+class TestIntelCPU:
+    """Tests for Intel CPU inference."""
+
+    def test_cpu_inference(self, test_model, test_input):
+        """Test inference on CPU."""
+        model = pi.load(test_model, backend="openvino", device="cpu")
+        assert model.backend_name == "openvino"
+        output = model(test_input)
+        assert output is not None
+
+    def test_cpu_benchmark(self, test_model, test_input):
+        """Test benchmarking on CPU."""
+        model = pi.load(test_model, backend="openvino", device="cpu")
+        bench = model.benchmark(test_input, warmup=2, iterations=5)
+        assert "mean_ms" in bench
+        assert "fps" in bench
+        assert bench["mean_ms"] > 0
+
+
+@pytest.mark.intel_gpu
+class TestIntelGPU:
+    """Tests for Intel iGPU inference."""
+
+    @pytest.fixture(autouse=True)
+    def check_gpu_available(self):
+        """Skip if Intel GPU not available."""
+        if "GPU" not in AVAILABLE_DEVICES and "GPU.0" not in AVAILABLE_DEVICES:
+            pytest.skip("Intel GPU not available")
+
+    def test_igpu_inference(self, test_model, test_input):
+        """Test inference on Intel iGPU."""
+        model = pi.load(test_model, backend="openvino", device="intel-gpu")
+        assert model.backend_name == "openvino"
+        output = model(test_input)
+        assert output is not None
+
+    def test_igpu_benchmark(self, test_model, test_input):
+        """Test benchmarking on Intel iGPU."""
+        model = pi.load(test_model, backend="openvino", device="intel-gpu")
+        bench = model.benchmark(test_input, warmup=2, iterations=5)
+        assert "mean_ms" in bench
+        assert "fps" in bench
+        assert bench["mean_ms"] > 0
+
+
+@pytest.mark.npu
+class TestIntelNPU:
+    """Tests for Intel NPU (AI Boost) inference."""
+
+    @pytest.fixture(autouse=True)
+    def check_npu_available(self):
+        """Skip if Intel NPU not available."""
+        if "NPU" not in AVAILABLE_DEVICES:
+            pytest.skip("Intel NPU not available")
+
+    def test_npu_inference(self, test_model, test_input):
+        """Test inference on Intel NPU."""
+        model = pi.load(test_model, backend="openvino", device="npu")
+        assert model.backend_name == "openvino"
+        output = model(test_input)
+        assert output is not None
+
+    def test_npu_benchmark(self, test_model, test_input):
+        """Test benchmarking on Intel NPU."""
+        model = pi.load(test_model, backend="openvino", device="npu")
+        bench = model.benchmark(test_input, warmup=2, iterations=5)
+        assert "mean_ms" in bench
+        assert "fps" in bench
+        assert bench["mean_ms"] > 0
+
+
+if __name__ == "__main__":
+    # When run as a script, print backend, device, and test-model info
+    print("=" * 60)
+    print("PolyInfer: Intel Device Test")
+    print("=" * 60)
+
+    print("\nAvailable backends:", pi.list_backends())
+    print("Available devices:", pi.list_devices())
+
+    if OPENVINO_AVAILABLE:
+        print("\nOpenVINO raw devices:", AVAILABLE_DEVICES)
+    else:
+        print("\nOpenVINO not available")
+
+    model_path = _get_test_model()
+    if model_path:
+        print(f"\nTest model found: {model_path}")
     else:
-        print(f"{device:<20} {desc:<30} {'FAILED':>10} {'-':>10}")
+        print("\nNo test model found. Please provide yolov8n.onnx")

From dcf385b10cb2bf7e7bd47056f2c503045ced359f Mon Sep 17 00:00:00 2001
From: Athrva <athrva98@gmail.com>
Date: Sat, 20 Dec 2025 14:30:11 -0500
Subject: [PATCH 7/8] Add onnxruntime to dev dependencies; skip tests without backends

---
 pyproject.toml             |  2 ++
 tests/test_backends.py     |  9 ++++++++-
 tests/test_mlir.py         |  6 ++++++
 tests/test_quantization.py | 21 +++++++++++++++++++++
 4 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 533672c..86d2133 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -151,6 +151,8 @@ dev = [
     "pytest-benchmark>=4.0",
     "ruff>=0.1",
     "mypy>=1.0",
+    # Include onnxruntime so basic tests can run
+    "onnxruntime>=1.17",
 ]
 
 [project.urls]
diff --git a/tests/test_backends.py b/tests/test_backends.py
index ecd6db4..fd7c025 100644
--- a/tests/test_backends.py
+++ b/tests/test_backends.py
@@ -5,6 +5,10 @@
 import polyinfer as pi
 from polyinfer.backends.registry import get_all_backends, get_backend
 
+# Check if any backend is available
+_BACKENDS = pi.list_backends()
+_HAS_ANY_BACKEND = len(_BACKENDS) > 0
+
 
 class TestBackendDiscovery:
     """Test backend discovery functionality."""
@@ -14,6 +18,7 @@ def test_list_backends_returns_list(self):
         backends = pi.list_backends()
         assert isinstance(backends, list)
 
+    @pytest.mark.skipif(not _HAS_ANY_BACKEND, reason="No backends installed")
     def test_list_backends_not_empty(self):
         """At least one backend should be available."""
         backends = pi.list_backends()
@@ -24,8 +29,9 @@ def test_list_devices_returns_list(self):
         devices = pi.list_devices()
         assert isinstance(devices, list)
 
+    @pytest.mark.skipif(not _HAS_ANY_BACKEND, reason="No backends installed")
     def test_cpu_always_available(self):
-        """CPU device should always be available."""
+        """CPU device should always be available when backends are installed."""
         devices = pi.list_devices()
         device_names = [d.name for d in devices]
         assert "cpu" in device_names, "CPU device not found"
@@ -128,6 +134,7 @@ def test_backends_have_priority(self):
             assert isinstance(backend.priority, int)
             assert backend.priority >= 0
 
+    @pytest.mark.skipif(not _HAS_ANY_BACKEND, reason="No backends installed")
     def test_select_backend_for_cpu(self):
         """Auto-selection should work for CPU."""
         from polyinfer.discovery import select_backend
diff --git a/tests/test_mlir.py b/tests/test_mlir.py
index be48f7d..9e11eb0 100644
--- a/tests/test_mlir.py
+++ b/tests/test_mlir.py
@@ -8,6 +8,12 @@
 
 import polyinfer as pi
 
+# Check if IREE is available
+IREE_AVAILABLE = pi.is_available("iree")
+
+# Skip all tests in this module if IREE is not available
+pytestmark = pytest.mark.skipif(not IREE_AVAILABLE, reason="IREE backend not available")
+
 # =============================================================================
 # Fixtures
 # =============================================================================
diff --git a/tests/test_quantization.py b/tests/test_quantization.py
index ceb5317..7d78122 100644
--- a/tests/test_quantization.py
+++ b/tests/test_quantization.py
@@ -7,6 +7,17 @@
 
 import polyinfer as pi
 
+# Check if onnxruntime quantization is available
+try:
+    from onnxruntime.quantization import quantize_dynamic as _  # noqa: F401
+
+    ONNXRUNTIME_QUANT_AVAILABLE = True
+except ImportError:
+    ONNXRUNTIME_QUANT_AVAILABLE = False
+
+# Check if any backend can load models
+_HAS_ANY_BACKEND = len(pi.list_backends()) > 0
+
 
 class TestQuantizationAPI:
     """Test quantization API availability and basic functionality."""
@@ -46,6 +57,9 @@ def test_calibration_method_values(self):
         assert pi.CalibrationMethod.PERCENTILE.value == "percentile"
 
 
+@pytest.mark.skipif(
+    not ONNXRUNTIME_QUANT_AVAILABLE, reason="onnxruntime quantization not installed"
+)
 class TestDynamicQuantization:
     """Test dynamic quantization (no calibration needed)."""
 
@@ -116,6 +130,7 @@ def test_quantize_function_dynamic(self, simple_model, tmp_path):
         assert output_path.exists()
         assert result.method == "dynamic"
 
+    @pytest.mark.skipif(not _HAS_ANY_BACKEND, reason="No backends installed")
     def test_quantized_model_loads(self, simple_model, tmp_path):
         """Test that quantized model can be loaded and run."""
         output_path = tmp_path / "model_int8.onnx"
@@ -131,6 +146,9 @@ def test_quantized_model_loads(self, simple_model, tmp_path):
         assert output.shape == (1, 3, 32, 32)
 
 
+@pytest.mark.skipif(
+    not ONNXRUNTIME_QUANT_AVAILABLE, reason="onnxruntime quantization not installed"
+)
 class TestStaticQuantization:
     """Test static quantization (requires calibration)."""
 
@@ -304,6 +322,9 @@ def test_fp16_model_runs(self, simple_model, tmp_path):
         assert output is not None
 
 
+@pytest.mark.skipif(
+    not ONNXRUNTIME_QUANT_AVAILABLE, reason="onnxruntime quantization not installed"
+)
 class TestQuantizationResult:
     """Test QuantizationResult dataclass."""
 

From b06707ba24de0c3f4d258227146dae3c18232e1f Mon Sep 17 00:00:00 2001
From: Athrva <athrva98@gmail.com>
Date: Sat, 20 Dec 2025 14:33:33 -0500
Subject: [PATCH 8/8] Fix opset version in test

---
 tests/test_backend_options.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_backend_options.py b/tests/test_backend_options.py
index 80c6867..fb445a1 100644
--- a/tests/test_backend_options.py
+++ b/tests/test_backend_options.py
@@ -40,7 +40,8 @@ def dummy_onnx_model(tmp_path):
         [const_tensor],
     )
 
-    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 17)])
+    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
+    model.ir_version = 8  # Use IR version 8 for broader compatibility
     model_path = tmp_path / "test_model.onnx"
     onnx.save(model, str(model_path))