Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/whichllm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@
from whichllm.data.gpu import (
_GiB,
AMD_SHARED_MEMORY_APU_MARKERS,
CURATED_GPU_SPECS,
CuratedGPUSpec,
GPU_BANDWIDTH,
GPU_MEMORY_CLOCK_VARIANTS,
INTEL_PCI_DEVICE_NAMES,
NVIDIA_COMPUTE_CAPABILITY,
VULKAN_ONLY_GPUS,
)
Expand All @@ -32,9 +35,12 @@
__all__ = [
"_GiB",
"AMD_SHARED_MEMORY_APU_MARKERS",
"CURATED_GPU_SPECS",
"CuratedGPUSpec",
"FRAMEWORK_OVERHEAD_BYTES",
"GPU_BANDWIDTH",
"GPU_MEMORY_CLOCK_VARIANTS",
"INTEL_PCI_DEVICE_NAMES",
"MIN_COMPUTE_CAPABILITY_OLLAMA",
"MIN_COMPUTE_CAPABILITY_VLLM",
"MODEL_GENERATION_BONUS_MAX",
Expand Down
39 changes: 38 additions & 1 deletion src/whichllm/data/gpu.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,22 @@
"""GPU bandwidth, NVIDIA compute capability, and AMD shared-memory APU markers."""
"""GPU bandwidth, VRAM, NVIDIA compute capability, and GPU markers."""

from __future__ import annotations

from typing import NamedTuple

_GiB = 1024**3


class CuratedGPUSpec(NamedTuple):
    """Hand-maintained record for a GPU that dbgpu lacks or reports ambiguously.

    Fields are positional in declaration order; only ``shared_memory`` has a
    default, so the first four must always be supplied.
    """

    # Human-readable device name.
    name: str
    # Vendor tag (the entries in this module use lowercase, e.g. "intel").
    vendor: str
    # Memory capacity in GB.
    vram_gb: float
    # Memory bandwidth; presumably GB/s despite the "gbps" suffix (TODO confirm).
    memory_bandwidth_gbps: float
    # Whether the GPU's memory is shared; defaults to False.
    shared_memory: bool = False


AMD_SHARED_MEMORY_APU_MARKERS: tuple[str, ...] = (
"STRIX HALO",
"STRXLGEN",
Expand Down Expand Up @@ -141,6 +156,9 @@
"MI300X": 5300.0,
"MI250X": 3276.0,
"MI210": 1638.0,
# Intel discrete GPUs
"Arc Pro B70": 608.0,
"Battlemage G31": 608.0,
# Apple Silicon (unified memory bandwidth)
"M1 Ultra": 800.0,
"M1 Max": 400.0,
Expand All @@ -163,6 +181,25 @@
"M5": 153.0,
}

# Curated fallback specs, keyed by a substring expected to appear in the GPU name.
# Positional fields: display name, vendor, vram_gb, memory_bandwidth_gbps
# (shared_memory keeps its default of False).
CURATED_GPU_SPECS: dict[str, CuratedGPUSpec] = {
    "Arc Pro B70": CuratedGPUSpec("Intel Arc Pro B70", "intel", 32.0, 608.0),
    "Battlemage G31": CuratedGPUSpec(
        "Battlemage G31 [Intel Graphics]", "intel", 32.0, 608.0
    ),
}

# PCI device ID (lowercase hex string) -> GPU name.
# The mapped name contains the "Battlemage G31" key above, so a substring
# lookup of that name against CURATED_GPU_SPECS finds the curated entry.
INTEL_PCI_DEVICE_NAMES: dict[str, str] = {
    "0xe223": "Battlemage G31 [Intel Graphics]",
}

# NVIDIA GPU compute capability lookup (substring match, case-insensitive)
NVIDIA_COMPUTE_CAPABILITY: dict[str, tuple[int, int]] = {
# RTX 50 series (Blackwell)
Expand Down
6 changes: 5 additions & 1 deletion src/whichllm/hardware/gpu_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,11 @@ def _static_bandwidth(name: str) -> float | None:
if not name:
return None
if "/" not in name:
return _substring_bandwidth(name)
bandwidth = _substring_bandwidth(name)
if bandwidth is not None:
return bandwidth
normalized = _normalize_detected_name(name)
return _substring_bandwidth(normalized) if normalized != name else None
bracket = _BRACKET_RE.search(name)
raw = bracket.group(1) if bracket else name
for seg in raw.split("/"):
Expand Down
32 changes: 29 additions & 3 deletions src/whichllm/hardware/gpu_simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,13 @@
if TYPE_CHECKING:
from dbgpu import GPUSpecification

from whichllm.constants import AMD_SHARED_MEMORY_APU_MARKERS, GPU_BANDWIDTH, _GiB
from whichllm.constants import (
AMD_SHARED_MEMORY_APU_MARKERS,
CURATED_GPU_SPECS,
GPU_BANDWIDTH,
CuratedGPUSpec,
_GiB,
)
from whichllm.hardware.types import GPUInfo

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -105,6 +111,14 @@ def _lookup_static_bandwidth(name: str) -> float | None:
return None


def _lookup_curated_spec(name: str) -> CuratedGPUSpec | None:
    """Return the curated spec whose key occurs in *name*, or ``None``.

    Matching is a case-insensitive substring test. Keys are tried longest
    first, so a more specific key wins over a shorter key that also matches.
    """
    haystack = name.upper()
    longest_first = sorted(CURATED_GPU_SPECS, key=len, reverse=True)
    return next(
        (CURATED_GPU_SPECS[key] for key in longest_first if key.upper() in haystack),
        None,
    )


def _normalize_gpu_name(name: str) -> str:
"""Normalize user input: 'GTX1080' → 'GTX 1080', 'RX7900XTX' → 'RX 7900 XTX'."""
# Insert space between letters and digits
Expand Down Expand Up @@ -257,6 +271,7 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP
_last_suggestions.clear()

amd_shared_memory_apu = _is_amd_shared_memory_apu(name)
curated = _lookup_curated_spec(name)

# Apple Silicon short-circuit: dbgpu has no Apple entries, so we check
# first to avoid fuzzy-matching "M1" against "Rage Mobility-M1".
Expand All @@ -280,6 +295,8 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP
vram_bytes = int(vram_override_gb * _GiB)
elif spec is not None and spec.memory_size_gb:
vram_bytes = int(spec.memory_size_gb * _GiB)
elif curated is not None:
vram_bytes = int(curated.vram_gb * _GiB)
else:
msg = f"Unknown GPU '{name}'."
if _last_suggestions:
Expand All @@ -292,6 +309,8 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP
bandwidth: float | None = None
if spec is not None and spec.memory_bandwidth_gb_s:
bandwidth = spec.memory_bandwidth_gb_s
if bandwidth is None and curated is not None:
bandwidth = curated.memory_bandwidth_gbps
if bandwidth is None:
bandwidth = _lookup_static_bandwidth(name)

Expand All @@ -304,17 +323,24 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP
vendor = "nvidia"
if spec is not None:
vendor = _MANUFACTURER_TO_VENDOR.get(spec.manufacturer, "nvidia")
elif curated is not None:
vendor = curated.vendor
elif amd_shared_memory_apu:
vendor = "amd"

display_name = spec.name if spec is not None else name
if spec is not None:
display_name = spec.name
elif curated is not None:
display_name = curated.name
else:
display_name = name

return GPUInfo(
name=f"{display_name} (simulated)",
vendor=vendor,
vram_bytes=vram_bytes,
compute_capability=compute_cap,
memory_bandwidth_gbps=bandwidth,
shared_memory=amd_shared_memory_apu,
shared_memory=curated.shared_memory if curated else amd_shared_memory_apu,
vram_overridden=vram_override_gb is not None,
)
54 changes: 44 additions & 10 deletions src/whichllm/hardware/intel.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
import subprocess
from pathlib import Path

from whichllm.constants import (
CURATED_GPU_SPECS,
INTEL_PCI_DEVICE_NAMES,
CuratedGPUSpec,
_GiB,
)
from whichllm.hardware.types import GPUInfo

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -74,9 +80,19 @@ def _detect_from_sysfs(drm_path: Path = Path("/sys/class/drm")) -> list[str]:
continue

name = "Intel Integrated Graphics"
known_device = False
try:
device_id = (device / "device").read_text().strip().lower()
mapped_name = INTEL_PCI_DEVICE_NAMES.get(device_id)
if mapped_name:
name = mapped_name
known_device = True
except OSError:
pass

try:
product_name = (device / "product_name").read_text().strip()
if product_name:
if product_name and not known_device:
name = product_name
except OSError:
pass
Expand All @@ -87,16 +103,34 @@ def _detect_from_sysfs(drm_path: Path = Path("/sys/class/drm")) -> list[str]:
return names


def _lookup_curated_spec(name: str) -> CuratedGPUSpec | None:
    """Find the curated spec for a detected GPU name.

    A key matches when it appears in *name*, ignoring case. If several keys
    match, the longest one is used; ties go to the key defined first.
    Returns ``None`` when no key matches.
    """
    upper_name = name.upper()
    matching_keys = [key for key in CURATED_GPU_SPECS if key.upper() in upper_name]
    if not matching_keys:
        return None
    # max() returns the first maximal item, which preserves definition order on ties.
    return CURATED_GPU_SPECS[max(matching_keys, key=len)]


def _gpu_info_from_name(name: str) -> GPUInfo:
    """Build the ``GPUInfo`` for one detected Intel GPU name.

    A name without a curated spec is reported as a shared-memory device with
    no VRAM of its own. A name with a curated spec takes its vendor, VRAM,
    bandwidth, and shared-memory flag from that spec, while the detected
    *name* itself is kept as the display name.
    """
    spec = _lookup_curated_spec(name)
    if spec is None:
        return GPUInfo(
            name=name,
            vendor="intel",
            vram_bytes=0,
            shared_memory=True,
        )

    vram_bytes = int(spec.vram_gb * _GiB)
    return GPUInfo(
        name=name,
        vendor=spec.vendor,
        vram_bytes=vram_bytes,
        memory_bandwidth_gbps=spec.memory_bandwidth_gbps,
        shared_memory=spec.shared_memory,
    )


def detect_intel_gpus() -> list[GPUInfo]:
    """Detect Intel GPUs on Linux.

    Names come from ``_detect_from_lspci()``; ``_detect_from_sysfs()`` is used
    only when lspci yields nothing. Each name is converted with
    ``_gpu_info_from_name``, so devices with a curated spec get real VRAM and
    bandwidth figures instead of the shared-memory placeholder.

    Returns:
        One ``GPUInfo`` per detected name; an empty list when neither source
        reports a device.
    """
    names = _detect_from_lspci() or _detect_from_sysfs()
    # Single return path: the old inline ``GPUInfo(..., vram_bytes=0,
    # shared_memory=True)`` comprehension returned first and made the
    # curated-spec conversion below unreachable.
    return [_gpu_info_from_name(name) for name in names]
11 changes: 9 additions & 2 deletions src/whichllm/models/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import httpx

from whichllm.models.http import DEFAULT_ACCEPT_ENCODING
from whichllm.utils import _cache_dir, _current_version

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -153,8 +154,14 @@ async def fetch_benchmark_scores() -> dict[str, float]:
get_livebench_data,
)

async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
client.headers["User-Agent"] = f"whichllm/{_current_version()}"
async with httpx.AsyncClient(
timeout=30.0,
follow_redirects=True,
headers={
"Accept-Encoding": DEFAULT_ACCEPT_ENCODING,
"User-Agent": f"whichllm/{_current_version()}",
},
) as client:
leaderboard_task = asyncio.create_task(fetch_leaderboard_with_fallback(client))
arena_task = asyncio.create_task(fetch_arena_scores(client))
aa_task = asyncio.create_task(fetch_aa_index_scores(client))
Expand Down
14 changes: 11 additions & 3 deletions src/whichllm/models/fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import httpx

from whichllm.constants import QUANT_BYTES_PER_WEIGHT
from whichllm.models.http import get_with_retries
from whichllm.models.http import DEFAULT_ACCEPT_ENCODING, get_with_retries
from whichllm.models.types import GGUFVariant, ModelInfo

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -717,7 +717,11 @@ async def fetch_models(
"""Fetch popular models from HuggingFace Hub."""
models: list[ModelInfo] = []

async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
async with httpx.AsyncClient(
timeout=30.0,
follow_redirects=True,
headers={"Accept-Encoding": DEFAULT_ACCEPT_ENCODING},
) as client:
# Fetch top text-generation models
params = {
"pipeline_tag": "text-generation",
Expand Down Expand Up @@ -1076,7 +1080,11 @@ async def fetch_model_published_at(model_ids: list[str]) -> dict[str, str]:
if not unique_ids:
return {}

async with httpx.AsyncClient(timeout=20.0, follow_redirects=True) as client:
async with httpx.AsyncClient(
timeout=20.0,
follow_redirects=True,
headers={"Accept-Encoding": DEFAULT_ACCEPT_ENCODING},
) as client:
tasks = [
client.get(
_hf_api_url(f"models/{model_id}"),
Expand Down
1 change: 1 addition & 0 deletions src/whichllm/models/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import httpx

RETRYABLE_STATUS_CODES = {408, 429, 500, 502, 503, 504}
DEFAULT_ACCEPT_ENCODING = "gzip, deflate"


async def get_with_retries(
Expand Down
24 changes: 24 additions & 0 deletions tests/test_benchmark_lookup.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,38 @@
"""Tests for benchmark lookup direct/inherited semantics."""

import asyncio

import whichllm.models.benchmark_sources as benchmark_sources
from whichllm.models.benchmark import (
_lineage_recency_factor,
build_line_bucket_index,
build_score_index,
fetch_benchmark_scores,
lookup_benchmark,
lookup_benchmark_evidence,
)


def test_fetch_benchmark_scores_disables_brotli_accept_encoding(monkeypatch):
    """Every benchmark source must see a client that only accepts gzip/deflate."""
    seen: list[str] = []

    async def record_encoding(client):
        # Capture the header the shared client would send, then report no scores.
        seen.append(client.headers["accept-encoding"])
        return {}

    async_sources = (
        "fetch_leaderboard_with_fallback",
        "fetch_arena_scores",
        "fetch_aa_index_scores",
        "fetch_aider_polyglot_scores",
        "fetch_vision_scores",
    )
    for source_name in async_sources:
        monkeypatch.setattr(benchmark_sources, source_name, record_encoding)
    # The livebench source is synchronous and takes no client.
    monkeypatch.setattr(benchmark_sources, "get_livebench_data", lambda: {})

    result = asyncio.run(fetch_benchmark_scores())

    assert result == {}
    assert set(seen) == {"gzip, deflate"}


def test_lookup_benchmark_model_id_match_is_direct():
scores = {"Qwen/Qwen2.5-7B-Instruct": 70.0}
ci, line = build_score_index(scores)
Expand Down
Loading