From 328030b0f6f47c7bc478fa2eb925b0a7ef9f28f5 Mon Sep 17 00:00:00 2001 From: Andy Date: Mon, 29 Jun 2026 12:49:27 +0900 Subject: [PATCH] fix: handle Arc Pro B70 and avoid brotli fetch failures --- src/whichllm/constants.py | 6 +++ src/whichllm/data/gpu.py | 39 ++++++++++++++++++- src/whichllm/hardware/gpu_db.py | 6 ++- src/whichllm/hardware/gpu_simulator.py | 32 +++++++++++++-- src/whichllm/hardware/intel.py | 54 +++++++++++++++++++++----- src/whichllm/models/benchmark.py | 11 +++++- src/whichllm/models/fetcher.py | 14 +++++-- src/whichllm/models/http.py | 1 + tests/test_benchmark_lookup.py | 24 ++++++++++++ tests/test_fetcher.py | 6 +++ tests/test_gpu_db.py | 6 +-- tests/test_gpu_simulator.py | 8 ++++ tests/test_intel_gpu.py | 40 +++++++++++++++++++ 13 files changed, 224 insertions(+), 23 deletions(-) diff --git a/src/whichllm/constants.py b/src/whichllm/constants.py index 5918530..a67354d 100644 --- a/src/whichllm/constants.py +++ b/src/whichllm/constants.py @@ -13,8 +13,11 @@ from whichllm.data.gpu import ( _GiB, AMD_SHARED_MEMORY_APU_MARKERS, + CURATED_GPU_SPECS, + CuratedGPUSpec, GPU_BANDWIDTH, GPU_MEMORY_CLOCK_VARIANTS, + INTEL_PCI_DEVICE_NAMES, NVIDIA_COMPUTE_CAPABILITY, VULKAN_ONLY_GPUS, ) @@ -32,9 +35,12 @@ __all__ = [ "_GiB", "AMD_SHARED_MEMORY_APU_MARKERS", + "CURATED_GPU_SPECS", + "CuratedGPUSpec", "FRAMEWORK_OVERHEAD_BYTES", "GPU_BANDWIDTH", "GPU_MEMORY_CLOCK_VARIANTS", + "INTEL_PCI_DEVICE_NAMES", "MIN_COMPUTE_CAPABILITY_OLLAMA", "MIN_COMPUTE_CAPABILITY_VLLM", "MODEL_GENERATION_BONUS_MAX", diff --git a/src/whichllm/data/gpu.py b/src/whichllm/data/gpu.py index 429e613..ea87181 100644 --- a/src/whichllm/data/gpu.py +++ b/src/whichllm/data/gpu.py @@ -1,7 +1,22 @@ -"""GPU bandwidth, NVIDIA compute capability, and AMD shared-memory APU markers.""" +"""GPU bandwidth, VRAM, NVIDIA compute capability, and GPU markers.""" + +from __future__ import annotations + +from typing import NamedTuple _GiB = 1024**3 + +class CuratedGPUSpec(NamedTuple): + """Small curated spec for GPUs missing or ambiguous in dbgpu.""" + + name: str + vendor: str + vram_gb: float + memory_bandwidth_gbps: float + shared_memory: bool = False + + AMD_SHARED_MEMORY_APU_MARKERS: tuple[str, ...] = ( "STRIX HALO", "STRXLGEN", @@ -141,6 +156,9 @@ "MI300X": 5300.0, "MI250X": 3276.0, "MI210": 1638.0, + # Intel discrete GPUs + "Arc Pro B70": 608.0, + "Battlemage G31": 608.0, # Apple Silicon (unified memory bandwidth) "M1 Ultra": 800.0, "M1 Max": 400.0, @@ -163,6 +181,25 @@ "M5": 153.0, } +CURATED_GPU_SPECS: dict[str, CuratedGPUSpec] = { + "Arc Pro B70": CuratedGPUSpec( + name="Intel Arc Pro B70", + vendor="intel", + vram_gb=32.0, + memory_bandwidth_gbps=608.0, + ), + "Battlemage G31": CuratedGPUSpec( + name="Battlemage G31 [Intel Graphics]", + vendor="intel", + vram_gb=32.0, + memory_bandwidth_gbps=608.0, + ), +} + +INTEL_PCI_DEVICE_NAMES: dict[str, str] = { + "0xe223": "Battlemage G31 [Intel Graphics]", +} + # NVIDIA GPU compute capability lookup (substring match, case-insensitive) NVIDIA_COMPUTE_CAPABILITY: dict[str, tuple[int, int]] = { # RTX 50 series (Blackwell) diff --git a/src/whichllm/hardware/gpu_db.py b/src/whichllm/hardware/gpu_db.py index 2ff74b9..e679615 100644 --- a/src/whichllm/hardware/gpu_db.py +++ b/src/whichllm/hardware/gpu_db.py @@ -93,7 +93,11 @@ def _static_bandwidth(name: str) -> float | None: if not name: return None if "/" not in name: - return _substring_bandwidth(name) + bandwidth = _substring_bandwidth(name) + if bandwidth is not None: + return bandwidth + normalized = _normalize_detected_name(name) + return _substring_bandwidth(normalized) if normalized != name else None bracket = _BRACKET_RE.search(name) raw = bracket.group(1) if bracket else name for seg in raw.split("/"): diff --git a/src/whichllm/hardware/gpu_simulator.py b/src/whichllm/hardware/gpu_simulator.py index 768cbca..e45ccde 100644 --- a/src/whichllm/hardware/gpu_simulator.py +++ b/src/whichllm/hardware/gpu_simulator.py @@ -14,7 +14,13 @@ if TYPE_CHECKING: from dbgpu import GPUSpecification -from whichllm.constants import AMD_SHARED_MEMORY_APU_MARKERS, GPU_BANDWIDTH, _GiB +from whichllm.constants import ( + AMD_SHARED_MEMORY_APU_MARKERS, + CURATED_GPU_SPECS, + GPU_BANDWIDTH, + CuratedGPUSpec, + _GiB, +) from whichllm.hardware.types import GPUInfo logger = logging.getLogger(__name__) @@ -105,6 +111,14 @@ def _lookup_static_bandwidth(name: str) -> float | None: return None +def _lookup_curated_spec(name: str) -> CuratedGPUSpec | None: + name_upper = name.upper() + for key in sorted(CURATED_GPU_SPECS, key=len, reverse=True): + if key.upper() in name_upper: + return CURATED_GPU_SPECS[key] + return None + + def _normalize_gpu_name(name: str) -> str: """Normalize user input: 'GTX1080' → 'GTX 1080', 'RX7900XTX' → 'RX 7900 XTX'.""" # Insert space between letters and digits @@ -257,6 +271,7 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP _last_suggestions.clear() amd_shared_memory_apu = _is_amd_shared_memory_apu(name) + curated = _lookup_curated_spec(name) # Apple Silicon short-circuit: dbgpu has no Apple entries, so we check # first to avoid fuzzy-matching "M1" against "Rage Mobility-M1". @@ -280,6 +295,8 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP vram_bytes = int(vram_override_gb * _GiB) elif spec is not None and spec.memory_size_gb: vram_bytes = int(spec.memory_size_gb * _GiB) + elif curated is not None: + vram_bytes = int(curated.vram_gb * _GiB) else: msg = f"Unknown GPU '{name}'." if _last_suggestions: @@ -292,6 +309,8 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP bandwidth: float | None = None if spec is not None and spec.memory_bandwidth_gb_s: bandwidth = spec.memory_bandwidth_gb_s + if bandwidth is None and curated is not None: + bandwidth = curated.memory_bandwidth_gbps if bandwidth is None: bandwidth = _lookup_static_bandwidth(name) @@ -304,10 +323,17 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP vendor = "nvidia" if spec is not None: vendor = _MANUFACTURER_TO_VENDOR.get(spec.manufacturer, "nvidia") + elif curated is not None: + vendor = curated.vendor elif amd_shared_memory_apu: vendor = "amd" - display_name = spec.name if spec is not None else name + if spec is not None: + display_name = spec.name + elif curated is not None: + display_name = curated.name + else: + display_name = name return GPUInfo( name=f"{display_name} (simulated)", @@ -315,6 +341,6 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP vram_bytes=vram_bytes, compute_capability=compute_cap, memory_bandwidth_gbps=bandwidth, - shared_memory=amd_shared_memory_apu, + shared_memory=curated.shared_memory if curated else amd_shared_memory_apu, vram_overridden=vram_override_gb is not None, ) diff --git a/src/whichllm/hardware/intel.py b/src/whichllm/hardware/intel.py index 64f97e4..127a496 100644 --- a/src/whichllm/hardware/intel.py +++ b/src/whichllm/hardware/intel.py @@ -6,6 +6,12 @@ import subprocess from pathlib import Path +from whichllm.constants import ( + CURATED_GPU_SPECS, + INTEL_PCI_DEVICE_NAMES, + CuratedGPUSpec, + _GiB, +) from whichllm.hardware.types import GPUInfo logger = logging.getLogger(__name__) @@ -74,9 +80,19 @@ def _detect_from_sysfs(drm_path: Path = Path("/sys/class/drm")) -> list[str]: continue name = "Intel Integrated Graphics" + known_device = False + try: + device_id = (device / "device").read_text().strip().lower() + mapped_name = INTEL_PCI_DEVICE_NAMES.get(device_id) + if mapped_name: + name = mapped_name + known_device = True + except OSError: + pass + try: product_name = (device / "product_name").read_text().strip() - if product_name: + if product_name and not known_device: name = product_name except OSError: pass @@ -87,16 +103,34 @@ def _detect_from_sysfs(drm_path: Path = Path("/sys/class/drm")) -> list[str]: return names +def _lookup_curated_spec(name: str) -> CuratedGPUSpec | None: + name_upper = name.upper() + for key in sorted(CURATED_GPU_SPECS, key=len, reverse=True): + if key.upper() in name_upper: + return CURATED_GPU_SPECS[key] + return None + + +def _gpu_info_from_name(name: str) -> GPUInfo: + curated = _lookup_curated_spec(name) + if curated is not None: + return GPUInfo( + name=name, + vendor=curated.vendor, + vram_bytes=int(curated.vram_gb * _GiB), + memory_bandwidth_gbps=curated.memory_bandwidth_gbps, + shared_memory=curated.shared_memory, + ) + return GPUInfo( + name=name, + vendor="intel", + vram_bytes=0, + shared_memory=True, + ) + + def detect_intel_gpus() -> list[GPUInfo]: """Detect Linux Intel iGPUs. Returns empty list on failure.""" names = _detect_from_lspci() or _detect_from_sysfs() - return [ - GPUInfo( - name=name, - vendor="intel", - vram_bytes=0, - shared_memory=True, - ) - for name in names - ] + return [_gpu_info_from_name(name) for name in names] diff --git a/src/whichllm/models/benchmark.py b/src/whichllm/models/benchmark.py index 706b61b..1ab31f9 100644 --- a/src/whichllm/models/benchmark.py +++ b/src/whichllm/models/benchmark.py @@ -13,6 +13,7 @@ import httpx +from whichllm.models.http import DEFAULT_ACCEPT_ENCODING from whichllm.utils import _cache_dir, _current_version logger = logging.getLogger(__name__) @@ -153,8 +154,14 @@ async def fetch_benchmark_scores() -> dict[str, float]: get_livebench_data, ) - async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client: - client.headers["User-Agent"] = f"whichllm/{_current_version()}" + async with httpx.AsyncClient( + timeout=30.0, + follow_redirects=True, + headers={ + "Accept-Encoding": DEFAULT_ACCEPT_ENCODING, + "User-Agent": f"whichllm/{_current_version()}", + }, + ) as client: leaderboard_task = asyncio.create_task(fetch_leaderboard_with_fallback(client)) arena_task = asyncio.create_task(fetch_arena_scores(client)) aa_task = asyncio.create_task(fetch_aa_index_scores(client)) diff --git a/src/whichllm/models/fetcher.py b/src/whichllm/models/fetcher.py index 7c99929..a384cc4 100644 --- a/src/whichllm/models/fetcher.py +++ b/src/whichllm/models/fetcher.py @@ -11,7 +11,7 @@ import httpx from whichllm.constants import QUANT_BYTES_PER_WEIGHT -from whichllm.models.http import get_with_retries +from whichllm.models.http import DEFAULT_ACCEPT_ENCODING, get_with_retries from whichllm.models.types import GGUFVariant, ModelInfo logger = logging.getLogger(__name__) @@ -717,7 +717,11 @@ async def fetch_models( """Fetch popular models from HuggingFace Hub.""" models: list[ModelInfo] = [] - async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client: + async with httpx.AsyncClient( + timeout=30.0, + follow_redirects=True, + headers={"Accept-Encoding": DEFAULT_ACCEPT_ENCODING}, + ) as client: # Fetch top text-generation models params = { "pipeline_tag": "text-generation", @@ -1076,7 +1080,11 @@ async def fetch_model_published_at(model_ids: list[str]) -> dict[str, str]: if not unique_ids: return {} - async with httpx.AsyncClient(timeout=20.0, follow_redirects=True) as client: + async with httpx.AsyncClient( + timeout=20.0, + follow_redirects=True, + headers={"Accept-Encoding": DEFAULT_ACCEPT_ENCODING}, + ) as client: tasks = [ client.get( _hf_api_url(f"models/{model_id}"), diff --git a/src/whichllm/models/http.py b/src/whichllm/models/http.py index 7459449..a71b076 100644 --- a/src/whichllm/models/http.py +++ b/src/whichllm/models/http.py @@ -6,6 +6,7 @@ import httpx RETRYABLE_STATUS_CODES = {408, 429, 500, 502, 503, 504} +DEFAULT_ACCEPT_ENCODING = "gzip, deflate" async def get_with_retries( diff --git a/tests/test_benchmark_lookup.py b/tests/test_benchmark_lookup.py index 31725c1..c531b82 100644 --- a/tests/test_benchmark_lookup.py +++ b/tests/test_benchmark_lookup.py @@ -1,14 +1,38 @@ """Tests for benchmark lookup direct/inherited semantics.""" +import asyncio + +import whichllm.models.benchmark_sources as benchmark_sources from whichllm.models.benchmark import ( _lineage_recency_factor, build_line_bucket_index, build_score_index, + fetch_benchmark_scores, lookup_benchmark, lookup_benchmark_evidence, ) +def test_fetch_benchmark_scores_disables_brotli_accept_encoding(monkeypatch): + encodings: list[str] = [] + + async def fake_source(client): + encodings.append(client.headers["accept-encoding"]) + return {} + + monkeypatch.setattr( + benchmark_sources, "fetch_leaderboard_with_fallback", fake_source + ) + monkeypatch.setattr(benchmark_sources, "fetch_arena_scores", fake_source) + monkeypatch.setattr(benchmark_sources, "fetch_aa_index_scores", fake_source) + monkeypatch.setattr(benchmark_sources, "fetch_aider_polyglot_scores", fake_source) + monkeypatch.setattr(benchmark_sources, "fetch_vision_scores", fake_source) + monkeypatch.setattr(benchmark_sources, "get_livebench_data", lambda: {}) + + assert asyncio.run(fetch_benchmark_scores()) == {} + assert set(encodings) == {"gzip, deflate"} + + def test_lookup_benchmark_model_id_match_is_direct(): scores = {"Qwen/Qwen2.5-7B-Instruct": 70.0} ci, line = build_score_index(scores) diff --git a/tests/test_fetcher.py b/tests/test_fetcher.py index 2d714d3..0984e03 100644 --- a/tests/test_fetcher.py +++ b/tests/test_fetcher.py @@ -62,9 +62,11 @@ def test_hf_api_url_rejects_endpoint_without_scheme(monkeypatch): def test_fetch_models_respects_hf_endpoint(monkeypatch): monkeypatch.setenv("HF_ENDPOINT", "https://hf-mirror.example") urls: list[str] = [] + encodings: list[str] = [] async def fake_get_with_retries(client, url: str, **kwargs): urls.append(url) + encodings.append(client.headers["accept-encoding"]) request = httpx.Request("GET", url) if "/models/" in url: return httpx.Response(404, request=request) @@ -79,14 +81,17 @@ async def fake_get_with_retries(client, url: str, **kwargs): assert all(url.startswith("https://hf-mirror.example/api/") for url in urls) assert "https://hf-mirror.example/api/models" in urls assert not any(url.startswith("https://huggingface.co/api/") for url in urls) + assert set(encodings) == {"gzip, deflate"} def test_fetch_model_published_at_respects_hf_endpoint(monkeypatch): monkeypatch.setenv("HF_ENDPOINT", "https://hf-mirror.example") urls: list[str] = [] + encodings: list[str] = [] async def fake_get(self, url: str, **kwargs): urls.append(url) + encodings.append(self.headers["accept-encoding"]) return httpx.Response( 200, json={"createdAt": "2026-06-22T00:00:00.000Z"}, @@ -99,6 +104,7 @@ async def fake_get(self, url: str, **kwargs): assert result == {"Qwen/Qwen3-8B": "2026-06-22T00:00:00.000Z"} assert urls == ["https://hf-mirror.example/api/models/Qwen/Qwen3-8B"] + assert encodings == ["gzip, deflate"] def test_normalize_param_count_for_quantized_repo_uses_size_hint(): diff --git a/tests/test_gpu_db.py b/tests/test_gpu_db.py index 12643d2..1063091 100644 --- a/tests/test_gpu_db.py +++ b/tests/test_gpu_db.py @@ -86,9 +86,9 @@ def test_resolve_variant_qualifier_is_preserved(): assert 200 < bw < 400 -def test_resolve_unknown_gpu_returns_none_not_wrong_guess(): - # Arc Pro B70 is not in dbgpu yet: better None than a fuzzy mismatch. - assert resolve_detected_bandwidth("Intel(R) Arc(TM) Pro B70 Graphics") is None +def test_resolve_arc_pro_b70_uses_curated_value(): + assert resolve_detected_bandwidth("Intel(R) Arc(TM) Pro B70 Graphics") == 608.0 + assert resolve_detected_bandwidth("Battlemage G31 [Intel Graphics]") == 608.0 def test_resolve_empty_name_returns_none(): diff --git a/tests/test_gpu_simulator.py b/tests/test_gpu_simulator.py index 8c843d7..56864af 100644 --- a/tests/test_gpu_simulator.py +++ b/tests/test_gpu_simulator.py @@ -99,6 +99,14 @@ def test_h100_80gb_alias(self): assert gpu.vendor == "nvidia" assert "(simulated)" in gpu.name + def test_intel_arc_pro_b70_curated_spec(self): + gpu = create_synthetic_gpu("Arc Pro B70") + assert gpu.name == "Intel Arc Pro B70 (simulated)" + assert gpu.vram_bytes == 32 * _GiB + assert gpu.vendor == "intel" + assert gpu.memory_bandwidth_gbps == 608.0 + assert gpu.shared_memory is False + class TestAppleSiliconAliases: @pytest.mark.parametrize( diff --git a/tests/test_intel_gpu.py b/tests/test_intel_gpu.py index 8bf7e03..fae340c 100644 --- a/tests/test_intel_gpu.py +++ b/tests/test_intel_gpu.py @@ -30,6 +30,27 @@ def fake_run(*args, **kwargs): assert "UHD Graphics" in gpus[0].name +def test_detect_intel_arc_pro_b70_from_battlemage_g31_lspci(monkeypatch): + output = ( + '12:00.0 "VGA compatible controller" "Intel Corporation" ' + '"Battlemage G31 [Intel Graphics]"\n' + ) + + def fake_run(*args, **kwargs): + return subprocess.CompletedProcess(args[0], 0, stdout=output, stderr="") + + monkeypatch.setattr(intel.subprocess, "run", fake_run) + + gpus = intel.detect_intel_gpus() + + assert len(gpus) == 1 + assert gpus[0].name == "Battlemage G31 [Intel Graphics]" + assert gpus[0].vendor == "intel" + assert gpus[0].vram_bytes == 32 * 1024**3 + assert gpus[0].memory_bandwidth_gbps == 608.0 + assert gpus[0].shared_memory is False + + def test_detect_intel_gpu_ignores_non_display_lspci(monkeypatch): output = '00:00.0 "Host bridge" "Intel Corporation" "Device 4621"\n' @@ -60,6 +81,25 @@ def test_detect_intel_gpu_from_sysfs_when_lspci_missing(monkeypatch, tmp_path): assert gpus[0].name == "Intel Integrated Graphics" +def test_detect_intel_arc_pro_b70_from_sysfs_device_id(monkeypatch, tmp_path): + card = tmp_path / "card0" / "device" + card.mkdir(parents=True) + (card / "vendor").write_text("0x8086\n") + (card / "device").write_text("0xe223\n") + + monkeypatch.setattr(intel, "_detect_from_lspci", lambda: []) + original_sysfs = intel._detect_from_sysfs + monkeypatch.setattr(intel, "_detect_from_sysfs", lambda: original_sysfs(tmp_path)) + + gpus = intel.detect_intel_gpus() + + assert len(gpus) == 1 + assert gpus[0].name == "Battlemage G31 [Intel Graphics]" + assert gpus[0].vram_bytes == 32 * 1024**3 + assert gpus[0].memory_bandwidth_gbps == 608.0 + assert gpus[0].shared_memory is False + + def test_display_intel_shared_memory_without_zero_kb(monkeypatch): from whichllm.output import _console as console_mod from whichllm.output import display as display_mod