From 328030b0f6f47c7bc478fa2eb925b0a7ef9f28f5 Mon Sep 17 00:00:00 2001
From: Andy <takuyarossi77@gmail.com>
Date: Mon, 29 Jun 2026 12:49:27 +0900
Subject: [PATCH] fix: handle Arc Pro B70 and avoid brotli fetch failures

---
 src/whichllm/constants.py              |  6 +++
 src/whichllm/data/gpu.py               | 39 ++++++++++++++++++-
 src/whichllm/hardware/gpu_db.py        |  6 ++-
 src/whichllm/hardware/gpu_simulator.py | 32 +++++++++++++--
 src/whichllm/hardware/intel.py         | 54 +++++++++++++++++++++-----
 src/whichllm/models/benchmark.py       | 11 +++++-
 src/whichllm/models/fetcher.py         | 14 +++++--
 src/whichllm/models/http.py            |  1 +
 tests/test_benchmark_lookup.py         | 24 ++++++++++++
 tests/test_fetcher.py                  |  6 +++
 tests/test_gpu_db.py                   |  6 +--
 tests/test_gpu_simulator.py            |  8 ++++
 tests/test_intel_gpu.py                | 40 +++++++++++++++++++
 13 files changed, 224 insertions(+), 23 deletions(-)

diff --git a/src/whichllm/constants.py b/src/whichllm/constants.py
index 5918530..a67354d 100644
--- a/src/whichllm/constants.py
+++ b/src/whichllm/constants.py
@@ -13,8 +13,11 @@
 from whichllm.data.gpu import (
     _GiB,
     AMD_SHARED_MEMORY_APU_MARKERS,
+    CURATED_GPU_SPECS,
+    CuratedGPUSpec,
     GPU_BANDWIDTH,
     GPU_MEMORY_CLOCK_VARIANTS,
+    INTEL_PCI_DEVICE_NAMES,
     NVIDIA_COMPUTE_CAPABILITY,
     VULKAN_ONLY_GPUS,
 )
@@ -32,9 +35,12 @@
 __all__ = [
     "_GiB",
     "AMD_SHARED_MEMORY_APU_MARKERS",
+    "CURATED_GPU_SPECS",
+    "CuratedGPUSpec",
     "FRAMEWORK_OVERHEAD_BYTES",
     "GPU_BANDWIDTH",
     "GPU_MEMORY_CLOCK_VARIANTS",
+    "INTEL_PCI_DEVICE_NAMES",
     "MIN_COMPUTE_CAPABILITY_OLLAMA",
     "MIN_COMPUTE_CAPABILITY_VLLM",
     "MODEL_GENERATION_BONUS_MAX",
diff --git a/src/whichllm/data/gpu.py b/src/whichllm/data/gpu.py
index 429e613..ea87181 100644
--- a/src/whichllm/data/gpu.py
+++ b/src/whichllm/data/gpu.py
@@ -1,7 +1,22 @@
-"""GPU bandwidth, NVIDIA compute capability, and AMD shared-memory APU markers."""
+"""GPU bandwidth, VRAM, NVIDIA compute capability, and GPU markers."""
+
+from __future__ import annotations
+
+from typing import NamedTuple
 
 _GiB = 1024**3
 
+
+class CuratedGPUSpec(NamedTuple):
+    """Hand-curated spec for a GPU that is missing or ambiguous in dbgpu."""
+
+    name: str  # display name used for the simulated GPU
+    vendor: str  # lowercase vendor tag, e.g. "intel"
+    vram_gb: float  # consumers convert to bytes via vram_gb * _GiB
+    memory_bandwidth_gbps: float  # presumably GB/s, matching GPU_BANDWIDTH values
+    shared_memory: bool = False  # forwarded to GPUInfo.shared_memory
+
+
 AMD_SHARED_MEMORY_APU_MARKERS: tuple[str, ...] = (
     "STRIX HALO",
     "STRXLGEN",
@@ -141,6 +156,9 @@
     "MI300X": 5300.0,
     "MI250X": 3276.0,
     "MI210": 1638.0,
+    # Intel discrete GPUs
+    "Arc Pro B70": 608.0,
+    "Battlemage G31": 608.0,
     # Apple Silicon (unified memory bandwidth)
     "M1 Ultra": 800.0,
     "M1 Max": 400.0,
@@ -163,6 +181,25 @@
     "M5": 153.0,
 }
 
+CURATED_GPU_SPECS: dict[str, CuratedGPUSpec] = {  # keys: case-insensitive substrings
+    "Arc Pro B70": CuratedGPUSpec(
+        name="Intel Arc Pro B70",
+        vendor="intel",
+        vram_gb=32.0,
+        memory_bandwidth_gbps=608.0,
+    ),
+    "Battlemage G31": CuratedGPUSpec(  # name lspci reports for the Arc Pro B70
+        name="Battlemage G31 [Intel Graphics]",
+        vendor="intel",
+        vram_gb=32.0,
+        memory_bandwidth_gbps=608.0,
+    ),
+}
+
+INTEL_PCI_DEVICE_NAMES: dict[str, str] = {  # key: lowercased sysfs device ID
+    "0xe223": "Battlemage G31 [Intel Graphics]",  # Arc Pro B70
+}
+
 # NVIDIA GPU compute capability lookup (substring match, case-insensitive)
 NVIDIA_COMPUTE_CAPABILITY: dict[str, tuple[int, int]] = {
     # RTX 50 series (Blackwell)
diff --git a/src/whichllm/hardware/gpu_db.py b/src/whichllm/hardware/gpu_db.py
index 2ff74b9..e679615 100644
--- a/src/whichllm/hardware/gpu_db.py
+++ b/src/whichllm/hardware/gpu_db.py
@@ -93,7 +93,11 @@ def _static_bandwidth(name: str) -> float | None:
     if not name:
         return None
     if "/" not in name:
-        return _substring_bandwidth(name)
+        bandwidth = _substring_bandwidth(name)
+        if bandwidth is not None:
+            return bandwidth
+        normalized = _normalize_detected_name(name)
+        return _substring_bandwidth(normalized) if normalized != name else None
     bracket = _BRACKET_RE.search(name)
     raw = bracket.group(1) if bracket else name
     for seg in raw.split("/"):
diff --git a/src/whichllm/hardware/gpu_simulator.py b/src/whichllm/hardware/gpu_simulator.py
index 768cbca..e45ccde 100644
--- a/src/whichllm/hardware/gpu_simulator.py
+++ b/src/whichllm/hardware/gpu_simulator.py
@@ -14,7 +14,13 @@
 if TYPE_CHECKING:
     from dbgpu import GPUSpecification
 
-from whichllm.constants import AMD_SHARED_MEMORY_APU_MARKERS, GPU_BANDWIDTH, _GiB
+from whichllm.constants import (
+    AMD_SHARED_MEMORY_APU_MARKERS,
+    CURATED_GPU_SPECS,
+    GPU_BANDWIDTH,
+    CuratedGPUSpec,
+    _GiB,
+)
 from whichllm.hardware.types import GPUInfo
 
 logger = logging.getLogger(__name__)
@@ -105,6 +111,14 @@ def _lookup_static_bandwidth(name: str) -> float | None:
     return None
 
 
+def _lookup_curated_spec(name: str) -> CuratedGPUSpec | None:
+    """Return the longest-key curated spec in *name*, ignoring (R)/(TM) marks."""
+    cleaned = name.upper().replace("(R)", "").replace("(TM)", "")
+    keys = sorted(CURATED_GPU_SPECS, key=len, reverse=True)
+    match = next((k for k in keys if k.upper() in cleaned), None)
+    return CURATED_GPU_SPECS[match] if match is not None else None
+
+
 def _normalize_gpu_name(name: str) -> str:
     """Normalize user input: 'GTX1080' → 'GTX 1080', 'RX7900XTX' → 'RX 7900 XTX'."""
     # Insert space between letters and digits
@@ -257,6 +271,7 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP
     _last_suggestions.clear()
 
     amd_shared_memory_apu = _is_amd_shared_memory_apu(name)
+    curated = _lookup_curated_spec(name)
 
     # Apple Silicon short-circuit: dbgpu has no Apple entries, so we check
     # first to avoid fuzzy-matching "M1" against "Rage Mobility-M1".
@@ -280,6 +295,8 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP
         vram_bytes = int(vram_override_gb * _GiB)
     elif spec is not None and spec.memory_size_gb:
         vram_bytes = int(spec.memory_size_gb * _GiB)
+    elif curated is not None:
+        vram_bytes = int(curated.vram_gb * _GiB)
     else:
         msg = f"Unknown GPU '{name}'."
         if _last_suggestions:
@@ -292,6 +309,8 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP
     bandwidth: float | None = None
     if spec is not None and spec.memory_bandwidth_gb_s:
         bandwidth = spec.memory_bandwidth_gb_s
+    if bandwidth is None and curated is not None:
+        bandwidth = curated.memory_bandwidth_gbps
     if bandwidth is None:
         bandwidth = _lookup_static_bandwidth(name)
 
@@ -304,10 +323,17 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP
     vendor = "nvidia"
     if spec is not None:
         vendor = _MANUFACTURER_TO_VENDOR.get(spec.manufacturer, "nvidia")
+    elif curated is not None:
+        vendor = curated.vendor
     elif amd_shared_memory_apu:
         vendor = "amd"
 
-    display_name = spec.name if spec is not None else name
+    if spec is not None:
+        display_name = spec.name
+    elif curated is not None:
+        display_name = curated.name
+    else:
+        display_name = name
 
     return GPUInfo(
         name=f"{display_name} (simulated)",
@@ -315,6 +341,6 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP
         vram_bytes=vram_bytes,
         compute_capability=compute_cap,
         memory_bandwidth_gbps=bandwidth,
-        shared_memory=amd_shared_memory_apu,
+        shared_memory=curated.shared_memory if curated else amd_shared_memory_apu,
         vram_overridden=vram_override_gb is not None,
     )
diff --git a/src/whichllm/hardware/intel.py b/src/whichllm/hardware/intel.py
index 64f97e4..127a496 100644
--- a/src/whichllm/hardware/intel.py
+++ b/src/whichllm/hardware/intel.py
@@ -6,6 +6,12 @@
 import subprocess
 from pathlib import Path
 
+from whichllm.constants import (
+    CURATED_GPU_SPECS,
+    INTEL_PCI_DEVICE_NAMES,
+    CuratedGPUSpec,
+    _GiB,
+)
 from whichllm.hardware.types import GPUInfo
 
 logger = logging.getLogger(__name__)
@@ -74,9 +80,19 @@ def _detect_from_sysfs(drm_path: Path = Path("/sys/class/drm")) -> list[str]:
             continue
 
         name = "Intel Integrated Graphics"
+        known_device = False
+        try:
+            device_id = (device / "device").read_text().strip().lower()
+            mapped_name = INTEL_PCI_DEVICE_NAMES.get(device_id)
+            if mapped_name:
+                name = mapped_name
+                known_device = True
+        except OSError:
+            pass
+
         try:
             product_name = (device / "product_name").read_text().strip()
-            if product_name:
+            if product_name and not known_device:
                 name = product_name
         except OSError:
             pass
@@ -87,16 +103,34 @@ def _detect_from_sysfs(drm_path: Path = Path("/sys/class/drm")) -> list[str]:
     return names
 
 
+def _lookup_curated_spec(name: str) -> CuratedGPUSpec | None:
+    """Return the longest-key curated spec in *name*, ignoring (R)/(TM) marks."""
+    cleaned = name.upper().replace("(R)", "").replace("(TM)", "")
+    keys = sorted(CURATED_GPU_SPECS, key=len, reverse=True)
+    match = next((k for k in keys if k.upper() in cleaned), None)
+    return CURATED_GPU_SPECS[match] if match is not None else None
+
+
+def _gpu_info_from_name(name: str) -> GPUInfo:
+    """Build the GPUInfo for a detected Intel GPU name.
+
+    Names matching a curated spec take vendor, VRAM, bandwidth and the
+    shared-memory flag from it; others get the generic vram_bytes=0 entry.
+    """
+    spec = _lookup_curated_spec(name)
+    if spec is None:
+        return GPUInfo(name=name, vendor="intel", vram_bytes=0, shared_memory=True)
+    return GPUInfo(
+        name=name,
+        vendor=spec.vendor,
+        vram_bytes=int(spec.vram_gb * _GiB),
+        memory_bandwidth_gbps=spec.memory_bandwidth_gbps,
+        shared_memory=spec.shared_memory,
+    )
+
+
 def detect_intel_gpus() -> list[GPUInfo]:
-    """Detect Linux Intel iGPUs. Returns empty list on failure."""
+    """Detect Linux Intel GPUs, integrated or discrete. Returns [] on failure."""
     names = _detect_from_lspci() or _detect_from_sysfs()
 
-    return [
-        GPUInfo(
-            name=name,
-            vendor="intel",
-            vram_bytes=0,
-            shared_memory=True,
-        )
-        for name in names
-    ]
+    return [_gpu_info_from_name(name) for name in names]
diff --git a/src/whichllm/models/benchmark.py b/src/whichllm/models/benchmark.py
index 706b61b..1ab31f9 100644
--- a/src/whichllm/models/benchmark.py
+++ b/src/whichllm/models/benchmark.py
@@ -13,6 +13,7 @@
 
 import httpx
 
+from whichllm.models.http import DEFAULT_ACCEPT_ENCODING
 from whichllm.utils import _cache_dir, _current_version
 
 logger = logging.getLogger(__name__)
@@ -153,8 +154,14 @@ async def fetch_benchmark_scores() -> dict[str, float]:
         get_livebench_data,
     )
 
-    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
-        client.headers["User-Agent"] = f"whichllm/{_current_version()}"
+    async with httpx.AsyncClient(
+        timeout=30.0,
+        follow_redirects=True,
+        headers={
+            "Accept-Encoding": DEFAULT_ACCEPT_ENCODING,
+            "User-Agent": f"whichllm/{_current_version()}",
+        },
+    ) as client:
         leaderboard_task = asyncio.create_task(fetch_leaderboard_with_fallback(client))
         arena_task = asyncio.create_task(fetch_arena_scores(client))
         aa_task = asyncio.create_task(fetch_aa_index_scores(client))
diff --git a/src/whichllm/models/fetcher.py b/src/whichllm/models/fetcher.py
index 7c99929..a384cc4 100644
--- a/src/whichllm/models/fetcher.py
+++ b/src/whichllm/models/fetcher.py
@@ -11,7 +11,7 @@
 import httpx
 
 from whichllm.constants import QUANT_BYTES_PER_WEIGHT
-from whichllm.models.http import get_with_retries
+from whichllm.models.http import DEFAULT_ACCEPT_ENCODING, get_with_retries
 from whichllm.models.types import GGUFVariant, ModelInfo
 
 logger = logging.getLogger(__name__)
@@ -717,7 +717,11 @@ async def fetch_models(
     """Fetch popular models from HuggingFace Hub."""
     models: list[ModelInfo] = []
 
-    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
+    async with httpx.AsyncClient(
+        timeout=30.0,
+        follow_redirects=True,
+        headers={"Accept-Encoding": DEFAULT_ACCEPT_ENCODING},
+    ) as client:
         # Fetch top text-generation models
         params = {
             "pipeline_tag": "text-generation",
@@ -1076,7 +1080,11 @@ async def fetch_model_published_at(model_ids: list[str]) -> dict[str, str]:
     if not unique_ids:
         return {}
 
-    async with httpx.AsyncClient(timeout=20.0, follow_redirects=True) as client:
+    async with httpx.AsyncClient(
+        timeout=20.0,
+        follow_redirects=True,
+        headers={"Accept-Encoding": DEFAULT_ACCEPT_ENCODING},
+    ) as client:
         tasks = [
             client.get(
                 _hf_api_url(f"models/{model_id}"),
diff --git a/src/whichllm/models/http.py b/src/whichllm/models/http.py
index 7459449..a71b076 100644
--- a/src/whichllm/models/http.py
+++ b/src/whichllm/models/http.py
@@ -6,6 +6,7 @@
 import httpx
 
 RETRYABLE_STATUS_CODES = {408, 429, 500, 502, 503, 504}
+DEFAULT_ACCEPT_ENCODING = "gzip, deflate"  # "br" omitted: avoids brotli failures
 
 
 async def get_with_retries(
diff --git a/tests/test_benchmark_lookup.py b/tests/test_benchmark_lookup.py
index 31725c1..c531b82 100644
--- a/tests/test_benchmark_lookup.py
+++ b/tests/test_benchmark_lookup.py
@@ -1,14 +1,38 @@
 """Tests for benchmark lookup direct/inherited semantics."""
 
+import asyncio
+
+import whichllm.models.benchmark_sources as benchmark_sources
 from whichllm.models.benchmark import (
     _lineage_recency_factor,
     build_line_bucket_index,
     build_score_index,
+    fetch_benchmark_scores,
     lookup_benchmark,
     lookup_benchmark_evidence,
 )
 
 
+def test_fetch_benchmark_scores_disables_brotli_accept_encoding(monkeypatch):
+    encodings: list[str] = []  # Accept-Encoding seen by each stubbed source
+
+    async def fake_source(client):
+        encodings.append(client.headers["accept-encoding"])
+        return {}
+
+    monkeypatch.setattr(
+        benchmark_sources, "fetch_leaderboard_with_fallback", fake_source
+    )
+    monkeypatch.setattr(benchmark_sources, "fetch_arena_scores", fake_source)
+    monkeypatch.setattr(benchmark_sources, "fetch_aa_index_scores", fake_source)
+    monkeypatch.setattr(benchmark_sources, "fetch_aider_polyglot_scores", fake_source)
+    monkeypatch.setattr(benchmark_sources, "fetch_vision_scores", fake_source)
+    monkeypatch.setattr(benchmark_sources, "get_livebench_data", lambda: {})
+
+    assert asyncio.run(fetch_benchmark_scores()) == {}  # every source stubbed empty
+    assert set(encodings) == {"gzip, deflate"}  # "br" must not be advertised
+
+
 def test_lookup_benchmark_model_id_match_is_direct():
     scores = {"Qwen/Qwen2.5-7B-Instruct": 70.0}
     ci, line = build_score_index(scores)
diff --git a/tests/test_fetcher.py b/tests/test_fetcher.py
index 2d714d3..0984e03 100644
--- a/tests/test_fetcher.py
+++ b/tests/test_fetcher.py
@@ -62,9 +62,11 @@ def test_hf_api_url_rejects_endpoint_without_scheme(monkeypatch):
 def test_fetch_models_respects_hf_endpoint(monkeypatch):
     monkeypatch.setenv("HF_ENDPOINT", "https://hf-mirror.example")
     urls: list[str] = []
+    encodings: list[str] = []
 
     async def fake_get_with_retries(client, url: str, **kwargs):
         urls.append(url)
+        encodings.append(client.headers["accept-encoding"])
         request = httpx.Request("GET", url)
         if "/models/" in url:
             return httpx.Response(404, request=request)
@@ -79,14 +81,17 @@ async def fake_get_with_retries(client, url: str, **kwargs):
     assert all(url.startswith("https://hf-mirror.example/api/") for url in urls)
     assert "https://hf-mirror.example/api/models" in urls
     assert not any(url.startswith("https://huggingface.co/api/") for url in urls)
+    assert set(encodings) == {"gzip, deflate"}
 
 
 def test_fetch_model_published_at_respects_hf_endpoint(monkeypatch):
     monkeypatch.setenv("HF_ENDPOINT", "https://hf-mirror.example")
     urls: list[str] = []
+    encodings: list[str] = []
 
     async def fake_get(self, url: str, **kwargs):
         urls.append(url)
+        encodings.append(self.headers["accept-encoding"])
         return httpx.Response(
             200,
             json={"createdAt": "2026-06-22T00:00:00.000Z"},
@@ -99,6 +104,7 @@ async def fake_get(self, url: str, **kwargs):
 
     assert result == {"Qwen/Qwen3-8B": "2026-06-22T00:00:00.000Z"}
     assert urls == ["https://hf-mirror.example/api/models/Qwen/Qwen3-8B"]
+    assert encodings == ["gzip, deflate"]
 
 
 def test_normalize_param_count_for_quantized_repo_uses_size_hint():
diff --git a/tests/test_gpu_db.py b/tests/test_gpu_db.py
index 12643d2..1063091 100644
--- a/tests/test_gpu_db.py
+++ b/tests/test_gpu_db.py
@@ -86,9 +86,9 @@ def test_resolve_variant_qualifier_is_preserved():
     assert 200 < bw < 400
 
 
-def test_resolve_unknown_gpu_returns_none_not_wrong_guess():
-    # Arc Pro B70 is not in dbgpu yet: better None than a fuzzy mismatch.
-    assert resolve_detected_bandwidth("Intel(R) Arc(TM) Pro B70 Graphics") is None
+def test_resolve_arc_pro_b70_uses_curated_value():
+    assert resolve_detected_bandwidth("Intel(R) Arc(TM) Pro B70 Graphics") == 608.0
+    assert resolve_detected_bandwidth("Battlemage G31 [Intel Graphics]") == 608.0
 
 
 def test_resolve_empty_name_returns_none():
diff --git a/tests/test_gpu_simulator.py b/tests/test_gpu_simulator.py
index 8c843d7..56864af 100644
--- a/tests/test_gpu_simulator.py
+++ b/tests/test_gpu_simulator.py
@@ -99,6 +99,14 @@ def test_h100_80gb_alias(self):
         assert gpu.vendor == "nvidia"
         assert "(simulated)" in gpu.name
 
+    def test_intel_arc_pro_b70_curated_spec(self):
+        gpu = create_synthetic_gpu("Arc Pro B70")
+        assert gpu.name == "Intel Arc Pro B70 (simulated)"  # curated display name
+        assert gpu.vram_bytes == 32 * _GiB
+        assert gpu.vendor == "intel"
+        assert gpu.memory_bandwidth_gbps == 608.0
+        assert gpu.shared_memory is False  # curated entry leaves the default False
+
 
 class TestAppleSiliconAliases:
     @pytest.mark.parametrize(
diff --git a/tests/test_intel_gpu.py b/tests/test_intel_gpu.py
index 8bf7e03..fae340c 100644
--- a/tests/test_intel_gpu.py
+++ b/tests/test_intel_gpu.py
@@ -30,6 +30,27 @@ def fake_run(*args, **kwargs):
     assert "UHD Graphics" in gpus[0].name
 
 
+def test_detect_intel_arc_pro_b70_from_battlemage_g31_lspci(monkeypatch):
+    output = (
+        '12:00.0 "VGA compatible controller" "Intel Corporation" '
+        '"Battlemage G31 [Intel Graphics]"\n'
+    )
+
+    def fake_run(*args, **kwargs):
+        return subprocess.CompletedProcess(args[0], 0, stdout=output, stderr="")
+
+    monkeypatch.setattr(intel.subprocess, "run", fake_run)  # no real subprocess
+
+    gpus = intel.detect_intel_gpus()
+
+    assert len(gpus) == 1
+    assert gpus[0].name == "Battlemage G31 [Intel Graphics]"  # detected name kept
+    assert gpus[0].vendor == "intel"
+    assert gpus[0].vram_bytes == 32 * 1024**3  # 32 GiB from the curated spec
+    assert gpus[0].memory_bandwidth_gbps == 608.0
+    assert gpus[0].shared_memory is False
+
+
 def test_detect_intel_gpu_ignores_non_display_lspci(monkeypatch):
     output = '00:00.0 "Host bridge" "Intel Corporation" "Device 4621"\n'
 
@@ -60,6 +81,25 @@ def test_detect_intel_gpu_from_sysfs_when_lspci_missing(monkeypatch, tmp_path):
     assert gpus[0].name == "Intel Integrated Graphics"
 
 
+def test_detect_intel_arc_pro_b70_from_sysfs_device_id(monkeypatch, tmp_path):
+    card = tmp_path / "card0" / "device"
+    card.mkdir(parents=True)
+    (card / "vendor").write_text("0x8086\n")
+    (card / "device").write_text("0xe223\n")  # ID listed in INTEL_PCI_DEVICE_NAMES
+
+    monkeypatch.setattr(intel, "_detect_from_lspci", lambda: [])  # force sysfs path
+    original_sysfs = intel._detect_from_sysfs
+    monkeypatch.setattr(intel, "_detect_from_sysfs", lambda: original_sysfs(tmp_path))
+
+    gpus = intel.detect_intel_gpus()
+
+    assert len(gpus) == 1
+    assert gpus[0].name == "Battlemage G31 [Intel Graphics]"  # mapped from the ID
+    assert gpus[0].vram_bytes == 32 * 1024**3  # 32 GiB from the curated spec
+    assert gpus[0].memory_bandwidth_gbps == 608.0
+    assert gpus[0].shared_memory is False
+
+
 def test_display_intel_shared_memory_without_zero_kb(monkeypatch):
     from whichllm.output import _console as console_mod
     from whichllm.output import display as display_mod