Andyyyy64 · Andyyyy64 · Jun 29, 2026 · Jun 29, 2026
diff --git a/src/whichllm/constants.py b/src/whichllm/constants.py
@@ -13,8 +13,11 @@
 from whichllm.data.gpu import (
     _GiB,
     AMD_SHARED_MEMORY_APU_MARKERS,
+    CURATED_GPU_SPECS,
+    CuratedGPUSpec,
     GPU_BANDWIDTH,
     GPU_MEMORY_CLOCK_VARIANTS,
+    INTEL_PCI_DEVICE_NAMES,
     NVIDIA_COMPUTE_CAPABILITY,
     VULKAN_ONLY_GPUS,
 )
@@ -32,9 +35,12 @@
 __all__ = [
     "_GiB",
     "AMD_SHARED_MEMORY_APU_MARKERS",
+    "CURATED_GPU_SPECS",
+    "CuratedGPUSpec",
     "FRAMEWORK_OVERHEAD_BYTES",
     "GPU_BANDWIDTH",
     "GPU_MEMORY_CLOCK_VARIANTS",
+    "INTEL_PCI_DEVICE_NAMES",
     "MIN_COMPUTE_CAPABILITY_OLLAMA",
     "MIN_COMPUTE_CAPABILITY_VLLM",
     "MODEL_GENERATION_BONUS_MAX",

diff --git a/src/whichllm/data/gpu.py b/src/whichllm/data/gpu.py
@@ -1,7 +1,22 @@
-"""GPU bandwidth, NVIDIA compute capability, and AMD shared-memory APU markers."""
+"""GPU bandwidth, VRAM, NVIDIA compute capability, and GPU markers."""
+
+from __future__ import annotations
+
+from typing import NamedTuple
 
 _GiB = 1024**3
 
+
+class CuratedGPUSpec(NamedTuple):
+    """Small curated spec for GPUs missing or ambiguous in dbgpu."""
+
+    name: str  # display name the simulator reports for a curated match
+    vendor: str  # vendor tag copied into GPUInfo.vendor, e.g. "intel"
+    vram_gb: float  # VRAM size; multiplied by _GiB to get bytes
+    memory_bandwidth_gbps: float  # passed through as GPUInfo.memory_bandwidth_gbps
+    shared_memory: bool = False  # passed through as GPUInfo.shared_memory
+
+
 AMD_SHARED_MEMORY_APU_MARKERS: tuple[str, ...] = (
     "STRIX HALO",
     "STRXLGEN",
@@ -141,6 +156,9 @@
     "MI300X": 5300.0,
     "MI250X": 3276.0,
     "MI210": 1638.0,
+    # Intel discrete GPUs
+    "Arc Pro B70": 608.0,
+    "Battlemage G31": 608.0,
     # Apple Silicon (unified memory bandwidth)
     "M1 Ultra": 800.0,
     "M1 Max": 400.0,
@@ -163,6 +181,25 @@
     "M5": 153.0,
 }
 
+CURATED_GPU_SPECS: dict[str, CuratedGPUSpec] = {  # keys: case-insensitive substrings
+    "Arc Pro B70": CuratedGPUSpec(
+        name="Intel Arc Pro B70",
+        vendor="intel",
+        vram_gb=32.0,
+        memory_bandwidth_gbps=608.0,
+    ),
+    "Battlemage G31": CuratedGPUSpec(
+        name="Battlemage G31 [Intel Graphics]",
+        vendor="intel",
+        vram_gb=32.0,
+        memory_bandwidth_gbps=608.0,
+    ),
+}
+
+INTEL_PCI_DEVICE_NAMES: dict[str, str] = {  # lowercased sysfs device ID -> GPU name
+    "0xe223": "Battlemage G31 [Intel Graphics]",
+}
+
 # NVIDIA GPU compute capability lookup (substring match, case-insensitive)
 NVIDIA_COMPUTE_CAPABILITY: dict[str, tuple[int, int]] = {
     # RTX 50 series (Blackwell)

diff --git a/src/whichllm/hardware/gpu_db.py b/src/whichllm/hardware/gpu_db.py
@@ -93,7 +93,11 @@ def _static_bandwidth(name: str) -> float | None:
     if not name:
         return None
     if "/" not in name:
-        return _substring_bandwidth(name)
+        bandwidth = _substring_bandwidth(name)
+        if bandwidth is not None:
+            return bandwidth
+        normalized = _normalize_detected_name(name)
+        return _substring_bandwidth(normalized) if normalized != name else None
     bracket = _BRACKET_RE.search(name)
     raw = bracket.group(1) if bracket else name
     for seg in raw.split("/"):

diff --git a/src/whichllm/hardware/gpu_simulator.py b/src/whichllm/hardware/gpu_simulator.py
@@ -14,7 +14,13 @@
 if TYPE_CHECKING:
     from dbgpu import GPUSpecification
 
-from whichllm.constants import AMD_SHARED_MEMORY_APU_MARKERS, GPU_BANDWIDTH, _GiB
+from whichllm.constants import (
+    AMD_SHARED_MEMORY_APU_MARKERS,
+    CURATED_GPU_SPECS,
+    GPU_BANDWIDTH,
+    CuratedGPUSpec,
+    _GiB,
+)
 from whichllm.hardware.types import GPUInfo
 
 logger = logging.getLogger(__name__)
@@ -105,6 +111,14 @@ def _lookup_static_bandwidth(name: str) -> float | None:
     return None
 
 
+def _lookup_curated_spec(name: str) -> CuratedGPUSpec | None:
+    """Return the curated spec for the longest key found in *name*, ignoring case."""
+    haystack = name.upper()
+    longest_first = sorted(CURATED_GPU_SPECS, key=len, reverse=True)
+    hits = (CURATED_GPU_SPECS[k] for k in longest_first if k.upper() in haystack)
+    return next(hits, None)
+
+
 def _normalize_gpu_name(name: str) -> str:
     """Normalize user input: 'GTX1080' → 'GTX 1080', 'RX7900XTX' → 'RX 7900 XTX'."""
     # Insert space between letters and digits
@@ -257,6 +271,7 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP
     _last_suggestions.clear()
 
     amd_shared_memory_apu = _is_amd_shared_memory_apu(name)
+    curated = _lookup_curated_spec(name)
 
     # Apple Silicon short-circuit: dbgpu has no Apple entries, so we check
     # first to avoid fuzzy-matching "M1" against "Rage Mobility-M1".
@@ -280,6 +295,8 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP
         vram_bytes = int(vram_override_gb * _GiB)
     elif spec is not None and spec.memory_size_gb:
         vram_bytes = int(spec.memory_size_gb * _GiB)
+    elif curated is not None:
+        vram_bytes = int(curated.vram_gb * _GiB)
     else:
         msg = f"Unknown GPU '{name}'."
         if _last_suggestions:
@@ -292,6 +309,8 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP
     bandwidth: float | None = None
     if spec is not None and spec.memory_bandwidth_gb_s:
         bandwidth = spec.memory_bandwidth_gb_s
+    if bandwidth is None and curated is not None:
+        bandwidth = curated.memory_bandwidth_gbps
     if bandwidth is None:
         bandwidth = _lookup_static_bandwidth(name)
 
@@ -304,17 +323,24 @@ def create_synthetic_gpu(name: str, vram_override_gb: float | None = None) -> GP
     vendor = "nvidia"
     if spec is not None:
         vendor = _MANUFACTURER_TO_VENDOR.get(spec.manufacturer, "nvidia")
+    elif curated is not None:
+        vendor = curated.vendor
     elif amd_shared_memory_apu:
         vendor = "amd"
 
-    display_name = spec.name if spec is not None else name
+    if spec is not None:
+        display_name = spec.name
+    elif curated is not None:
+        display_name = curated.name
+    else:
+        display_name = name
 
     return GPUInfo(
         name=f"{display_name} (simulated)",
         vendor=vendor,
         vram_bytes=vram_bytes,
         compute_capability=compute_cap,
         memory_bandwidth_gbps=bandwidth,
-        shared_memory=amd_shared_memory_apu,
+        shared_memory=curated.shared_memory if curated else amd_shared_memory_apu,
         vram_overridden=vram_override_gb is not None,
     )
diff --git a/src/whichllm/hardware/intel.py b/src/whichllm/hardware/intel.py
@@ -6,6 +6,12 @@
 import subprocess
 from pathlib import Path
 
+from whichllm.constants import (
+    CURATED_GPU_SPECS,
+    INTEL_PCI_DEVICE_NAMES,
+    CuratedGPUSpec,
+    _GiB,
+)
 from whichllm.hardware.types import GPUInfo
 
 logger = logging.getLogger(__name__)
@@ -74,9 +80,19 @@ def _detect_from_sysfs(drm_path: Path = Path("/sys/class/drm")) -> list[str]:
             continue
 
         name = "Intel Integrated Graphics"
+        known_device = False
+        try:
+            device_id = (device / "device").read_text().strip().lower()
+            mapped_name = INTEL_PCI_DEVICE_NAMES.get(device_id)
+            if mapped_name:
+                name = mapped_name
+                known_device = True
+        except OSError:
+            pass
+
         try:
             product_name = (device / "product_name").read_text().strip()
-            if product_name:
+            if product_name and not known_device:
                 name = product_name
         except OSError:
             pass
@@ -87,16 +103,34 @@ def _detect_from_sysfs(drm_path: Path = Path("/sys/class/drm")) -> list[str]:
     return names
 
 
+def _lookup_curated_spec(name: str) -> CuratedGPUSpec | None:
+    """Find the curated spec whose key is in *name* (case-insensitive), or None."""
+    needle_space = name.upper()
+    by_length = sorted(CURATED_GPU_SPECS, key=len, reverse=True)  # longest first
+    match = next((k for k in by_length if k.upper() in needle_space), None)
+    return None if match is None else CURATED_GPU_SPECS[match]
+
+
+def _gpu_info_from_name(name: str) -> GPUInfo:
+    """Build the GPUInfo reported for a detected Intel GPU name.
+
+    A curated match supplies vendor, VRAM, bandwidth and the shared-memory flag.
+    """
+    spec = _lookup_curated_spec(name)
+    if spec is None:
+        # No curated match: generic Intel entry with zero VRAM and shared memory.
+        return GPUInfo(name=name, vendor="intel", vram_bytes=0, shared_memory=True)
+    return GPUInfo(
+        name=name,
+        vendor=spec.vendor,
+        vram_bytes=int(spec.vram_gb * _GiB),
+        memory_bandwidth_gbps=spec.memory_bandwidth_gbps,
+        shared_memory=spec.shared_memory,
+    )
+
+
 def detect_intel_gpus() -> list[GPUInfo]:
-    """Detect Linux Intel iGPUs. Returns empty list on failure."""
+    """Detect Linux Intel GPUs (iGPU or discrete). Returns empty list on failure."""
     names = _detect_from_lspci() or _detect_from_sysfs()
 
-    return [
-        GPUInfo(
-            name=name,
-            vendor="intel",
-            vram_bytes=0,
-            shared_memory=True,
-        )
-        for name in names
-    ]
+    return [_gpu_info_from_name(name) for name in names]
diff --git a/src/whichllm/models/benchmark.py b/src/whichllm/models/benchmark.py
@@ -13,6 +13,7 @@
 
 import httpx
 
+from whichllm.models.http import DEFAULT_ACCEPT_ENCODING
 from whichllm.utils import _cache_dir, _current_version
 
 logger = logging.getLogger(__name__)
@@ -153,8 +154,14 @@ async def fetch_benchmark_scores() -> dict[str, float]:
         get_livebench_data,
     )
 
-    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
-        client.headers["User-Agent"] = f"whichllm/{_current_version()}"
+    async with httpx.AsyncClient(
+        timeout=30.0,
+        follow_redirects=True,
+        headers={
+            "Accept-Encoding": DEFAULT_ACCEPT_ENCODING,
+            "User-Agent": f"whichllm/{_current_version()}",
+        },
+    ) as client:
         leaderboard_task = asyncio.create_task(fetch_leaderboard_with_fallback(client))
         arena_task = asyncio.create_task(fetch_arena_scores(client))
         aa_task = asyncio.create_task(fetch_aa_index_scores(client))

diff --git a/src/whichllm/models/fetcher.py b/src/whichllm/models/fetcher.py
@@ -11,7 +11,7 @@
 import httpx
 
 from whichllm.constants import QUANT_BYTES_PER_WEIGHT
-from whichllm.models.http import get_with_retries
+from whichllm.models.http import DEFAULT_ACCEPT_ENCODING, get_with_retries
 from whichllm.models.types import GGUFVariant, ModelInfo
 
 logger = logging.getLogger(__name__)
@@ -717,7 +717,11 @@ async def fetch_models(
     """Fetch popular models from HuggingFace Hub."""
     models: list[ModelInfo] = []
 
-    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
+    async with httpx.AsyncClient(
+        timeout=30.0,
+        follow_redirects=True,
+        headers={"Accept-Encoding": DEFAULT_ACCEPT_ENCODING},
+    ) as client:
         # Fetch top text-generation models
         params = {
             "pipeline_tag": "text-generation",
@@ -1076,7 +1080,11 @@ async def fetch_model_published_at(model_ids: list[str]) -> dict[str, str]:
     if not unique_ids:
         return {}
 
-    async with httpx.AsyncClient(timeout=20.0, follow_redirects=True) as client:
+    async with httpx.AsyncClient(
+        timeout=20.0,
+        follow_redirects=True,
+        headers={"Accept-Encoding": DEFAULT_ACCEPT_ENCODING},
+    ) as client:
         tasks = [
             client.get(
                 _hf_api_url(f"models/{model_id}"),

diff --git a/src/whichllm/models/http.py b/src/whichllm/models/http.py
@@ -6,6 +6,7 @@
 import httpx
 
 RETRYABLE_STATUS_CODES = {408, 429, 500, 502, 503, 504}
+DEFAULT_ACCEPT_ENCODING = "gzip, deflate"  # deliberately omits brotli ("br")
 
 
 async def get_with_retries(

diff --git a/tests/test_benchmark_lookup.py b/tests/test_benchmark_lookup.py
@@ -1,14 +1,38 @@
 """Tests for benchmark lookup direct/inherited semantics."""
 
+import asyncio
+
+import whichllm.models.benchmark_sources as benchmark_sources
 from whichllm.models.benchmark import (
     _lineage_recency_factor,
     build_line_bucket_index,
     build_score_index,
+    fetch_benchmark_scores,
     lookup_benchmark,
     lookup_benchmark_evidence,
 )
 
 
+def test_fetch_benchmark_scores_disables_brotli_accept_encoding(monkeypatch):
+    seen: list[str] = []
+
+    async def record_encoding(client):
+        seen.append(client.headers["accept-encoding"])
+        return {}
+
+    for source_name in (  # each async source reports the header it sees
+        "fetch_leaderboard_with_fallback",
+        "fetch_arena_scores",
+        "fetch_aa_index_scores",
+        "fetch_aider_polyglot_scores",
+        "fetch_vision_scores",
+    ):
+        monkeypatch.setattr(benchmark_sources, source_name, record_encoding)
+    monkeypatch.setattr(benchmark_sources, "get_livebench_data", lambda: {})
+    assert asyncio.run(fetch_benchmark_scores()) == {}
+    assert set(seen) == {"gzip, deflate"}
+
+
 def test_lookup_benchmark_model_id_match_is_direct():
     scores = {"Qwen/Qwen2.5-7B-Instruct": 70.0}
     ci, line = build_score_index(scores)