From 327d8900c8d7e7ed7db2ac3caf5e50f120883a6b Mon Sep 17 00:00:00 2001
From: Rach Pradhan <rachxpradhan@gmail.com>
Date: Thu, 30 Apr 2026 09:42:36 +0800
Subject: [PATCH] fix(#142): HTMLResponse content-type, noargs cache, future
 annotations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three rough edges from issue #142:

Bug 1 — HTMLResponse Content-Type didn't reach the wire
Bug 2 — noargs response cache replayed every cached body as application/json
  Both stem from the same root cause in zig/src/server.zig: the per-entry and
  StringHashMap response caches stored only the body and the cache-replay path
  hard-coded 'Content-Type: application/json'. Cache now stores
  CachedResponse{content_type, body} (new cached_ct_ptr/cached_ct_len atomics
  on HandlerEntry) and sendCachedResponse emits the original Content-Type.
  Caching is also gated on status_code == 200 since the replay path always
  emits a 200 status line.

Bug 3 — 'from __future__ import annotations' broke path-parameter binding
  PEP 563 stringifies annotations, so identity checks like 'ann is int' and
  'inspect.isclass(ann)' silently failed and path params were never bound.
  Fix: resolve annotations via typing.get_type_hints() in classify_handler,
  extract_path_params, create_enhanced_handler, create_fast_handler,
  create_fast_async_handler, and the routing.py decorator wrapper, with a
  string-name fallback for the built-in type aliases.

Tests: tests/test_issue_142.py covers all three bugs end-to-end against a
real running server. Existing test_issue_fixes / test_binary_responses /
test_perf_callnoargs_tupleabi suites still pass (70 tests). The Zig
'response cache is safe under concurrent access' unit test was updated to
read CachedResponse.body.

Closes #142

Amp-Thread-ID: https://ampcode.com/threads/T-019ddbfa-6d44-705e-b930-6daf1d41e918
Co-authored-by: Amp <amp@ampcode.com>
---
 python/turboapi/request_handler.py |  68 +++++++++++--
 python/turboapi/routing.py         |  39 ++++++--
 python/turboapi/zig_integration.py |  68 ++++++++++++-
 tests/test_issue_142.py            | 112 +++++++++++++++++++++
 zig/src/server.zig                 | 156 ++++++++++++++++++++++-------
 5 files changed, 389 insertions(+), 54 deletions(-)
 create mode 100644 tests/test_issue_142.py

diff --git a/python/turboapi/request_handler.py b/python/turboapi/request_handler.py
index 8c35480..57f6cb9 100644
--- a/python/turboapi/request_handler.py
+++ b/python/turboapi/request_handler.py
@@ -239,7 +239,8 @@ class PathParamParser:
 
     @staticmethod
     def extract_path_params(
-        route_pattern: str, actual_path: str, handler_signature: inspect.Signature | None = None
+        route_pattern: str, actual_path: str, handler_signature: inspect.Signature | None = None,
+        handler: Any | None = None,
     ) -> dict[str, Any]:
         """
         Extract path parameters from actual path using route pattern.
@@ -248,6 +249,7 @@ def extract_path_params(
             route_pattern: Route pattern with {param} placeholders (e.g., "/users/{user_id}")
             actual_path: Actual request path (e.g., "/users/123")
             handler_signature: Optional handler signature for type coercion
+            handler: Optional callable; used to resolve PEP 563 stringified annotations
 
         Returns:
             Dictionary of extracted path parameters (type-coerced if signature provided)
@@ -265,11 +267,32 @@ def extract_path_params(
 
         params = match.groupdict()
 
+        # Resolve stringified annotations so ``from __future__ import annotations``
+        # does not silently break type coercion below.
+        resolved_hints: dict[str, Any] = {}
+        if handler is not None:
+            try:
+                import typing as _typing
+
+                resolved_hints = _typing.get_type_hints(handler)
+            except Exception:
+                resolved_hints = {}
+
         # Coerce types based on handler signature annotations
         if handler_signature:
             for name, value in params.items():
                 if name in handler_signature.parameters:
-                    annotation = handler_signature.parameters[name].annotation
+                    annotation = resolved_hints.get(
+                        name, handler_signature.parameters[name].annotation
+                    )
+                    # Resolve raw string annotations to built-in types where possible.
+                    if isinstance(annotation, str):
+                        annotation = {
+                            "int": int,
+                            "float": float,
+                            "bool": bool,
+                            "str": str,
+                        }.get(annotation, annotation)
                     try:
                         if annotation is int:
                             params[name] = int(value)
@@ -785,6 +808,18 @@ def create_enhanced_handler(original_handler, route_definition):
     sig = inspect.signature(original_handler)
     is_async = inspect.iscoroutinefunction(original_handler)
 
+    # Resolve PEP 563 stringified annotations (``from __future__ import annotations``)
+    # so all downstream identity / isclass / issubclass checks see real types.
+    try:
+        import typing as _typing
+
+        _resolved_hints = _typing.get_type_hints(original_handler)
+    except Exception:
+        _resolved_hints = {}
+
+    def _ann(pname: str, param: inspect.Parameter):
+        return _resolved_hints.get(pname, param.annotation)
+
     # Pre-compile path param regex and type converters at registration time
     import re as _re
 
@@ -795,7 +830,7 @@ def create_enhanced_handler(original_handler, route_definition):
         if "{" in rp:
             _path_pattern = _re.compile("^" + _re.sub(r"\{(\w+)\}", r"(?P<\1>[^/]+)", rp) + "$")
         for pname, param in sig.parameters.items():
-            ann = param.annotation
+            ann = _ann(pname, param)
             if ann is int:
                 _path_param_types[pname] = int
             elif ann is float:
@@ -816,10 +851,10 @@ def create_enhanced_handler(original_handler, route_definition):
     _raw_body_param_names: set[str] = set()
     _request_param_names: set[str] = set()
     for _pname, _param in sig.parameters.items():
-        _ann = _param.annotation
-        if _ann is bytes or _ann is bytearray:
+        _a = _ann(_pname, _param)
+        if _a is bytes or _a is bytearray:
             _raw_body_param_names.add(_pname)
-        elif isinstance(_ann, type) and issubclass(_ann, _TurboRequest):
+        elif isinstance(_a, type) and issubclass(_a, _TurboRequest):
             _request_param_names.add(_pname)
     _skip_json_body = bool(_raw_body_param_names or _request_param_names)
 
@@ -1304,10 +1339,19 @@ def create_fast_handler(original_handler, route_definition):
     sig = inspect.signature(original_handler)
     param_names = set(sig.parameters.keys())
 
+    # Resolve stringified annotations (``from __future__ import annotations``) so
+    # path-param type coercion below still fires on ``get(item_id: int)`` etc.
+    try:
+        import typing as _typing
+
+        _hints = _typing.get_type_hints(original_handler)
+    except Exception:
+        _hints = {}
+
     # Pre-build type converters for path params
     _converters: dict[str, type] = {}
     for pname, param in sig.parameters.items():
-        ann = param.annotation
+        ann = _hints.get(pname, param.annotation)
         if ann is int:
             _converters[pname] = int
         elif ann is float:
@@ -1427,9 +1471,17 @@ def create_fast_async_handler(original_handler, route_definition, eager: bool =
     sig = inspect.signature(original_handler)
     param_names = set(sig.parameters.keys())
 
+    # Resolve stringified annotations for ``from __future__ import annotations``.
+    try:
+        import typing as _typing
+
+        _hints = _typing.get_type_hints(original_handler)
+    except Exception:
+        _hints = {}
+
     _converters: dict[str, type] = {}
     for pname, param in sig.parameters.items():
-        ann = param.annotation
+        ann = _hints.get(pname, param.annotation)
         if ann is int:
             _converters[pname] = int
         elif ann is float:
diff --git a/python/turboapi/routing.py b/python/turboapi/routing.py
index 7c415a5..9fb6625 100644
--- a/python/turboapi/routing.py
+++ b/python/turboapi/routing.py
@@ -133,31 +133,56 @@ def decorator(
             def wrapper(func: Callable) -> Callable:
                 # Analyze function signature
                 sig = inspect.signature(func)
+
+                # Resolve PEP 563 stringified annotations (``from __future__ import
+                # annotations``) so ``inspect.isclass(...)`` and identity checks on
+                # the annotation don't silently skip path/query binding.
+                try:
+                    import typing as _typing
+
+                    resolved_hints = _typing.get_type_hints(func)
+                except Exception:
+                    resolved_hints = {}
+
+                _str_aliases = {
+                    "int": int,
+                    "float": float,
+                    "bool": bool,
+                    "str": str,
+                    "bytes": bytes,
+                }
+
+                def _resolved_ann(p_name, p):
+                    ann = resolved_hints.get(p_name, p.annotation)
+                    if isinstance(ann, str):
+                        ann = _str_aliases.get(ann, ann)
+                    return ann
+
                 path_params = []
                 query_params = {}
                 request_model = None
 
                 for param_name, param in sig.parameters.items():
+                    raw_ann = param.annotation
+                    ann = _resolved_ann(param_name, param)
                     if f"{{{param_name}}}" in path:
                         # Path parameter
                         path_param = PathParameter(
                             name=param_name,
-                            type=param.annotation
-                            if param.annotation != inspect.Parameter.empty
-                            else str,
+                            type=ann if raw_ann != inspect.Parameter.empty else str,
                             default=param.default
                             if param.default != inspect.Parameter.empty
                             else None,
                             required=param.default == inspect.Parameter.empty,
                         )
                         path_params.append(path_param)
-                    elif param.annotation != inspect.Parameter.empty:
+                    elif raw_ann != inspect.Parameter.empty:
                         # Check if it's a request model (class type)
-                        if inspect.isclass(param.annotation):
-                            request_model = param.annotation
+                        if inspect.isclass(ann):
+                            request_model = ann
                         else:
                             # Query parameter
-                            query_params[param_name] = param.annotation
+                            query_params[param_name] = ann
 
                 # Create route definition
                 full_path = self.prefix + path
diff --git a/python/turboapi/zig_integration.py b/python/turboapi/zig_integration.py
index cad0f9a..b220e74 100644
--- a/python/turboapi/zig_integration.py
+++ b/python/turboapi/zig_integration.py
@@ -8,6 +8,7 @@
 import inspect
 import json
 import os
+import typing
 from typing import Any, get_origin
 
 try:
@@ -27,6 +28,57 @@
 )
 from .version_check import CHECK_MARK, CROSS_MARK, ROCKET
 
+# ── PEP 563 helpers ──────────────────────────────────────────────────────────
+# Resolve stringified annotations produced by ``from __future__ import
+# annotations`` so identity checks like ``ann is int`` and ``inspect.isclass``
+# below keep working. See issue #142 (Bug 3).
+
+
+def _resolve_handler_hints(handler) -> dict[str, Any]:
+    """Resolve stringified annotations (PEP 563 / ``from __future__ import annotations``).
+
+    Returns a mapping of parameter name -> resolved type. Silently falls back to
+    ``inspect`` annotations (which may still be strings) if resolution fails —
+    callers that truly need the resolved type should guard identity checks with
+    :func:`_coerce_annotation`.
+    """
+    try:
+        return typing.get_type_hints(handler, include_extras=True)
+    except Exception:
+        out: dict[str, Any] = {}
+        try:
+            sig = inspect.signature(handler)
+        except (TypeError, ValueError):
+            return out
+        for name, param in sig.parameters.items():
+            if param.annotation is not inspect.Parameter.empty:
+                out[name] = param.annotation
+        return out
+
+
+_STR_TYPE_ALIASES: dict[str, Any] = {
+    "int": int,
+    "float": float,
+    "bool": bool,
+    "str": str,
+    "bytes": bytes,
+    "bytearray": bytearray,
+    "list": list,
+    "dict": dict,
+    "tuple": tuple,
+    "set": set,
+}
+
+
+def _coerce_annotation(annotation: Any) -> Any:
+    """Best-effort resolution for a single stringified annotation.
+
+    Only handles built-in type names; anything else is returned unchanged.
+    """
+    if isinstance(annotation, str):
+        return _STR_TYPE_ALIASES.get(annotation, annotation)
+    return annotation
+
 _ASYNC_YIELD_OPS = {
     "ASYNC_GEN_WRAP",
     "BEFORE_ASYNC_WITH",
@@ -135,6 +187,15 @@ def classify_handler(handler, route) -> tuple[str, dict[str, str], dict]:
     is_async = inspect.iscoroutinefunction(handler)
 
     sig = inspect.signature(handler)
+    # Resolve PEP 563 stringified annotations so identity checks below (``is int``,
+    # ``issubclass``, ``isclass`` …) work even when the handler's module uses
+    # ``from __future__ import annotations``.
+    _resolved_hints = _resolve_handler_hints(handler)
+
+    def _ann_of(p_name: str, param: inspect.Parameter) -> Any:
+        ann = _resolved_hints.get(p_name, param.annotation)
+        return _coerce_annotation(ann)
+
     param_types = {}
     needs_body = False
     has_depends = False
@@ -161,14 +222,15 @@ def classify_handler(handler, route) -> tuple[str, dict[str, str], dict]:
         from .datastructures import File, Form, UploadFile
 
         for pname, param in sig.parameters.items():
+            ann = _ann_of(pname, param)
             if isinstance(param.default, Form):
                 has_form = True
                 param_types[pname] = "form"
             elif isinstance(param.default, File):
                 has_file = True
                 param_types[pname] = "file"
-            elif param.annotation is UploadFile or (
-                isinstance(param.annotation, type) and issubclass(param.annotation, UploadFile)
+            elif ann is UploadFile or (
+                isinstance(ann, type) and issubclass(ann, UploadFile)
             ):
                 has_file = True
                 param_types[pname] = "file"
@@ -177,7 +239,7 @@ def classify_handler(handler, route) -> tuple[str, dict[str, str], dict]:
 
     has_implicit_header_params = False
     for param_name, param in sig.parameters.items():
-        annotation = param.annotation
+        annotation = _ann_of(param_name, param)
 
         if param_types.get(param_name) in ("form", "file"):
             needs_body = True
diff --git a/tests/test_issue_142.py b/tests/test_issue_142.py
new file mode 100644
index 0000000..1441fc8
--- /dev/null
+++ b/tests/test_issue_142.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+"""
+Tests for issue #142: Three HTML-serving rough edges in 1.0.29
+
+Bug 1 — HTMLResponse.media_type doesn't reach the wire.
+Bug 2 — noargs response cache drops custom headers / Content-Type.
+Bug 3 — `from __future__ import annotations` silently breaks path-parameter binding.
+"""
+
+from __future__ import annotations
+
+import socket
+import sys
+import threading
+import time
+from pathlib import Path
+
+import pytest
+import requests
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "python"))
+
+from turboapi import TurboAPI  # noqa: E402
+from turboapi.responses import HTMLResponse  # noqa: E402
+
+
+def _free_port() -> int:
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.bind(("127.0.0.1", 0))
+        return s.getsockname()[1]
+
+
+def _start_app(app: TurboAPI) -> int:
+    port = _free_port()
+    t = threading.Thread(target=lambda: app.run(host="127.0.0.1", port=port), daemon=True)
+    t.start()
+    # Poll until ready
+    for _ in range(50):
+        try:
+            requests.get(f"http://127.0.0.1:{port}/__healthz_probe__", timeout=0.1)
+            break
+        except Exception:
+            time.sleep(0.05)
+    time.sleep(0.2)
+    return port
+
+
+def test_bug1_html_response_content_type_on_first_request():
+    """Bug 1: HTMLResponse on a noargs handler must emit text/html on the wire."""
+    app = TurboAPI(title="bug1")
+
+    @app.get("/")
+    def home():
+        return HTMLResponse("<h1>Hello</h1>")
+
+    port = _start_app(app)
+    r = requests.get(f"http://127.0.0.1:{port}/", timeout=2)
+    ct = r.headers.get("Content-Type", "")
+    assert "text/html" in ct, f"first request Content-Type is {ct!r}"
+    assert r.text == "<h1>Hello</h1>"
+
+
+def test_bug2_noargs_cache_preserves_content_type_on_replay():
+    """Bug 2: cached noargs replays must keep the original Content-Type."""
+    app = TurboAPI(title="bug2")
+
+    @app.get("/page")
+    def page():
+        return HTMLResponse("<p>hi</p>")
+
+    port = _start_app(app)
+    # Hit the route a few times — second+ requests are served from the
+    # noargs cache. All of them must report text/html.
+    cts = []
+    for _ in range(5):
+        r = requests.get(f"http://127.0.0.1:{port}/page", timeout=2)
+        cts.append(r.headers.get("Content-Type", ""))
+        assert r.text == "<p>hi</p>"
+    for i, ct in enumerate(cts):
+        assert "text/html" in ct, f"request #{i} got Content-Type={ct!r}"
+
+
+def test_bug3_future_annotations_path_param_str():
+    """Bug 3: ``from __future__ import annotations`` must not break path binding."""
+    app = TurboAPI(title="bug3-str")
+
+    @app.get("/item/{item_id}")
+    def get_item(item_id: str):
+        return {"id": item_id}
+
+    port = _start_app(app)
+    r = requests.get(f"http://127.0.0.1:{port}/item/42", timeout=2)
+    assert r.status_code == 200, r.text
+    assert r.json() == {"id": "42"}
+
+
+def test_bug3_future_annotations_path_param_int():
+    """Bug 3: int path params must coerce correctly under PEP 563."""
+    app = TurboAPI(title="bug3-int")
+
+    @app.get("/n/{n}")
+    def get_n(n: int):
+        return {"n": n, "type": type(n).__name__}
+
+    port = _start_app(app)
+    r = requests.get(f"http://127.0.0.1:{port}/n/123", timeout=2)
+    assert r.status_code == 200, r.text
+    assert r.json() == {"n": 123, "type": "int"}
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__, "-v", "-s"]))
diff --git a/zig/src/server.zig b/zig/src/server.zig
index 3c0bb6a..b93b5d1 100644
--- a/zig/src/server.zig
+++ b/zig/src/server.zig
@@ -222,8 +222,19 @@ const HandlerEntry = struct {
     // Vectorcall dispatch: ordered param metadata parsed at registration time
     param_meta: [MAX_PARAMS]ParamMeta = undefined,
     param_count: usize = 0,
+    // Per-entry cached response. The body lives in [cached_body_ptr..+cached_body_len];
+    // the content-type lives in [cached_ct_ptr..+cached_ct_len]. Empty ct (len=0)
+    // means "treat as application/json" for backwards compatibility with the
+    // pre-1.0.30 cache layout — no separate sentinel needed.
     cached_body_ptr: std.atomic.Value(usize) = std.atomic.Value(usize).init(0),
     cached_body_len: std.atomic.Value(usize) = std.atomic.Value(usize).init(0),
+    cached_ct_ptr: std.atomic.Value(usize) = std.atomic.Value(usize).init(0),
+    cached_ct_len: std.atomic.Value(usize) = std.atomic.Value(usize).init(0),
+};
+
+const CachedResponse = struct {
+    content_type: []const u8,
+    body: []const u8,
 };
 
 const HeaderPair = struct {
@@ -290,7 +301,7 @@ const StaticRouteEntry = struct {
 var routes: ?std.StringHashMap(HandlerEntry) = null;
 var native_routes: ?std.StringHashMap(NativeHandlerEntry) = null;
 var static_routes: ?std.StringHashMap(StaticRouteEntry) = null;
-var response_cache: ?std.StringHashMap([]const u8) = null;
+var response_cache: ?std.StringHashMap(CachedResponse) = null;
 var response_cache_lock: std.Io.Mutex = .init;
 var response_cache_count: usize = 0;
 const MAX_CACHE_ENTRIES: usize = 10_000; // bounded to prevent OOM via unique paths
@@ -398,14 +409,14 @@ fn getStaticRoutes() *std.StringHashMap(StaticRouteEntry) {
     return &static_routes.?;
 }
 
-fn getResponseCache() *std.StringHashMap([]const u8) {
+fn getResponseCache() *std.StringHashMap(CachedResponse) {
     if (response_cache == null) {
-        response_cache = std.StringHashMap([]const u8).init(allocator);
+        response_cache = std.StringHashMap(CachedResponse).init(allocator);
     }
     return &response_cache.?;
 }
 
-fn getCachedResponse(key: []const u8) ?[]const u8 {
+fn getCachedResponse(key: []const u8) ?CachedResponse {
     response_cache_lock.lockUncancelable(runtime.io);
     defer response_cache_lock.unlock(runtime.io);
     if (response_cache == null) return null;
@@ -413,62 +424,104 @@ fn getCachedResponse(key: []const u8) ?[]const u8 {
 }
 
 /// Cache a pre-rendered response, respecting MAX_CACHE_ENTRIES to prevent OOM.
-fn cacheResponse(key: []const u8, rendered: []const u8) void {
+/// `content_type` and `body` must both be heap-allocated; ownership is transferred
+/// to the cache on success or freed on failure / duplicate-key.
+fn cacheResponse(key: []const u8, content_type: []const u8, body: []const u8) void {
     response_cache_lock.lockUncancelable(runtime.io);
     defer response_cache_lock.unlock(runtime.io);
 
     if (response_cache_count >= MAX_CACHE_ENTRIES) {
-        allocator.free(rendered);
+        allocator.free(body);
+        if (content_type.len > 0) allocator.free(content_type);
         return;
     }
 
-    const key_dupe = allocator.dupe(u8, key) catch return;
+    const key_dupe = allocator.dupe(u8, key) catch {
+        allocator.free(body);
+        if (content_type.len > 0) allocator.free(content_type);
+        return;
+    };
     const cache = getResponseCache();
     const gop = cache.getOrPut(key_dupe) catch {
-        allocator.free(rendered);
+        allocator.free(body);
+        if (content_type.len > 0) allocator.free(content_type);
         allocator.free(key_dupe);
         return;
     };
 
     if (gop.found_existing) {
-        allocator.free(rendered);
+        allocator.free(body);
+        if (content_type.len > 0) allocator.free(content_type);
         allocator.free(key_dupe);
         return;
     }
 
-    gop.value_ptr.* = rendered;
+    gop.value_ptr.* = .{ .content_type = content_type, .body = body };
     response_cache_count += 1;
 }
 
-fn getCachedEntryBody(entry: *const HandlerEntry) ?[]const u8 {
-    const ptr_val = entry.cached_body_ptr.load(.acquire);
-    if (ptr_val == 0) return null;
+fn getCachedEntryResponse(entry: *const HandlerEntry) ?CachedResponse {
+    const body_ptr_val = entry.cached_body_ptr.load(.acquire);
+    if (body_ptr_val == 0) return null;
 
-    const len = entry.cached_body_len.load(.acquire);
-    const ptr: [*]const u8 = @ptrFromInt(ptr_val);
-    return ptr[0..len];
+    const body_len = entry.cached_body_len.load(.acquire);
+    const body_ptr: [*]const u8 = @ptrFromInt(body_ptr_val);
+
+    const ct_ptr_val = entry.cached_ct_ptr.load(.acquire);
+    const ct_len = entry.cached_ct_len.load(.acquire);
+    const ct: []const u8 = if (ct_ptr_val == 0 or ct_len == 0)
+        ""
+    else blk: {
+        const ct_ptr: [*]const u8 = @ptrFromInt(ct_ptr_val);
+        break :blk ct_ptr[0..ct_len];
+    };
+
+    return .{ .content_type = ct, .body = body_ptr[0..body_len] };
 }
 
-fn cacheEntryBody(entry: *HandlerEntry, rendered: []const u8) void {
+/// Atomically install a cached response on a HandlerEntry. Both `content_type`
+/// and `body` must be heap-allocated; ownership is transferred to the entry on
+/// success or freed on duplicate / OOM. Empty `content_type` (len=0) is allowed
+/// and means "fall back to application/json on serve".
+fn cacheEntryResponse(entry: *HandlerEntry, content_type: []const u8, body: []const u8) void {
     response_cache_lock.lockUncancelable(runtime.io);
     defer response_cache_lock.unlock(runtime.io);
 
     if (entry.cached_body_ptr.load(.monotonic) != 0) {
-        allocator.free(rendered);
+        allocator.free(body);
+        if (content_type.len > 0) allocator.free(content_type);
         return;
     }
 
     if (response_cache_count >= MAX_CACHE_ENTRIES) {
-        allocator.free(rendered);
+        allocator.free(body);
+        if (content_type.len > 0) allocator.free(content_type);
         return;
     }
 
-    entry.cached_body_len.store(rendered.len, .release);
-    entry.cached_body_ptr.store(@intFromPtr(rendered.ptr), .release);
+    // Publish content-type slots first (acquire-load on the body pointer is the
+    // synchronization point; readers only consult ct after seeing a non-zero body).
+    if (content_type.len > 0) {
+        entry.cached_ct_len.store(content_type.len, .release);
+        entry.cached_ct_ptr.store(@intFromPtr(content_type.ptr), .release);
+    } else {
+        entry.cached_ct_len.store(0, .release);
+        entry.cached_ct_ptr.store(0, .release);
+    }
+    entry.cached_body_len.store(body.len, .release);
+    entry.cached_body_ptr.store(@intFromPtr(body.ptr), .release);
     response_cache_count += 1;
 }
 
-fn sendCachedJsonBody(stream: std.Io.net.Stream, body: []const u8) void {
+/// Serve a cached response. When `content_type` is empty we keep the historical
+/// fast-path (hard-coded `application/json` header block) so the JSON case stays
+/// allocation-free; otherwise we delegate to the general-purpose `sendResponse`.
+fn sendCachedResponse(stream: std.Io.net.Stream, content_type: []const u8, body: []const u8) void {
+    if (content_type.len > 0 and !std.mem.eql(u8, content_type, "application/json")) {
+        sendResponse(stream, 200, content_type, body);
+        return;
+    }
+
     if (cors_headers.len > 0) {
         sendResponse(stream, 200, "application/json", body);
         return;
@@ -1288,8 +1341,8 @@ fn handleOneRequest(stream: std.Io.net.Stream, tstate: ?*anyopaque) !void {
     switch (entry.handler_tag) {
         .simple_sync_noargs => {
             if (cache_noargs_responses) {
-                if (getCachedEntryBody(entry_ptr)) |cached| {
-                    sendCachedJsonBody(stream, cached);
+                if (getCachedEntryResponse(entry_ptr)) |cached| {
+                    sendCachedResponse(stream, cached.content_type, cached.body);
                     return;
                 }
                 callPythonNoArgsEntryCaching(tstate, entry_ptr, stream);
@@ -1308,7 +1361,7 @@ fn handleOneRequest(stream: std.Io.net.Stream, tstate: ?*anyopaque) !void {
                 else
                     std.fmt.bufPrint(&cache_key_buf, "{s} {s}", .{ method, path }) catch path;
                 if (getCachedResponse(cache_key)) |cached| {
-                    sendCachedJsonBody(stream, cached);
+                    sendCachedResponse(stream, cached.content_type, cached.body);
                     return;
                 }
                 callPythonVectorcallCaching(tstate, entry, query_string, &match.params, stream, cache_key);
@@ -1321,8 +1374,8 @@ fn handleOneRequest(stream: std.Io.net.Stream, tstate: ?*anyopaque) !void {
             const eager = entry.handler_tag == .simple_async_eager;
             if (cache_noargs_responses) {
                 if (entry.param_count == 0) {
-                    if (getCachedEntryBody(entry_ptr)) |cached| {
-                        sendCachedJsonBody(stream, cached);
+                    if (getCachedEntryResponse(entry_ptr)) |cached| {
+                        sendCachedResponse(stream, cached.content_type, cached.body);
                         return;
                     }
                     callPythonAsyncNoArgs(tstate, entry, stream, entry_ptr, eager);
@@ -1333,7 +1386,7 @@ fn handleOneRequest(stream: std.Io.net.Stream, tstate: ?*anyopaque) !void {
                     else
                         std.fmt.bufPrint(&cache_key_buf, "{s} {s}", .{ method, path }) catch path;
                     if (getCachedResponse(cache_key)) |cached| {
-                        sendCachedJsonBody(stream, cached);
+                        sendCachedResponse(stream, cached.content_type, cached.body);
                         return;
                     }
                     callPythonAsyncVectorcall(tstate, entry, query_string, &match.params, stream, cache_key, eager);
@@ -1608,8 +1661,19 @@ fn sendTupleResponseAndCache(stream: std.Io.net.Stream, result: *c.PyObject, cac
 
     sendResponse(stream, status_code, content_type, body_slice);
 
+    // Only memoize successful 200 responses; sendCachedResponse always emits
+    // a 200 status line, so caching non-200 responses would corrupt the wire.
+    if (status_code != 200) return;
+
     const body_dupe = allocator.dupe(u8, body_slice) catch return;
-    cacheResponse(cache_key, body_dupe);
+    const ct_dupe: []const u8 = if (std.mem.eql(u8, content_type, "application/json"))
+        ""
+    else
+        allocator.dupe(u8, content_type) catch {
+            allocator.free(body_dupe);
+            return;
+        };
+    cacheResponse(cache_key, ct_dupe, body_dupe);
 }
 
 fn sendTupleResponseAndCacheEntry(stream: std.Io.net.Stream, result: *c.PyObject, entry: *HandlerEntry) void {
@@ -1634,8 +1698,17 @@ fn sendTupleResponseAndCacheEntry(stream: std.Io.net.Stream, result: *c.PyObject
 
     sendResponse(stream, status_code, content_type, body_slice);
 
+    if (status_code != 200) return;
+
     const body_dupe = allocator.dupe(u8, body_slice) catch return;
-    cacheEntryBody(entry, body_dupe);
+    const ct_dupe: []const u8 = if (std.mem.eql(u8, content_type, "application/json"))
+        ""
+    else
+        allocator.dupe(u8, content_type) catch {
+            allocator.free(body_dupe);
+            return;
+        };
+    cacheEntryResponse(entry, ct_dupe, body_dupe);
 }
 
 // ── simple_sync_noargs: PyObject_CallNoArgs — no tuple/dict construction ─────
@@ -1952,9 +2025,18 @@ fn callPythonVectorcallCaching(
 
     sendResponse(stream, status_code, content_type, body_slice);
 
-    // Cache body only (sendResponse adds fresh Date headers on each hit)
+    if (status_code != 200) return;
+
+    // Cache body + content-type (sendResponse adds fresh Date headers on each hit)
     const body_dupe = allocator.dupe(u8, body_slice) catch return;
-    cacheResponse(cache_key, body_dupe);
+    const ct_dupe: []const u8 = if (std.mem.eql(u8, content_type, "application/json"))
+        ""
+    else
+        allocator.dupe(u8, content_type) catch {
+            allocator.free(body_dupe);
+            return;
+        };
+    cacheResponse(cache_key, ct_dupe, body_dupe);
 }
 
 // ── Fast Python handler dispatch (simple_sync/body_sync) ─────────────────────
@@ -2620,9 +2702,11 @@ const CacheThreadCtx = struct {
 fn cacheThreadWorker(ctx: *const CacheThreadCtx) void {
     for (0..ctx.iterations) |_| {
         const rendered = allocator.dupe(u8, ctx.body) catch return;
-        cacheResponse(ctx.key, rendered);
+        // Empty content_type → cache treats as application/json (test stays
+        // body-only, matching the JSON fast path).
+        cacheResponse(ctx.key, "", rendered);
         const cached = getCachedResponse(ctx.key) orelse continue;
-        std.debug.assert(std.mem.eql(u8, cached, ctx.body));
+        std.debug.assert(std.mem.eql(u8, cached.body, ctx.body));
     }
 }
 
@@ -2648,8 +2732,8 @@ test "response cache is safe under concurrent access" {
     for (threads) |thread| thread.join();
 
     try std.testing.expectEqual(@as(usize, 2), response_cache_count);
-    try std.testing.expectEqualStrings("{\"item_id\":1}", getCachedResponse("GET /items/1").?);
-    try std.testing.expectEqualStrings("{\"item_id\":2}", getCachedResponse("GET /items/2").?);
+    try std.testing.expectEqualStrings("{\"item_id\":1}", getCachedResponse("GET /items/1").?.body);
+    try std.testing.expectEqualStrings("{\"item_id\":2}", getCachedResponse("GET /items/2").?.body);
 }
 
 // ── Fuzz tests ───────────────────────────────────────────────────────────────