praxis-proxy · shaneutt · Jun 23, 2026 · Jun 12, 2026 · Jun 23, 2026 · Jun 23, 2026
@@ -0,0 +1,46 @@
+<!-- Generated by: cargo xtask generate-filter-docs -->
+<!-- Do not edit manually -->
+
+# `openai_responses_model_rewrite`
+
+Rewrites the `model` field in Responses API request bodies.
+
+Requires Cargo feature: `ai-inference`.
+
+## Configuration
+
+| Field | Type | Required | Description |
+|-------|------|---------|-------------|
+| `default_model` | string | no | Model name to inject when the request body has no `model` field or when the field is `null`. |
+| `headers` | ModelRewriteHeaders | no | Header names for promoted model values. |
+| `headers.effective_model` | string | no | Header name for the effective (post-rewrite) model value. |
+| `headers.original_model` | string | no | Header name for the original (pre-rewrite) model value. |
+| `max_body_bytes` | usize | no | Maximum request body size to buffer before parsing. |
+| `model_aliases` | object<string, string> | no | Map from client-facing model names to backend model names. |
+| `on_invalid` | `continue` \| `reject` | no | Behavior when the body is not valid JSON. |
+
+## Examples
+
+### Example 1
+
+```yaml
+filter: openai_responses_model_rewrite
+default_model: "llama-3.3-70b"
+model_aliases:
+  codex-mini-latest: "llama-3.3-70b"
+  gpt-4.1-mini: "qwen-2.5-72b"
+```
+
+### Example 2
+
+```yaml
+filter: openai_responses_model_rewrite
+default_model: "llama-3.3-70b"
+model_aliases:
+  codex-mini-latest: "llama-3.3-70b"
+max_body_bytes: 10485760
+on_invalid: continue
+headers:
+  effective_model: x-praxis-ai-effective-model
+  original_model: x-praxis-ai-original-model
+```
@@ -21,6 +21,7 @@ Built-in filters organized by protocol and category.
 | [`model_to_header`](http/ai/model_to_header.md) | `ai-inference` | Promotes the JSON `"model"` field from the request body to a request header. |
 | [`openai_response_store`](http/ai/openai_response_store.md) | `ai-inference` | Persists non-streaming Responses API responses to the configured response store backend. |
 | [`openai_responses_format`](http/ai/openai_responses_format.md) | `ai-inference` | Classifies AI API request bodies and promotes routing facts to headers, metadata, and filter results without mutating the body. |
+| [`openai_responses_model_rewrite`](http/ai/openai_responses_model_rewrite.md) | `ai-inference` | Rewrites the `model` field in Responses API request bodies. |
 | [`openai_responses_validate`](http/ai/openai_responses_validate.md) | `ai-inference` | Validates and enriches Responses API requests. |
 | [`prompt_enrich`](http/ai/prompt_enrich.md) | `ai-inference` | Injects statically configured messages into the `messages` array of OpenAI-compatible chat completion request bodies. |
 | [`token_usage_headers`](http/ai/token_usage_headers.md) | - | Injects `Praxis-Token-Input`, `Praxis-Token-Output`, and `Praxis-Token-Total` headers into downstream responses when token usage data is present in `filter_metadata`. |

@@ -35,6 +35,7 @@ page.
 | [model-to-header-routing.yaml](configs/ai/model-to-header-routing.yaml) | Routes LLM API requests to different backends based on the "model" field in the JSON request body |
 | [format-routing.yaml](configs/ai/openai/responses/format-routing.yaml) | Routes AI API traffic by detected body format |
 | [full-flow.yaml](configs/ai/openai/responses/full-flow.yaml) | Combines format classification, request validation, and backend routing into a single pipeline |
+| [model-rewrite.yaml](configs/ai/openai/responses/model-rewrite.yaml) | Rewrites or injects the top-level `model` field in Responses API request bodies before forwarding to the inference backend |
 | [request-validate.yaml](configs/ai/openai/responses/request-validate.yaml) | Validates Responses API requests and rejects invalid parameter combinations |
 | [response-store.yaml](configs/ai/openai/responses/response-store.yaml) | Persists non-streaming Responses API responses to a database, serves stored data via GET endpoints, and handles DELETE /v1/responses/{id} locally |
 | [responses-routing.yaml](configs/ai/openai/responses/responses-routing.yaml) | Routes Responses API traffic by detected mode |

@@ -0,0 +1,65 @@
+# Responses Model Rewrite
+#
+# Rewrites or injects the top-level `model` field in Responses API
+# request bodies before forwarding to the inference backend.
+#
+# This pipeline uses `openai_responses_format` to classify the
+# request, then `openai_responses_model_rewrite` to map aliases and
+# inject `default_model` when the request names no model. The router
+# selects the backend cluster from the effective-model header, so
+# routing follows the rewritten model, not the client-facing name.
+#
+# Use case: Codex or other Responses API clients send
+# `model: "codex-mini-latest"` and the proxy transparently
+# rewrites it to a locally-hosted model before forwarding.
+#
+# Requires the ai-inference feature:
+#   cargo build -p praxis --features ai-inference
+
+listeners:
+  - name: ai-gateway
+    address: "127.0.0.1:8080"
+    filter_chains: [model-rewrite-pipeline]
+
+filter_chains:
+  - name: model-rewrite-pipeline
+    filters:
+      - filter: openai_responses_format
+        on_invalid: continue
+        headers:
+          format: x-praxis-ai-format
+          model: x-praxis-ai-model
+
+      - filter: openai_responses_model_rewrite
+        default_model: "llama-3.3-70b"
+        model_aliases:
+          codex-mini-latest: "llama-3.3-70b"
+          gpt-4.1-mini: "qwen-2.5-72b"
+        headers:
+          effective_model: x-praxis-ai-effective-model
+          original_model: x-praxis-ai-original-model
+
+      - filter: router
+        routes:
+          - path: "/v1/responses"
+            headers:
+              x-praxis-ai-effective-model: "llama-3.3-70b"
+            cluster: "llama-backend"
+          - path: "/v1/responses"
+            headers:
+              x-praxis-ai-effective-model: "qwen-2.5-72b"
+            cluster: "qwen-backend"
+          - path_prefix: "/"
+            cluster: "default-backend"
+
+      - filter: load_balancer
+        clusters:
+          - name: "llama-backend"
+            endpoints:
+              - "127.0.0.1:3001"
+          - name: "qwen-backend"
+            endpoints:
+              - "127.0.0.1:3002"
+          - name: "default-backend"
+            endpoints:
+              - "127.0.0.1:3003"
@@ -87,7 +87,7 @@ pub(crate) struct ClassifiedRequest {
 /// - `POST   /v1/responses/compact`
 /// - `DELETE /v1/responses/{id}`
 pub(crate) fn is_responses_path(method: &http::Method, path: &str) -> bool {
-    let path = path.strip_suffix('/').filter(|p| !p.is_empty()).unwrap_or(path);
+    let path = normalize_trailing_slash(path);
     let segments: Vec<&str> = path.split('/').collect();
 
     match (method, segments.as_slice()) {
@@ -108,6 +108,14 @@ pub(crate) fn is_responses_path(method: &http::Method, path: &str) -> bool {
     }
 }
 
+/// Report whether a request targets the Responses API create endpoint.
+///
+/// Only `POST /v1/responses` qualifies; a single trailing slash is tolerated.
+/// POSTs to sub-resources such as `/v1/responses/{id}/cancel` yield `false`.
+pub(crate) fn is_responses_create(method: &http::Method, path: &str) -> bool {
+    matches!(normalize_trailing_slash(path), "/v1/responses") && method == http::Method::POST
+}
+
 // -----------------------------------------------------------------------------
 // Body Classification
 // -----------------------------------------------------------------------------
@@ -212,6 +220,11 @@ fn has_anthropic_signals(obj: &serde_json::Map<String, serde_json::Value>) -> bo
 // Private Utilities
 // -----------------------------------------------------------------------------
 
+/// Drop one trailing `/` from `path`, leaving the root path `/` untouched.
+fn normalize_trailing_slash(path: &str) -> &str {
+    path.strip_suffix('/').map_or(path, |rest| if rest.is_empty() { path } else { rest })
+}
+
 /// Build a result with no extracted facts.
 pub(crate) fn empty_result(format: AiRequestFormat) -> ClassifiedRequest {
     ClassifiedRequest {
@@ -859,6 +872,66 @@ mod tests {
         );
     }
 
+    // -------------------------------------------------------------------------
+    // Create-Endpoint Classification
+    // -------------------------------------------------------------------------
+
+    #[test]
+    fn create_matches_post_v1_responses() {
+        // Canonical create route with no trailing slash.
+        let method = http::Method::POST;
+        let matched = is_responses_create(&method, "/v1/responses");
+        assert!(matched, "POST /v1/responses should match create");
+    }
+
+    #[test]
+    fn create_matches_post_v1_responses_trailing_slash() {
+        // One trailing slash is normalized away before comparison.
+        let method = http::Method::POST;
+        let matched = is_responses_create(&method, "/v1/responses/");
+        assert!(matched, "POST /v1/responses/ should match create");
+    }
+
+    #[test]
+    fn create_rejects_get() {
+        // Same path as create, but the method is not POST.
+        let method = http::Method::GET;
+        let matched = is_responses_create(&method, "/v1/responses");
+        assert!(!matched, "GET /v1/responses should not match create");
+    }
+
+    #[test]
+    fn create_rejects_cancel_subresource() {
+        // POST to a per-response sub-resource is not a create call.
+        let method = http::Method::POST;
+        let matched = is_responses_create(&method, "/v1/responses/resp_abc/cancel");
+        assert!(!matched, "POST /v1/responses/{{id}}/cancel should not match create");
+    }
+
+    #[test]
+    fn create_rejects_input_tokens() {
+        // The `input_tokens` sub-path must not be treated as create.
+        let method = http::Method::POST;
+        let matched = is_responses_create(&method, "/v1/responses/input_tokens");
+        assert!(!matched, "POST /v1/responses/input_tokens should not match create");
+    }
+
+    #[test]
+    fn create_rejects_compact() {
+        // The `compact` sub-path must not be treated as create.
+        let method = http::Method::POST;
+        let matched = is_responses_create(&method, "/v1/responses/compact");
+        assert!(!matched, "POST /v1/responses/compact should not match create");
+    }
+
+    #[test]
+    fn create_rejects_chat_completions() {
+        // A POST outside the `/v1/responses` path is never a create call.
+        let method = http::Method::POST;
+        let matched = is_responses_create(&method, "/v1/chat/completions");
+        assert!(!matched, "POST /v1/chat/completions should not match create");
+    }
+
     #[test]
     fn previous_response_id_only_classifies_as_responses() {
         let body = br#"{"model":"gpt-4.1","previous_response_id":"resp_abc"}"#;

@@ -45,6 +45,8 @@ pub use guardrails::AiGuardrailsFilter;
 pub use inference::ModelToHeaderFilter;
 pub(crate) use on_invalid::OnInvalidBehavior;
 #[cfg(feature = "ai-inference")]
+pub use openai::ModelRewriteFilter;
+#[cfg(feature = "ai-inference")]
 pub use openai::OpenaiResponsesValidateFilter;
 #[cfg(feature = "ai-inference")]
 pub use openai::ResponseStoreFilter;

@@ -5,6 +5,8 @@
 
 pub(crate) mod responses;
 
+#[cfg(feature = "ai-inference")]
+pub use responses::ModelRewriteFilter;
 #[cfg(feature = "ai-inference")]
 pub use responses::OpenaiResponsesValidateFilter;
 pub use responses::{ResponseStoreFilter, ResponsesFormatFilter};
@@ -18,6 +18,8 @@
 //! to validate parameter combinations and extract additional fields.
 
 mod config;
+#[cfg(feature = "ai-inference")]
+pub(crate) mod model_rewrite;
 #[expect(clippy::allow_attributes, reason = "dead_code expect unfulfilled on modules")]
 #[allow(
     dead_code,
@@ -26,6 +28,8 @@ mod config;
 pub(crate) mod state;
 pub(crate) mod store;
 
+#[cfg(feature = "ai-inference")]
+pub use model_rewrite::ModelRewriteFilter;
 pub use store::ResponseStoreFilter;
 
 #[cfg(test)]