Skip to content

Commit 5749506

Browse files
committed
resolve comments from simo
Signed-off-by: TingtingZhou7 <zhoutt96@gmail.com>
1 parent d0da3a9 commit 5749506

6 files changed

Lines changed: 112 additions & 51 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,3 +160,6 @@ clients/java/target/
160160
clients/java/build.sbt
161161
clients/java/gradle.properties
162162
openapitools.json
163+
164+
# Local MCP config
165+
mcp.local.yaml

crates/mcp/src/transform/transformer.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,8 @@ impl ResponseTransformer {
4141
// (derived from builtin_type), not payload key heuristics.
4242
fn image_payload_from_wrapped_content(
4343
result: &Value,
44-
format: &ResponseFormat,
44+
_format: &ResponseFormat,
4545
) -> Option<Value> {
46-
if !matches!(format, ResponseFormat::ImageGenerationCall) {
47-
return None;
48-
}
49-
5046
// Already a direct object payload.
5147
if result
5248
.as_object()

crates/protocols/src/responses.rs

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,19 @@ pub struct CodeInterpreterTool {
8686

8787
/// Built-in image generation tool.
8888
///
89-
/// The upstream API may add optional fields over time. We keep this tool shape
90-
/// open by preserving unknown key/value pairs.
89+
/// Known fields are typed for validation; unknown fields are preserved for
90+
/// forward compatibility via `extra`.
9191
#[derive(Debug, Clone, Deserialize, Serialize, Default, schemars::JsonSchema)]
9292
pub struct ImageGenerationTool {
93+
pub size: Option<String>,
94+
pub quality: Option<String>,
95+
pub background: Option<String>,
96+
pub output_format: Option<String>,
97+
pub output_compression: Option<u32>,
98+
pub moderation: Option<String>,
99+
pub model: Option<String>,
93100
#[serde(flatten)]
94-
pub options: HashMap<String, Value>,
101+
pub extra: HashMap<String, Value>,
95102
}
96103

97104
/// `require_approval` values.
@@ -1506,14 +1513,16 @@ mod tests {
15061513
let tool: ResponseTool = serde_json::from_value(json!({
15071514
"type": "image_generation",
15081515
"size": "1024x1024",
1509-
"quality": "high"
1516+
"quality": "high",
1517+
"custom_flag": true
15101518
}))
15111519
.expect("image_generation tool should deserialize");
15121520

15131521
match tool {
15141522
ResponseTool::ImageGeneration(t) => {
1515-
assert_eq!(t.options.get("size"), Some(&json!("1024x1024")));
1516-
assert_eq!(t.options.get("quality"), Some(&json!("high")));
1523+
assert_eq!(t.size.as_deref(), Some("1024x1024"));
1524+
assert_eq!(t.quality.as_deref(), Some("high"));
1525+
assert_eq!(t.extra.get("custom_flag"), Some(&json!(true)));
15171526
}
15181527
other => panic!("expected image_generation tool, got {other:?}"),
15191528
}

mcp.local.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,11 @@ servers:
77
tools:
88
generate_image:
99
response_format: image_generation_call
10+
- name: search_web
11+
protocol: streamable
12+
url: http://localhost:8080/mcp
13+
builtin_type: web_search_preview
14+
builtin_tool_name: search_web
15+
tools:
16+
search_web:
17+
response_format: web_search_call

model_gateway/src/routers/openai/mcp/tool_loop.rs

Lines changed: 84 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use openai_protocol::{
1717
is_function_call_type, CodeInterpreterCallEvent, FileSearchCallEvent,
1818
ImageGenerationCallEvent, ItemType, McpEvent, OutputItemEvent, WebSearchCallEvent,
1919
},
20-
responses::{generate_id, ResponseInput, ResponsesRequest},
20+
responses::{generate_id, ResponseInput, ResponseTool, ResponsesRequest},
2121
};
2222
use serde_json::{json, to_value, Value};
2323
use smg_mcp::{
@@ -35,8 +35,8 @@ use crate::{
3535
},
3636
};
3737

38-
// TODO: Temporary hardcoded image model; replace with configurable model routing later.
39-
const IMAGE_MODEL: &str = "openai.gpt-image-1.5";
38+
// Only image_generation is enabled for request-level argument overrides for now.
39+
const ALLOWED_BUILTIN_TOOLS_FOR_OVERRIDE: &[&str] = &["image_generation"];
4040

4141
/// State for tracking multi-turn tool calling loop
4242
pub(crate) struct ToolLoopState {
@@ -103,6 +103,7 @@ pub(crate) async fn execute_streaming_tool_calls(
103103
state: &mut ToolLoopState,
104104
sequence_number: &mut u64,
105105
model_id: &str,
106+
original_body: &ResponsesRequest,
106107
) -> bool {
107108
for call in pending_calls {
108109
if call.name.is_empty() {
@@ -166,13 +167,12 @@ pub(crate) async fn execute_streaming_tool_calls(
166167
continue;
167168
}
168169
};
169-
sanitize_builtin_tool_arguments(&response_format, &mut arguments);
170-
170+
apply_request_tool_overrides(&response_format, original_body, &mut arguments);
171171
if !send_tool_call_intermediate_event(tx, &call, &response_format, sequence_number) {
172172
return false;
173173
}
174174

175-
debug!("Calling MCP tool '{}' with args: {}", call.name, args_str);
175+
debug!("Calling MCP tool '{}' with args: {}", call.name, arguments);
176176
let tool_output = session
177177
.execute_tool(ToolExecutionInput {
178178
call_id: call.call_id.clone(),
@@ -261,6 +261,82 @@ pub(crate) fn prepare_mcp_tools_as_functions(payload: &mut Value, session: &McpT
261261
}
262262
}
263263

264+
fn builtin_tool_type_for_response_format(response_format: &ResponseFormat) -> Option<&'static str> {
265+
match response_format {
266+
ResponseFormat::ImageGenerationCall => Some("image_generation"),
267+
ResponseFormat::WebSearchCall => Some("web_search_preview"),
268+
ResponseFormat::CodeInterpreterCall => Some("code_interpreter"),
269+
ResponseFormat::FileSearchCall | ResponseFormat::Passthrough => None,
270+
}
271+
}
272+
273+
fn request_tool_overrides(
274+
response_format: &ResponseFormat,
275+
original_body: &ResponsesRequest,
276+
) -> Option<Value> {
277+
let builtin_tool_type = builtin_tool_type_for_response_format(response_format)?;
278+
if !ALLOWED_BUILTIN_TOOLS_FOR_OVERRIDE.contains(&builtin_tool_type) {
279+
return None;
280+
}
281+
282+
let tools = original_body.tools.as_ref()?;
283+
284+
tools.iter().find_map(|tool| {
285+
let mut serialized = match (builtin_tool_type, tool) {
286+
("image_generation", ResponseTool::ImageGeneration(image_tool)) => {
287+
match to_value(image_tool).ok()? {
288+
Value::Object(obj) => obj,
289+
_ => return None,
290+
}
291+
}
292+
("web_search_preview", ResponseTool::WebSearchPreview(web_search_tool)) => {
293+
match to_value(web_search_tool).ok()? {
294+
Value::Object(obj) => obj,
295+
_ => return None,
296+
}
297+
}
298+
("code_interpreter", ResponseTool::CodeInterpreter(code_interpreter_tool)) => {
299+
match to_value(code_interpreter_tool).ok()? {
300+
Value::Object(obj) => obj,
301+
_ => return None,
302+
}
303+
}
304+
_ => {
305+
return None;
306+
}
307+
};
308+
serialized.retain(|_, v| !v.is_null());
309+
if serialized.is_empty() {
310+
None
311+
} else {
312+
Some(Value::Object(serialized))
313+
}
314+
})
315+
}
316+
317+
fn apply_request_tool_overrides(
318+
response_format: &ResponseFormat,
319+
original_body: &ResponsesRequest,
320+
arguments: &mut Value,
321+
) {
322+
let Some(overrides) = request_tool_overrides(response_format, original_body) else {
323+
return;
324+
};
325+
let Some(override_obj) = overrides.as_object() else {
326+
return;
327+
};
328+
329+
if !arguments.is_object() {
330+
*arguments = json!({});
331+
}
332+
333+
if let Some(args_obj) = arguments.as_object_mut() {
334+
for (k, v) in override_obj {
335+
args_obj.insert(k.clone(), v.clone());
336+
}
337+
}
338+
}
339+
264340
/// Build a resume payload with conversation history
265341
pub(crate) fn build_resume_payload(
266342
base_payload: &Value,
@@ -683,11 +759,10 @@ pub(crate) async fn execute_tool_loop(
683759
continue;
684760
}
685761
};
686-
sanitize_builtin_tool_arguments(&response_format, &mut arguments);
687-
762+
apply_request_tool_overrides(&response_format, original_body, &mut arguments);
688763
debug!(
689764
"Calling MCP tool '{}' with args: {}",
690-
call.name, call.arguments
765+
call.name, arguments
691766
);
692767
let tool_output = session
693768
.execute_tool(ToolExecutionInput {
@@ -849,37 +924,6 @@ fn build_mcp_call_item(
849924
})
850925
}
851926

852-
// TODO: Extend this with per-tool argument override rules as we add more
853-
// builtin tool formats that need request-time normalization.
854-
fn sanitize_builtin_tool_arguments(response_format: &ResponseFormat, arguments: &mut Value) {
855-
match response_format {
856-
ResponseFormat::ImageGenerationCall => {
857-
// Current fallback behavior: we intentionally narrow image arguments
858-
// to `model` + `revised_prompt` for compatibility. This drops extra
859-
// image options for now; we'll later pass either explicit user-provided
860-
// values or safe defaults per option instead of truncating.
861-
let revised_prompt = arguments
862-
.as_object()
863-
.and_then(|obj| {
864-
obj.get("revised_prompt")
865-
.and_then(|v| v.as_str())
866-
.or_else(|| obj.get("prompt").and_then(|v| v.as_str()))
867-
})
868-
.unwrap_or("")
869-
.to_string();
870-
871-
*arguments = json!({
872-
"model": IMAGE_MODEL,
873-
"revised_prompt": revised_prompt
874-
});
875-
}
876-
ResponseFormat::WebSearchCall
877-
| ResponseFormat::CodeInterpreterCall
878-
| ResponseFormat::FileSearchCall
879-
| ResponseFormat::Passthrough => {}
880-
}
881-
}
882-
883927
/// Build a transformed output item using ResponseTransformer
884928
///
885929
/// Converts the output using the tool's response_format to the correctly-typed

model_gateway/src/routers/openai/responses/streaming.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -968,6 +968,7 @@ pub(super) fn handle_streaming_with_tool_interception(
968968
&mut state,
969969
&mut sequence_number,
970970
&original_request.model,
971+
&original_request,
971972
)
972973
.await
973974
{

0 commit comments

Comments
 (0)