diff --git a/README.md b/README.md index 2c3cd17a..ec3177f8 100755 --- a/README.md +++ b/README.md @@ -150,10 +150,9 @@ python opentinker/client/math_rl.py \ data_path=data/math_agentloop/train.parquet \ val_data_path=data/math_agentloop/test.parquet \ scheduler_url=http://: \ - interaction.config.env_port= \ - interaction.config.env_host= + env_url=http://: -# multi turn tool ca +# multi turn tool call python opentinker/client/math_tool_rl.py \ tokenizer_path=Qwen/Qwen2.5-1.5B \ batch_size=16 \ @@ -162,8 +161,7 @@ python opentinker/client/math_tool_rl.py \ save_freq=1000 \ test_freq=5 \ scheduler_url=http://: \ - interaction.config.env_port= \ - interaction.config.env_host= + env_url=http://: ``` **Gomoku RL (Multi-turn):** @@ -176,8 +174,7 @@ python opentinker/client/gomoku_rl.py \ save_freq=1000 \ test_freq=5 \ scheduler_url=http://: \ - interaction.config.env_port= \ - interaction.config.env_host= + env_url=http://: ``` **Math Inference:** @@ -188,8 +185,8 @@ python opentinker/client/math_inference.py \ data_path=data/math/test.parquet \ output_path=./tmp/results.jsonl \ max_samples=5 \ - env_endpoint=http://: \ - scheduler_url=http://: + scheduler_url=http://: \ + env_url=http://: # multi turn tool call python opentinker/client/math_tool_inference.py \ @@ -197,8 +194,8 @@ python opentinker/client/math_tool_inference.py \ data_path=data/math/test.parquet \ output_path=./tmp/results.jsonl \ max_samples=5 \ - env_endpoint=http://: \ - scheduler_url=http://: + scheduler_url=http://: \ + env_url=http://: ``` **Gomoku Inference:** @@ -207,8 +204,8 @@ python opentinker/client/gomoku_inference.py \ model_path= \ output_path=./tmp/results.jsonl \ max_samples=5 \ - env_endpoint=http://: \ - scheduler_url=http://: + scheduler_url=http://: \ + env_url=http://: ``` diff --git a/opentinker/client/client_config/generic_env_param.yaml b/opentinker/client/client_config/generic_env_param.yaml index 07746dae..c273c772 100755 --- a/opentinker/client/client_config/generic_env_param.yaml +++ b/opentinker/client/client_config/generic_env_param.yaml @@ -5,9 +5,10 @@ # - No reward function (environment provides rewards) # - Interaction configuration for Gym environment -server_url: "http://localhost:8000" # 如果是scheduler版本,不需要这个参数 +# Server settings scheduler_url: "http://localhost:8766" scheduler_api_key: "otk_98b8db24ccd64c92e1fdd9a232e209fa" +env_url: "http://localhost:8084" # Environment server URL # GPU allocation num_gpus: 4 @@ -29,7 +30,7 @@ interaction: name: "gym_env" # Name referenced in dataset's interaction_kwargs class_path: opentinker.environment.gym_environment_interaction.GymEnvironmentInteraction config: - env_endpoint: "http://localhost:8084" # Mock environment server + env_endpoint: ${env_url} # References top-level env_url max_steps: 50 observation_template: "Environment: {observation}" diff --git a/opentinker/client/client_config/gomoku_inference_config.yaml b/opentinker/client/client_config/gomoku_inference_config.yaml index 2153c4a0..13608b4a 100755 --- a/opentinker/client/client_config/gomoku_inference_config.yaml +++ b/opentinker/client/client_config/gomoku_inference_config.yaml @@ -23,7 +23,7 @@ output_path: null # Output results file (jsonl) max_samples: 10 # Number of games to play # Environment settings -env_endpoint: http://localhost:8091 +env_url: http://localhost:8091 # Multi-turn settings (Gomoku is multi-turn game) multi_turn: diff --git a/opentinker/client/client_config/gomoku_inference_scheduler_config.yaml b/opentinker/client/client_config/gomoku_inference_scheduler_config.yaml index 41a29fed..379c1b35 100755 --- a/opentinker/client/client_config/gomoku_inference_scheduler_config.yaml +++ b/opentinker/client/client_config/gomoku_inference_scheduler_config.yaml @@ -29,7 +29,7 @@ output_path: null # Output results file (jsonl) max_samples: 10 # Number of games to play # Environment settings -env_endpoint: http://0.0.0.0:8091 +env_url: http://0.0.0.0:8091 # Multi-turn settings (Gomoku is multi-turn game) multi_turn: diff --git a/opentinker/client/client_config/gomoku_param.yaml b/opentinker/client/client_config/gomoku_param.yaml index 573ddf88..5fc9e899 100755 --- a/opentinker/client/client_config/gomoku_param.yaml +++ b/opentinker/client/client_config/gomoku_param.yaml @@ -53,9 +53,7 @@ interaction: name: gomoku class_path: opentinker.environment.gym_environment_interaction.GymEnvironmentInteraction config: - env_host: 0.0.0.0 - env_port: 8088 - env_endpoint: http://${interaction.config.env_host}:${interaction.config.env_port} + env_endpoint: ${env_url} # References top-level env_url max_steps: 81 # 这是后端GymEnvironmentInteraction的参数 max_total_steps: 39 # 调用环境的step方法的最大次数限制(防止invalid move hacking) max_initial_moves: 0 @@ -73,10 +71,11 @@ multi_turn: experiment_name: "gomoku_interaction" # Experiment name in Weave -# Scheduler settings +# Server settings scheduler_url: "http://0.0.0.0:8780" scheduler_api_key: otk_98b8db24ccd64c92e1fdd9a232e209fa # this is user id +env_url: "http://0.0.0.0:8088" # Game environment server URL # GPU settings -num_gpus: 4 +num_gpus: 2 diff --git a/opentinker/client/client_config/math_code_interpreter_inference_config.yaml b/opentinker/client/client_config/math_code_interpreter_inference_config.yaml index da895ed5..2ceca735 100755 --- a/opentinker/client/client_config/math_code_interpreter_inference_config.yaml +++ b/opentinker/client/client_config/math_code_interpreter_inference_config.yaml @@ -29,7 +29,7 @@ output_path: null # Output results file (jsonl) max_samples: null # Limit samples (null = all) # Environment settings (code interpreter math server) -env_endpoint: http://0.0.0.0:8088 +env_url: http://0.0.0.0:8088 # Multi-turn settings (allow code execution iterations) multi_turn: diff --git a/opentinker/client/client_config/math_code_interpreter_param.yaml b/opentinker/client/client_config/math_code_interpreter_param.yaml index 8b07a647..fcc9bfd3 100755 --- a/opentinker/client/client_config/math_code_interpreter_param.yaml +++ b/opentinker/client/client_config/math_code_interpreter_param.yaml @@ -62,14 +62,13 @@ interaction: name: math_code_interpreter class_path: opentinker.environment.gym_environment_interaction.GymEnvironmentInteraction config: - env_host: 0.0.0.0 - env_port: 8088 - env_endpoint: http://${interaction.config.env_host}:${interaction.config.env_port} + env_endpoint: ${env_url} # References top-level env_url max_steps: 5 # Max interaction steps (code executions) -# Scheduler settings +# Server settings scheduler_url: "http://0.0.0.0:8780" scheduler_api_key: null +env_url: "http://0.0.0.0:8088" # Code interpreter server URL # GPU settings num_gpus: 4 diff --git a/opentinker/client/client_config/math_inference_config.yaml b/opentinker/client/client_config/math_inference_config.yaml index d9379542..4398b020 100755 --- a/opentinker/client/client_config/math_inference_config.yaml +++ b/opentinker/client/client_config/math_inference_config.yaml @@ -20,7 +20,7 @@ output_path: null # Output results file (jsonl) max_samples: null # Limit samples (null = all) # Environment settings -env_endpoint: http://localhost:8088 +env_url: http://localhost:8088 # Multi-turn settings (same as training config) multi_turn: diff --git a/opentinker/client/client_config/math_inference_scheduler_config.yaml b/opentinker/client/client_config/math_inference_scheduler_config.yaml index fff17506..7cd50810 100755 --- a/opentinker/client/client_config/math_inference_scheduler_config.yaml +++ b/opentinker/client/client_config/math_inference_scheduler_config.yaml @@ -27,7 +27,7 @@ output_path: null # Output results file (jsonl) max_samples: null # Limit samples (null = all) # Environment settings -env_endpoint: http://0.0.0.0:8088 +env_url: http://0.0.0.0:8088 # Multi-turn settings (same as training config) multi_turn: diff --git a/opentinker/client/client_config/math_param.yaml b/opentinker/client/client_config/math_param.yaml index ad98617e..c32ae90f 100755 --- a/opentinker/client/client_config/math_param.yaml +++ b/opentinker/client/client_config/math_param.yaml @@ -57,9 +57,7 @@ interaction: name: math class_path: opentinker.environment.gym_environment_interaction.GymEnvironmentInteraction config: - env_host: 0.0.0.0 - env_port: 8088 - env_endpoint: http://${interaction.config.env_host}:${interaction.config.env_port} + env_endpoint: ${env_url} # References top-level env_url max_steps: 1 # Max interaction steps multi_turn: @@ -69,9 +67,10 @@ multi_turn: weave_project: null experiment_name: "math_interaction" -# Scheduler settings +# Server settings scheduler_url: "http://0.0.0.0:8780" scheduler_api_key: otk_98b8db24ccd64c92e1fdd9a232e209fa +env_url: "http://0.0.0.0:8088" # Math environment server URL # GPU settings num_gpus: 4 diff --git a/opentinker/client/gomoku_inference.py b/opentinker/client/gomoku_inference.py index 9871025c..748e0317 100755 --- a/opentinker/client/gomoku_inference.py +++ b/opentinker/client/gomoku_inference.py @@ -47,12 +47,12 @@ def main(args): print(f"✓ Inference job {job_id} started at {vllm_server_url}") # 2. Setup GameStatsClient for per-step metrics (with job_id isolation) - game_stats = GameStatsClient(args.env_endpoint, job_id=job_id) + game_stats = GameStatsClient(args.env_url, job_id=job_id) if game_stats.health_check(): - print(f"✓ Connected to game server at {args.env_endpoint}") + print(f"✓ Connected to game server at {args.env_url}") game_stats.reset_all() # Reset stats for this job before inference else: - print(f"⚠ Game server not available at {args.env_endpoint}, continuing without stats") + print(f"⚠ Game server not available at {args.env_url}, continuing without stats") game_stats = None # 3. Run inference using the remote vLLM server @@ -67,7 +67,7 @@ def main(args): tokenizer_path=args.get("tokenizer_path") or args.model_path, data_path=args.get("data_path"), # None for dynamic generation game_class=GomokuGame, - env_endpoint=args.env_endpoint, + env_endpoint=args.env_url, job_id=job_id, # Pass job_id for stats isolation output_path=args.get("output_path"), temperature=args.temperature, diff --git a/opentinker/client/gomoku_rl.py b/opentinker/client/gomoku_rl.py index 6c02137a..731feb61 100755 --- a/opentinker/client/gomoku_rl.py +++ b/opentinker/client/gomoku_rl.py @@ -81,10 +81,11 @@ def main(args): "max_total_steps": interaction_config.get("max_total_steps", 40), } - env_endpoint = interaction_config.env_endpoint + # Use top-level env_url (with fallback to interaction.config.env_endpoint) + env_url = args.get("env_url") or interaction_config.get("env_endpoint") print("\nSetting up GameEnvironment with GomokuGame...") - print(f" Environment endpoint: {env_endpoint}") + print(f" Environment URL: {env_url}") print(f" Board size: {game_kwargs['board_size']}") print(f" Job ID for stats: {job_id}") @@ -99,12 +100,12 @@ def main(args): print(f" Interaction config path: {env.get_interaction_config_path()}") # 3. Setup GameStatsClient for per-step metrics (use env.job_id for consistency) - game_stats = GameStatsClient(env_endpoint, job_id=env.job_id) + game_stats = GameStatsClient(env_url, job_id=env.job_id) if game_stats.health_check(): - print(f"✓ Connected to game server for metrics at {env_endpoint}") + print(f"✓ Connected to game server for metrics at {env_url}") game_stats.reset_all() # Reset all stats before training else: - print(f"⚠ Game server at {env_endpoint} not responding - metrics disabled") + print(f"⚠ Game server at {env_url} not responding - metrics disabled") game_stats = None # 4. Connect to allocated server diff --git a/opentinker/client/math_inference.py b/opentinker/client/math_inference.py index 2da2bc90..6ebd3836 100755 --- a/opentinker/client/math_inference.py +++ b/opentinker/client/math_inference.py @@ -49,12 +49,12 @@ def main(args): print(f"✓ Inference job {job_id} started at {vllm_server_url}") # 2. Setup GameStatsClient for per-step metrics (with job_id isolation) - game_stats = GameStatsClient(args.env_endpoint, job_id=job_id) + game_stats = GameStatsClient(args.env_url, job_id=job_id) if game_stats.health_check(): - print(f"✓ Connected to game server at {args.env_endpoint}") + print(f"✓ Connected to game server at {args.env_url}") game_stats.reset_all() # Reset stats for this job before inference else: - print(f"⚠ Game server not available at {args.env_endpoint}, continuing without stats") + print(f"⚠ Game server not available at {args.env_url}, continuing without stats") game_stats = None # 3. Run inference using the remote vLLM server @@ -66,7 +66,7 @@ def main(args): tokenizer_path=args.get("tokenizer_path") or args.model_path, data_path=args.data_path, game_class=MathGame, - env_endpoint=args.env_endpoint, + env_endpoint=args.env_url, job_id=job_id, # Pass job_id for stats isolation output_path=args.get("output_path"), temperature=args.temperature, diff --git a/opentinker/client/math_rl.py b/opentinker/client/math_rl.py index 90edafe3..29809d1b 100755 --- a/opentinker/client/math_rl.py +++ b/opentinker/client/math_rl.py @@ -46,7 +46,8 @@ def main(args): print(f"✓ Job {job_id} allocated at {server_url}") # 2. Setup environment (job_id is automatically handled) - env_endpoint = args.interaction.config.env_endpoint + # Use top-level env_url (with fallback to interaction.config.env_endpoint) + env_url = args.get("env_url") or args.interaction.config.get("env_endpoint") env = MathGameEnvironment( game_class=MathGame, config=args, @@ -57,13 +58,13 @@ def main(args): print(f"✓ Environment created, interaction config: {env.get_interaction_config_path()}") # 3. Setup game stats client (use env.job_id for consistency) - game_stats = GameStatsClient(env_endpoint, job_id=env.job_id) + game_stats = GameStatsClient(env_url, job_id=env.job_id) if game_stats.health_check(): game_stats.reset_all() - print(f"✓ Connected to math server at {env_endpoint}") + print(f"✓ Connected to math server at {env_url}") else: game_stats = None - print(f"⚠ Math server not responding at {env_endpoint}") + print(f"⚠ Math server not responding at {env_url}") # 4. Connect to training server client = ServiceClient( diff --git a/opentinker/client/math_tool_inference.py b/opentinker/client/math_tool_inference.py index 07775ed1..4939c271 100755 --- a/opentinker/client/math_tool_inference.py +++ b/opentinker/client/math_tool_inference.py @@ -49,12 +49,12 @@ def main(args): print(f"✓ Inference job {job_id} started at {vllm_server_url}") # 2. Setup GameStatsClient for per-step metrics (with job_id isolation) - game_stats = GameStatsClient(args.env_endpoint, job_id=job_id) + game_stats = GameStatsClient(args.env_url, job_id=job_id) if game_stats.health_check(): - print(f"✓ Connected to code interpreter game server at {args.env_endpoint}") + print(f"✓ Connected to code interpreter game server at {args.env_url}") game_stats.reset_all() # Reset stats for this job before inference else: - print(f"⚠ Game server not available at {args.env_endpoint}, continuing without stats") + print(f"⚠ Game server not available at {args.env_url}, continuing without stats") print(f" Make sure to start: python opentinker/environment/math/code_interpreter_math_server.py --port 8088") game_stats = None @@ -69,7 +69,7 @@ def main(args): tokenizer_path=args.get("tokenizer_path") or args.model_path, data_path=args.data_path, game_class=CodeInterpreterMathGame, - env_endpoint=args.env_endpoint, + env_endpoint=args.env_url, job_id=job_id, # Pass job_id for stats isolation output_path=args.get("output_path"), temperature=args.temperature, diff --git a/opentinker/client/math_tool_rl.py b/opentinker/client/math_tool_rl.py index 790ee007..b9cbecda 100755 --- a/opentinker/client/math_tool_rl.py +++ b/opentinker/client/math_tool_rl.py @@ -46,7 +46,8 @@ def main(args): # 2. Setup environment print("\n[2/4] Setting up environment...") - env_endpoint = args.interaction.config.env_endpoint + # Use top-level env_url (with fallback to interaction.config.env_endpoint) + env_url = args.get("env_url") or args.interaction.config.get("env_endpoint") env = MathCodeInterpreterEnvironment( game_class=CodeInterpreterMathGame, config=args, @@ -56,18 +57,18 @@ def main(args): ) print(f"✓ Environment created") print(f" - Interaction config: {env.get_interaction_config_path()}") - print(f" - Game server endpoint: {env_endpoint}") + print(f" - Game server endpoint: {env_url}") # 3. Setup game stats client print("\n[3/4] Connecting to game server...") - game_stats = GameStatsClient(env_endpoint, job_id=env.job_id) + game_stats = GameStatsClient(env_url, job_id=env.job_id) if game_stats.health_check(): game_stats.reset_all() - print(f"✓ Connected to game server at {env_endpoint}") + print(f"✓ Connected to game server at {env_url}") else: game_stats = None - print(f"⚠ Game server not responding at {env_endpoint}") - print(f" Make sure to start: python opentinker/environment/math/code_interpreter_math_server.py --port {args.interaction.config.env_port}") + print(f"⚠ Game server not responding at {env_url}") + print(f" Make sure to start: python opentinker/environment/math/code_interpreter_math_server.py") # 4. Connect to training server and train print("\n[4/4] Starting training...")