diff --git a/configs/env/mettagrid/navigation/evals/cylinder_easy.yaml b/configs/env/mettagrid/navigation/evals/cylinder_easy.yaml index 4f727acc839..9703378f390 100644 --- a/configs/env/mettagrid/navigation/evals/cylinder_easy.yaml +++ b/configs/env/mettagrid/navigation/evals/cylinder_easy.yaml @@ -1,6 +1,7 @@ defaults: - /env/mettagrid/mettagrid@ - + - _self_ + game: num_agents: 20 #how many agents are in the map x2 max_steps: 500 diff --git a/configs/env/mettagrid/navigation/evals/honeypot.yaml b/configs/env/mettagrid/navigation/evals/honeypot.yaml index 12211189c7f..13db79b30cb 100644 --- a/configs/env/mettagrid/navigation/evals/honeypot.yaml +++ b/configs/env/mettagrid/navigation/evals/honeypot.yaml @@ -1,6 +1,7 @@ defaults: - /env/mettagrid/mettagrid@ - + - _self_ + game: num_agents: 20 #how many agents are in the map x2 max_steps: 300 diff --git a/configs/env/mettagrid/navigation/evals/knotty.yaml b/configs/env/mettagrid/navigation/evals/knotty.yaml index 078042a54f9..16c250cdcb9 100644 --- a/configs/env/mettagrid/navigation/evals/knotty.yaml +++ b/configs/env/mettagrid/navigation/evals/knotty.yaml @@ -1,6 +1,7 @@ defaults: - /env/mettagrid/mettagrid@ - + - _self_ + game: num_agents: 20 #how many agents are in the map x2 max_steps: 800 diff --git a/configs/env/mettagrid/navigation/evals/memory_palace.yaml b/configs/env/mettagrid/navigation/evals/memory_palace.yaml index 4132c62e918..9a93dc8d63e 100644 --- a/configs/env/mettagrid/navigation/evals/memory_palace.yaml +++ b/configs/env/mettagrid/navigation/evals/memory_palace.yaml @@ -1,6 +1,7 @@ defaults: - /env/mettagrid/mettagrid@ - + - _self_ + game: num_agents: 20 #how many agents are in the map x2 max_steps: 300 diff --git a/configs/env/mettagrid/navigation/evals/radial_large.yaml b/configs/env/mettagrid/navigation/evals/radial_large.yaml index 648fda89ca8..946d931a60a 100644 --- a/configs/env/mettagrid/navigation/evals/radial_large.yaml +++ b/configs/env/mettagrid/navigation/evals/radial_large.yaml @@ -1,6 +1,7 @@ defaults: - /env/mettagrid/mettagrid@ - + - _self_ + game: num_agents: 20 #how many agents are in the map x2 max_steps: 1000 diff --git a/configs/env/mettagrid/navigation/evals/radial_mini.yaml b/configs/env/mettagrid/navigation/evals/radial_mini.yaml index 6d42a085b0d..4192bedef2b 100644 --- a/configs/env/mettagrid/navigation/evals/radial_mini.yaml +++ b/configs/env/mettagrid/navigation/evals/radial_mini.yaml @@ -1,6 +1,7 @@ defaults: - /env/mettagrid/mettagrid@ - + - _self_ + game: num_agents: 20 #how many agents are in the map x2 max_steps: 300 diff --git a/configs/env/mettagrid/navigation/evals/radial_small.yaml b/configs/env/mettagrid/navigation/evals/radial_small.yaml index 63f8e52a6be..3e16015b48c 100644 --- a/configs/env/mettagrid/navigation/evals/radial_small.yaml +++ b/configs/env/mettagrid/navigation/evals/radial_small.yaml @@ -1,6 +1,7 @@ defaults: - /env/mettagrid/mettagrid@ - + - _self_ + game: num_agents: 20 #how many agents are in the map x2 max_steps: 200 diff --git a/configs/env/mettagrid/navigation/evals/swirls.yaml b/configs/env/mettagrid/navigation/evals/swirls.yaml index cfd370dd67b..08a115f8a57 100644 --- a/configs/env/mettagrid/navigation/evals/swirls.yaml +++ b/configs/env/mettagrid/navigation/evals/swirls.yaml @@ -1,6 +1,7 @@ defaults: - /env/mettagrid/mettagrid@ - + - _self_ + game: num_agents: 20 #how many agents are in the map x2 max_steps: 500 diff --git a/configs/env/mettagrid/navigation/evals/thecube.yaml b/configs/env/mettagrid/navigation/evals/thecube.yaml index 5187db05891..bba990c1834 100644 --- a/configs/env/mettagrid/navigation/evals/thecube.yaml +++ b/configs/env/mettagrid/navigation/evals/thecube.yaml @@ -1,6 +1,7 @@ defaults: - /env/mettagrid/mettagrid@ - + - _self_ + game: num_agents: 20 #how many agents are in the map x2 max_steps: 500 diff --git a/configs/env/mettagrid/navigation/evals/walkaround.yaml b/configs/env/mettagrid/navigation/evals/walkaround.yaml index 943f03ba2ea..a321d3c95b1 100644 --- a/configs/env/mettagrid/navigation/evals/walkaround.yaml +++ b/configs/env/mettagrid/navigation/evals/walkaround.yaml @@ -1,6 +1,7 @@ defaults: - /env/mettagrid/mettagrid@ - + - _self_ + game: num_agents: 20 #how many agents are in the map x2 max_steps: 400 diff --git a/configs/env/mettagrid/navigation/evals/wanderout.yaml b/configs/env/mettagrid/navigation/evals/wanderout.yaml index 304080cc6e2..0b30a0642c3 100644 --- a/configs/env/mettagrid/navigation/evals/wanderout.yaml +++ b/configs/env/mettagrid/navigation/evals/wanderout.yaml @@ -1,6 +1,7 @@ defaults: - /env/mettagrid/mettagrid@ - + - _self_ + game: num_agents: 20 #how many agents are in the map x2 max_steps: 800 diff --git a/devops/add_to_leaderboard.sh b/devops/add_to_leaderboard.sh new file mode 100755 index 00000000000..05a2590ac75 --- /dev/null +++ b/devops/add_to_leaderboard.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# Usage function for better help messages +usage() { + echo "Usage: $0 -r RUN_NAME [-w WANDB_PATH] [additional Hydra overrides]" + echo " -r RUN_NAME Your run name (e.g., b.$USER.test_run)" + echo " -w WANDB_PATH Optional: Full wandb path if different from auto-generated" + echo "" + echo " Any additional arguments will be passed directly to the Python commands" + echo " Example: $0 -r b.$USER.test_run +hardware=macbook" + exit 1 +} + +# Initialize variables +RUN_NAME="" +WANDB_PATH="" +ADDITIONAL_ARGS="" + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -r|--run) + RUN_NAME="$2" + shift 2 + ;; + -w|--wandb) + WANDB_PATH="$2" + shift 2 + ;; + -h|--help) + usage + ;; + *) + # Collect additional arguments + ADDITIONAL_ARGS="$ADDITIONAL_ARGS $1" + shift + ;; + esac +done + +# Check if run name is provided +if [ -z "$RUN_NAME" ]; then + echo "Error: Run name is required" + usage +fi + +# Auto-generate wandb path if not provided +if [ -z "$WANDB_PATH" ]; then + WANDB_PATH="wandb://run/$RUN_NAME" +fi + +echo "Adding policy to eval leaderboard with run name: $RUN_NAME" +echo "Using policy URI: $WANDB_PATH" +if [ ! -z "$ADDITIONAL_ARGS" ]; then + echo "Additional arguments: $ADDITIONAL_ARGS" +fi + +# Step 1: Verifying policy exists on wandb +echo "Step 1: Verifying policy exists on wandb..." +# Add a check here if needed to verify the policy exists on wandb + +# Step 2: Run the simulation +echo "Step 2: Running simulation..." +SIM_CMD="python3 -m tools.sim sim=navigation run=\"$RUN_NAME\" policy_uri=\"$WANDB_PATH\" +eval_db_uri=wandb://artifacts/navigation_db $ADDITIONAL_ARGS" +echo "Executing: $SIM_CMD" +eval $SIM_CMD + +# Check if the sim was successful +if [ $? -ne 0 ]; then + echo "Error: Simulation failed. Exiting." + exit 1 +fi + +# Step 3: Analyze and update dashboard +echo "Step 3: Analyzing results and updating dashboard..." +ANALYZE_CMD="python3 -m tools.analyze run=analyze +eval_db_uri=wandb://artifacts/navigation_db analyzer.output_path=s3://softmax-public/policydash/dashboard.html +analyzer.num_output_policies=\"all\" $ADDITIONAL_ARGS" +echo "Executing: $ANALYZE_CMD" +eval $ANALYZE_CMD + +if [ $? -ne 0 ]; then + echo "Error: Analysis failed. Exiting." + exit 1 +fi + +echo "Successfully added policy to leaderboard and updated dashboard!" +echo "Dashboard URL: https://softmax-public.s3.amazonaws.com/policydash/dashboard.html" \ No newline at end of file diff --git a/devops/build_mettagrid.sh b/devops/build_mettagrid.sh new file mode 100755 index 00000000000..59d1c9d6204 --- /dev/null +++ b/devops/build_mettagrid.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +# This script rebuilds mettagrid without rebuilding other dependencies + +# Exit immediately if a command exits with a non-zero status +set -e + +# Parse command line arguments +CLEAN=0 +for arg in "$@"; do + case $arg in + --clean) + CLEAN=1 + shift + ;; + esac +done + +# Display appropriate header based on clean flag +if [ "$CLEAN" -eq 1 ]; then + echo "========== Rebuilding mettagrid (clean) ==========" +else + echo "========== Rebuilding mettagrid ==========" +fi + +# Get the directory where this script is located +SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" + +# Go to the project root directory +cd "$SCRIPT_DIR/.." + +# Check if deps/mettagrid exists +if [ ! -d "deps/mettagrid" ]; then + echo "Error: mettagrid directory not found at deps/mettagrid" + echo "Make sure you have run the full dependency installation script first." + exit 1 +fi + +# Navigate to mettagrid directory +cd deps/mettagrid + +echo "Building mettagrid in $(pwd)" + +# Clean build artifacts only if --clean flag is specified +if [ "$CLEAN" -eq 1 ]; then + echo "Cleaning previous build artifacts..." + rm -rf build + find . -name "*.so" -delete + echo "Clean completed." +else + echo "Skipping clean (use --clean to remove previous build artifacts)" +fi + +# Rebuild mettagrid +echo "Rebuilding mettagrid..." +python setup.py build_ext --inplace + +# Reinstall in development mode +echo "Reinstalling mettagrid in development mode..." +pip install -e . + +echo "========== mettagrid rebuild complete ==========" \ No newline at end of file diff --git a/devops/checkout_and_build.sh b/devops/checkout_and_build.sh index c2346e5421e..17f16c0b447 100755 --- a/devops/checkout_and_build.sh +++ b/devops/checkout_and_build.sh @@ -118,12 +118,11 @@ mkdir -p deps cd deps # ========== METTAGRID ========== -# Note that version control for the mettagrid package has been brought into our monorepo -cd mettagrid -echo "Building mettagrid into $(pwd)" -python setup.py build_ext --inplace -pip install -e . -cd .. +# Call the dedicated build_mettagrid.sh script instead of building directly +echo "Building mettagrid using devops/build_mettagrid.sh" +cd .. # Go back to project root +devops/build_mettagrid.sh +cd deps # Return to deps directory for remaining dependencies # Install dependencies using the function install_repo "fast_gae" $FAST_GAE_REPO "main" "python setup.py build_ext --inplace && pip install -e ." diff --git a/metta/agent/policy_store.py b/metta/agent/policy_store.py index 6eab10516d6..f9a9c16e4b0 100644 --- a/metta/agent/policy_store.py +++ b/metta/agent/policy_store.py @@ -93,7 +93,6 @@ def _policy_records(self, uri, selector_type="top", n=1, metric: str = "score"): prs = self._prs_from_wandb_sweep(sweep_name, version) else: prs = self._prs_from_wandb_artifact(wandb_uri, version) - elif uri.startswith("file://"): prs = self._prs_from_path(uri[len("file://") :]) elif uri.startswith("puffer://"): @@ -104,42 +103,75 @@ def _policy_records(self, uri, selector_type="top", n=1, metric: str = "score"): if len(prs) == 0: raise ValueError(f"No policies found at {uri}") + logger.info(f"Found {len(prs)} policies at {uri}") + if selector_type == "all": + logger.info(f"Returning all {len(prs)} policies") return prs - elif selector_type == "latest": - return [prs[0]] - + selected = [prs[0]] + logger.info(f"Selected latest policy: {selected[0].name}") + return selected elif selector_type == "rand": - return [random.choice(prs)] - + selected = [random.choice(prs)] + logger.info(f"Selected random policy: {selected[0].name}") + return selected elif selector_type == "top": - if metric not in prs[0].metadata: - # check if the metric is in eval_scores - if "eval_scores" in prs[0].metadata and metric in prs[0].metadata["eval_scores"]: - policy_scores = {p: p.metadata["eval_scores"].get(metric, None) for p in prs} - else: - logger.warning(f"Metric {metric} not found in policy metadata, returning latest policy") - return [prs[0]] # - else: + if ( + "eval_scores" in prs[0].metadata + and prs[0].metadata["eval_scores"] is not None + and metric in prs[0].metadata["eval_scores"] + ): + # Metric is in eval_scores + logger.info(f"Found metric '{metric}' in metadata['eval_scores']") + policy_scores = {p: p.metadata.get("eval_scores", {}).get(metric, None) for p in prs} + elif metric in prs[0].metadata: + # Metric is directly in metadata + logger.info(f"Found metric '{metric}' directly in metadata") policy_scores = {p: p.metadata.get(metric, None) for p in prs} + else: + # Metric not found anywhere + logger.warning( + f"Metric '{metric}' not found in policy metadata or eval_scores, returning latest policy" + ) + selected = [prs[0]] + logger.info(f"Selected latest policy (due to missing metric): {selected[0].name}") + return selected policies_with_scores = [p for p, s in policy_scores.items() if s is not None] + # If more than 20% of the policies have no score, return the latest policy if len(policies_with_scores) < len(prs) * 0.8: logger.warning("Too many invalid scores, returning latest policy") - return [prs[0]] # return latest if metric not found - top = sorted(policies_with_scores, key=lambda p: policy_scores[p])[-n:] + selected = [prs[0]] # return latest if metric not found + logger.info(f"Selected latest policy (due to too many invalid scores): {selected[0].name}") + return selected + + # Sort by metric score (assuming higher is better) + def get_policy_score(policy: PolicyRecord) -> float: # Explicitly return a comparable type + score = policy_scores.get(policy) + if score is None: + return float("-inf") # Or another appropriate default + return score + + top = sorted(policies_with_scores, key=get_policy_score)[-n:] + if len(top) < n: logger.warning(f"Only found {len(top)} policies matching criteria, requested {n}") - logger.info(f"Top {n} policies by {metric}:") + logger.info(f"Top {len(top)} policies by {metric}:") logger.info(f"{'Policy':<40} | {metric:<20}") logger.info("-" * 62) for pr in top: - logger.info(f"{pr.name:<40} | {pr.metadata.get(metric, 0):<20.4f}") + score = policy_scores[pr] + logger.info(f"{pr.name:<40} | {score:<20.4f}") + + selected = top[-n:] + logger.info(f"Selected {len(selected)} top policies by {metric}") + for i, pr in enumerate(selected): + logger.info(f" {i + 1}. {pr.name} (score: {policy_scores[pr]:.4f})") - return top[-n:] + return selected else: raise ValueError(f"Invalid selector type {selector_type}") @@ -180,10 +212,16 @@ def save(self, name: str, path: str, policy: nn.Module, metadata: dict): return pr def add_to_wandb_run(self, run_id: str, pr: PolicyRecord, additional_files=None): - return self.add_to_wandb_artifact(run_id, "model", pr.metadata, pr.local_path(), additional_files) + local_path = pr.local_path() + if local_path is None: + raise ValueError("PolicyRecord has no local path") + return self.add_to_wandb_artifact(run_id, "model", pr.metadata, local_path, additional_files) def add_to_wandb_sweep(self, sweep_name: str, pr: PolicyRecord, additional_files=None): - return self.add_to_wandb_artifact(sweep_name, "sweep_model", pr.metadata, pr.local_path(), additional_files) + local_path = pr.local_path() + if local_path is None: + raise ValueError("PolicyRecord has no local path") + return self.add_to_wandb_artifact(sweep_name, "sweep_model", pr.metadata, local_path, additional_files) def add_to_wandb_artifact(self, name: str, type: str, metadata: dict, local_path: str, additional_files=None): if self._wandb_run is None: diff --git a/metta/rl/pufferlib/trainer.py b/metta/rl/pufferlib/trainer.py index 5aef3a4c02f..d5991f24252 100644 --- a/metta/rl/pufferlib/trainer.py +++ b/metta/rl/pufferlib/trainer.py @@ -74,7 +74,7 @@ def __init__( self.eval_stats_logger = EvalStatsLogger(self.sim_suite_config, wandb_run) self.average_reward = 0.0 # Initialize average reward estimate self._current_eval_score = None - self.eval_scores = None + self.eval_scores = {} self._eval_results = [] self._weights_helper = WeightsMetricsHelper(cfg) self._make_vecenv()