From fc150e30cf16aa1b528c52173e8d37e354b0d297 Mon Sep 17 00:00:00 2001 From: Nabla7 Date: Thu, 25 Jun 2026 02:17:42 +0200 Subject: [PATCH 1/6] Add scene package cooking pipeline --- data/.lfs/dimos_office_mesh.tar.gz | 3 + dimos/experimental/pimsim/scene/README.md | 312 +++++ .../pimsim/scene/browser_collision.py | 249 ++++ dimos/experimental/pimsim/scene/cook.py | 330 ++++++ .../pimsim/scene/entity_collision.py | 141 +++ dimos/experimental/pimsim/scene/inspect.py | 185 +++ dimos/experimental/pimsim/scene/plan.py | 341 ++++++ dimos/experimental/pimsim/scene/sidecar.py | 189 ++++ .../pimsim/scene/test_entity_collision.py | 67 ++ dimos/experimental/pimsim/scene/test_spec.py | 324 ++++++ .../pimsim/scene/visual_blender.py | 313 +++++ dimos/experimental/pimsim/scene/visual_glb.py | 363 ++++++ dimos/simulation/mujoco/collision_spec.py | 968 ++++++++++++++++ dimos/simulation/mujoco/scene_mesh_to_mjcf.py | 1003 +++++++++++++++++ .../simulation/mujoco/test_collision_spec.py | 65 ++ dimos/simulation/scene_assets/mesh_scene.py | 710 ++++++++++++ pyproject.toml | 19 +- uv.lock | 109 +- 18 files changed, 5663 insertions(+), 28 deletions(-) create mode 100644 data/.lfs/dimos_office_mesh.tar.gz create mode 100644 dimos/experimental/pimsim/scene/README.md create mode 100644 dimos/experimental/pimsim/scene/browser_collision.py create mode 100644 dimos/experimental/pimsim/scene/cook.py create mode 100644 dimos/experimental/pimsim/scene/entity_collision.py create mode 100644 dimos/experimental/pimsim/scene/inspect.py create mode 100644 dimos/experimental/pimsim/scene/plan.py create mode 100644 dimos/experimental/pimsim/scene/sidecar.py create mode 100644 dimos/experimental/pimsim/scene/test_entity_collision.py create mode 100644 dimos/experimental/pimsim/scene/test_spec.py create mode 100644 dimos/experimental/pimsim/scene/visual_blender.py create mode 100644 dimos/experimental/pimsim/scene/visual_glb.py create mode 100644 dimos/simulation/mujoco/collision_spec.py create mode 100644 
dimos/simulation/mujoco/scene_mesh_to_mjcf.py create mode 100644 dimos/simulation/mujoco/test_collision_spec.py create mode 100644 dimos/simulation/scene_assets/mesh_scene.py diff --git a/data/.lfs/dimos_office_mesh.tar.gz b/data/.lfs/dimos_office_mesh.tar.gz new file mode 100644 index 0000000000..d9a02c20b2 --- /dev/null +++ b/data/.lfs/dimos_office_mesh.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7725fd12a8f88e8f757478d3afe9a06938a21d718737c80b9655c9725813c1bf +size 318485597 diff --git a/dimos/experimental/pimsim/scene/README.md b/dimos/experimental/pimsim/scene/README.md new file mode 100644 index 0000000000..a2e52079cb --- /dev/null +++ b/dimos/experimental/pimsim/scene/README.md @@ -0,0 +1,312 @@ +# Scene Packages + +A scene package is the cooked, robot-agnostic form of a 3D environment. It +contains the visual mesh, collision geometry, per-object metadata, a scene-only +MuJoCo wrapper, and optional runtime entities. + +The runtime rule is simple: + +```text +source asset + sidecar -> cooked scene package -> simulator attaches any robot +``` + +The robot is not part of the package. The cooker prepares the world once, +offline. `MujocoSimModule` loads the cooked world at runtime and attaches the +robot MJCF into the same `MjSpec`. + +## Package Layout + +```text +data/scene_packages/<scene>/ +├── scene.meta.json manifest: alignment, artifact paths, entities, stats +├── mujoco/<hash>/ +│ ├── wrapper.xml scene-only MJCF, no robot +│ └── *.obj static collision assets +├── entities/<entity_id>/ +│ ├── visual.glb per-entity visual, in entity-local frame +│ └── mujoco_collision/ cook-time convex hulls +└── browser/ + ├── visual.glb + ├── collision.glb + └── objects.json semantic object table for browser/raycast users +``` + +Packages are content-hash keyed on the source mesh, alignment, sidecar, and cook +schema version. Changing one of those inputs creates a new cooked output.
+ +## Spec And Backends + +`dimos/simulation/scene_assets/spec.py` is the shared scene-package contract. It +defines: + +- cook input specs such as `SceneCookSpec`, `BrowserVisualSpec`, + `BrowserCollisionSpec`, and `MujocoSceneSpec`; +- the runtime `ScenePackage` object; +- the on-disk `scene.meta.json` shape; +- `load_scene_package()`, which resolves package-relative artifact paths and + validates artifact frame metadata. + +This spec is intentionally general. A module or simulator that wants to consume +a scene package should not parse `scene.meta.json` by hand. It should load the +package once: + +```python +from dimos.simulation.scene_assets.spec import load_scene_package + +package = load_scene_package("data/scene_packages/dimos_office/scene.meta.json") +``` + +and then consume the artifact it understands: + +- browser/viewer systems use `package.visual_path`, `package.browser_collision_path`, + and `package.objects_path`; +- MuJoCo uses `package.mujoco_scene_path` plus `package.entities`; +- future simulators can add their own artifact fields/specs without changing the + source mesh loader or the scene-name catalog. + +`dimos/simulation/scene_assets/mesh_scene.py` is a different layer. It loads raw +source geometry and applies `SceneMeshAlignment`; it is used by cook-time tools, +not by normal runtime modules. Runtime modules should prefer the cooked package +contract from `spec.py`. + +`dimos/simulation/scenes/catalog.py` is only name/path resolution. It maps user +inputs like `--scene office` to a loaded `ScenePackage`. It does not define what +a package is, and it should not know how MuJoCo, browser viewers, or other +systems load their artifacts. + +## Workflow + +1. Inspect the source asset and identify the prims that need authored collision. +2. Write `<source_stem>.cook.json` next to the source mesh. +3. Cook the package. +4. Verify the generated `mujoco/<hash>/wrapper.xml`. +5. Load it through a blueprint with `--scene`.
+ +The DimOS office scene is the reference example below. + +## Office Example + +The office source mesh lives in data: + +```text +data/dimos_office_mesh/dimos_office_mesh.glb +``` + +That file is a visual asset, not a physics contract. Before writing the sidecar, +inspect the scene graph and bounds of the source prims after applying the same +alignment used by the cooker: + +```bash +python - <<'PY' +from pathlib import Path + +import numpy as np + +from dimos.simulation.scene_assets.mesh_scene import ( + SceneMeshAlignment, + load_scene_prims, +) + +source = Path("data/dimos_office_mesh/dimos_office_mesh.glb") +alignment = SceneMeshAlignment(scale=2.0, y_up=False) + +for prim in load_scene_prims(source, alignment=alignment): + name = prim.visual_node_name or prim.prim_path or prim.name + if "Floor" not in name: + continue + lo = np.min(prim.vertices, axis=0) + hi = np.max(prim.vertices, axis=0) + extent = hi - lo + print(f"{name}: min={lo.round(4).tolist()} max={hi.round(4).tolist()} extent={extent.round(4).tolist()}") +PY +``` + +For the current office GLB, the relevant support prim is `Floor_Plane.002`. Its +Z extent is effectively zero: it is a visual sheet. If that sheet is cooked into +a very thin MuJoCo box, humanoid foot contacts can clip through it during walking +even though the robot can stand still on it. + +Do not solve that by adding an infinite MuJoCo plane. That ignores the authored +scene geometry and breaks as soon as floors have different heights, holes, +stairs, ramps, platforms, or multiple stories. The sidecar should instead state +the authored physics intent for the named support surface. 
+ +Create or edit: + +```text +data/dimos_office_mesh/dimos_office_mesh.cook.json +``` + +with the floor override: + +```json +{ + "collision": { + "default": "auto", + "prim_overrides": { + "Floor*": { + "type": "box", + "min_thickness": 0.04, + "preserve": "top" + } + } + } +} +``` + +Reasoning: + +- `Floor*` matches `Floor_Plane.002` without depending on the sanitized MJCF + name. +- `type: "box"` says the floor is a support slab, not a raw visual mesh. +- `min_thickness: 0.04` gives the support 4 cm of total thickness. +- `preserve: "top"` keeps the walkable surface at the authored visual height and + expands the slab downward. + +Cook the office package: + +```bash +python -m dimos.experimental.pimsim.scene.cook \ + data/dimos_office_mesh/dimos_office_mesh.glb \ + --cook-spec data/dimos_office_mesh/dimos_office_mesh.cook.json \ + --output-dir data/scene_packages/dimos_office \ + --scale 2.0 \ + --no-y-up \ + --rebake +``` + +Verify the generated MuJoCo wrapper: + +```bash +rg 'Floor.*_col' data/scene_packages/dimos_office/mujoco -g wrapper.xml +``` + +For a floor whose top is at `z=0`, the cooked geom should have half-thickness +`0.02` and center `z=-0.02`, for example: + +```xml +<geom name="Floor_Plane_002_col" type="box" size="hx hy 0.02" pos="cx cy -0.02"/> +``` + +The important invariant is top height unchanged, bottom expanded downward: + +```text +top_z = pos_z + size_z +bottom_z = pos_z - size_z +``` + +## Loading In MuJoCo + +The G1 GR00T WBC blueprint already uses the scene package path when `--scene` +is provided: + +```bash +python -m dimos.robot.cli.dimos \ + --simulation mujoco \ + --scene office \ + --viewer rerun \ + run unitree-g1-groot-wbc +``` + +`office` resolves through `dimos/simulation/scenes/catalog.py` to: + +```text +data/scene_packages/dimos_office/scene.meta.json +``` + +At runtime, `MujocoSimModule`: + +1. receives a `ScenePackage` resolved by `catalog.py`; +2. reads `package.mujoco_scene_path`, which points at + `mujoco/<hash>/wrapper.xml`; +3. loads the robot-only G1 MJCF; +4.
attaches the robot into the scene `MjSpec`; +5. adds `package.entities`; +6. compiles one MuJoCo model. + +The robot MJCF must stay robot-only: no office floor, no scene walls, no +furniture, no manipulation rig. Scene geometry belongs in the cooked package. + +## Sidecar Schema + +`<source_stem>.cook.json` can contain static-collision policy and interactables: + +```json +{ + "collision": { + "default": "auto", + "prim_overrides": { + "Floor*": {"type": "box", "min_thickness": 0.04, "preserve": "top"}, + "Wall_*": {"type": "box"}, + "Stairs_*": {"type": "decompose", "max_hulls": 16} + } + }, + "interactables": [ + { + "id": "chair_000", + "source_prim_paths": ["Chair_000_*"], + "mass": 8.0, + "physics": {"shape": "mesh"}, + "tags": ["chair"] + } + ] +} +``` + +Static collision types are: + +```text +auto | box | sphere | cylinder | capsule | plane | hull | mesh | decompose | skip +``` + +For box overrides, `min_thickness` is full world-Z thickness in meters. +`preserve` can be `top`, `bottom`, or `center`. + +Interactables become MuJoCo bodies named `entity:<id>`. They can be: + +- extracted from the source mesh with `source_prim_paths`; +- synthetic primitives with an authored `pose` and `physics.extents`; +- static, kinematic, or dynamic depending on `kind` and `mass`. + +## Data Publishing + +Scene source meshes and cooked packages live under `data/`, which is ignored by +normal git. Do not add these artifacts with ordinary `git add`. Publish or update +them through the repository LFS bin workflow described in: + +```text +docs/development/large_file_management.md +``` + +Code and docs changes can go through normal git. Data changes such as +`data/dimos_office_mesh/dimos_office_mesh.cook.json` and +`data/scene_packages/dimos_office` should be handled through the LFS script when +we are ready to ship them. + +## Why Bake + +MuJoCo treats a mesh geom as convex for collision.
A raw concave building mesh is +therefore the wrong collision representation: it becomes a coarse blob or a bad +support surface. The cooker turns source prims into MuJoCo-friendly collision: +primitives where the shape is obvious, hulls where they fit, and CoACD +decompositions for concave objects. + +The sidecar is the place for source-specific knowledge. It keeps the cook +deterministic and reviewable instead of relying on broad heuristics that may fail +on multi-story buildings or unusual floor geometry. + +## Reference + +| File | Role | +|---|---| +| `dimos/experimental/pimsim/scene/cook.py` | cook entry point and CLI | +| `dimos/experimental/pimsim/scene/sidecar.py` | `.cook.json` schema | +| `dimos/experimental/pimsim/scene/plan.py` | sidecar to cook plan | +| `dimos/experimental/pimsim/scene/inspect.py` | source asset statistics | +| `dimos/simulation/scene_assets/spec.py` | shared scene-package metadata contract | +| `dimos/simulation/scene_assets/mesh_scene.py` | source mesh loading and prim inspection | +| `dimos/simulation/mujoco/scene_mesh_to_mjcf.py` | MuJoCo scene bake | +| `dimos/simulation/mujoco/collision_spec.py` | static collision policy | +| `dimos/simulation/mujoco/entity_scene.py` | runtime entity composition | +| `dimos/simulation/scenes/catalog.py` | scene name/path resolution | +| `dimos/simulation/engines/mujoco_sim_module.py` | runtime scene + robot composition | diff --git a/dimos/experimental/pimsim/scene/browser_collision.py b/dimos/experimental/pimsim/scene/browser_collision.py new file mode 100644 index 0000000000..0b7e9f35dd --- /dev/null +++ b/dimos/experimental/pimsim/scene/browser_collision.py @@ -0,0 +1,249 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Bake browser-side collision geometry from a scene asset.""" + +from __future__ import annotations + +from dataclasses import dataclass +import json +from pathlib import Path +from typing import Any + +import numpy as np +import open3d as o3d # type: ignore[import-untyped] +import trimesh + +from dimos.experimental.pimsim.scene.inspect import inspect_scene_asset +from dimos.simulation.mujoco.collision_spec import CollisionSpec +from dimos.simulation.scene_assets.mesh_scene import ( + SceneMeshAlignment, + ScenePrimMesh, + load_scene_prims, + split_disconnected_scene_prims, +) +from dimos.simulation.scene_assets.spec import BrowserCollisionSpec +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + +OBJECTS_SIDECAR_NAME = "objects.json" + + +@dataclass(frozen=True) +class BrowserCollisionCookResult: + path: Path + stats: dict[str, Any] + objects_path: Path | None = None + + +def cook_browser_collision( + source_path: str | Path, + output_dir: str | Path, + *, + alignment: SceneMeshAlignment | None = None, + spec: BrowserCollisionSpec | None = None, + collision_spec: CollisionSpec | None = None, + rebake: bool = False, +) -> BrowserCollisionCookResult | None: + """Write a simplified GLB used for browser picking/raycast/physics. + + For scene packages this should stay in source-asset coordinates; the + browser applies the package alignment to the visual and collision roots + together. 
+ """ + browser_spec = spec or BrowserCollisionSpec() + if not browser_spec.enabled: + return None + + source = Path(source_path).expanduser().resolve() + out_dir = Path(output_dir).expanduser().resolve() + out_dir.mkdir(parents=True, exist_ok=True) + out_path = out_dir / browser_spec.output_name + objects_path = out_dir / OBJECTS_SIDECAR_NAME + + mesh_cached = out_path.exists() and not rebake + objects_cached = objects_path.exists() and not rebake + if mesh_cached and objects_cached: + return BrowserCollisionCookResult( + path=out_path, + stats=inspect_scene_asset(out_path).to_json_dict(), + objects_path=objects_path, + ) + + prims = _load_collision_prims(source, alignment=alignment, collision_spec=collision_spec) + stats: dict[str, Any] + if mesh_cached: + stats = inspect_scene_asset(out_path).to_json_dict() + else: + mesh = _build_fused_collision_mesh( + prims, collision_spec or CollisionSpec.auto_discover(source) + ) + original_triangles = len(mesh.triangles) + target_faces = int(browser_spec.target_faces) + if target_faces > 0 and original_triangles > target_faces: + logger.info( + "browser collision: simplifying %s triangles -> %s", + original_triangles, + target_faces, + ) + mesh = mesh.simplify_quadric_decimation(target_number_of_triangles=target_faces) + mesh.remove_degenerate_triangles() + mesh.remove_duplicated_triangles() + mesh.remove_duplicated_vertices() + mesh.remove_non_manifold_edges() + _write_glb(mesh, out_path) + stats = inspect_scene_asset(out_path).to_json_dict() + stats["source_triangles"] = original_triangles + stats["target_faces"] = target_faces + + objects = extract_scene_objects(prims) + if not objects_cached: + _write_objects_json(objects_path, objects) + stats["objects"] = len(objects) + return BrowserCollisionCookResult(path=out_path, stats=stats, objects_path=objects_path) + + +def _write_glb(mesh: o3d.geometry.TriangleMesh, path: Path) -> None: + vertices = np.asarray(mesh.vertices, dtype=np.float64) + faces = 
np.asarray(mesh.triangles, dtype=np.int64) + if len(vertices) == 0 or len(faces) == 0: + raise RuntimeError("browser collision bake produced an empty mesh") + trimesh.Trimesh(vertices=vertices, faces=faces, process=False).export(str(path)) + + +def _load_collision_prims( + source: Path, + *, + alignment: SceneMeshAlignment | None, + collision_spec: CollisionSpec | None, +) -> list[ScenePrimMesh]: + spec = collision_spec or CollisionSpec.auto_discover(source) + source_alignment = alignment or SceneMeshAlignment(y_up=False) + + prims = load_scene_prims(source, alignment=source_alignment) + if spec.split_disconnected_components: + prims, split_stats = split_disconnected_scene_prims( + prims, + min_components=spec.split_min_components, + extent_ratio=spec.split_extent_ratio, + prim_min_extent=spec.split_prim_min_extent_m, + axis_ratio=spec.split_axis_ratio, + min_component_extent=spec.split_component_min_extent_m, + min_component_faces=spec.split_component_min_faces, + can_split=lambda prim: ( + spec.resolve(prim.prim_path or prim.name).get("type", spec.default) == "auto" + ), + ) + if split_stats["split_prims"]: + logger.info( + "browser collision: split %s disconnected prims into %s kept " + "components; dropped %s tiny components", + split_stats["split_prims"], + split_stats["emitted_components"], + split_stats["dropped_components"], + ) + return prims + + +def _build_fused_collision_mesh( + prims: list[ScenePrimMesh], + spec: CollisionSpec, +) -> o3d.geometry.TriangleMesh: + vertices: list[np.ndarray] = [] + faces: list[np.ndarray] = [] + vertex_offset = 0 + for prim in prims: + mesh = _mesh_for_prim(prim, spec) + if mesh is None: + continue + prim_vertices = np.asarray(mesh.vertices, dtype=np.float64) + prim_faces = np.asarray(mesh.triangles, dtype=np.int64) + if len(prim_vertices) == 0 or len(prim_faces) == 0: + continue + vertices.append(prim_vertices) + faces.append(prim_faces + vertex_offset) + vertex_offset += len(prim_vertices) + if not vertices: + raise 
RuntimeError("browser collision sidecar skipped every prim") + return _mesh_from_arrays(np.concatenate(vertices, axis=0), np.concatenate(faces, axis=0)) + + +def extract_scene_objects(prims: list[ScenePrimMesh]) -> list[dict[str, Any]]: + """Per-prim semantic metadata (id, prim_path, AABB in source frame). + + Emitted independently of the fused collision GLB so the runtime can + answer ``findAsset("sectional")``-style queries without paying a + per-object PhysicsAggregate cost. AABB shares the collision GLB + frame (source / z-up after alignment). + """ + objects: list[dict[str, Any]] = [] + for prim in prims: + v = np.asarray(prim.vertices, dtype=np.float64) + if v.size == 0: + continue + objects.append( + { + "id": prim.name, + "prim_path": prim.prim_path, + "aabb": { + "min": v.min(axis=0).tolist(), + "max": v.max(axis=0).tolist(), + }, + } + ) + return objects + + +def _write_objects_json(path: Path, objects: list[dict[str, Any]]) -> None: + payload = {"frame": "source", "objects": objects} + path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n") + + +def _mesh_for_prim( + prim: ScenePrimMesh, + spec: CollisionSpec, +) -> o3d.geometry.TriangleMesh | None: + override = spec.resolve(prim.prim_path or prim.name) + override_type = override.get("type", spec.default) + if override_type == "skip": + return None + + mesh = _mesh_from_arrays( + prim.vertices.astype(np.float64), + prim.triangles.astype(np.int64), + ) + target_faces = int(override.get("target_faces") or 0) + if target_faces > 0 and len(mesh.triangles) > target_faces: + mesh = mesh.simplify_quadric_decimation(target_number_of_triangles=target_faces) + mesh.remove_degenerate_triangles() + mesh.remove_duplicated_triangles() + mesh.remove_duplicated_vertices() + mesh.remove_non_manifold_edges() + return mesh + + +def _mesh_from_arrays(vertices: np.ndarray, faces: np.ndarray) -> o3d.geometry.TriangleMesh: + mesh = o3d.geometry.TriangleMesh() + mesh.vertices = o3d.utility.Vector3dVector(vertices) 
+ mesh.triangles = o3d.utility.Vector3iVector(faces.astype(np.int32)) + return mesh + + +__all__ = [ + "OBJECTS_SIDECAR_NAME", + "BrowserCollisionCookResult", + "cook_browser_collision", + "extract_scene_objects", +] diff --git a/dimos/experimental/pimsim/scene/cook.py b/dimos/experimental/pimsim/scene/cook.py new file mode 100644 index 0000000000..58eeacf898 --- /dev/null +++ b/dimos/experimental/pimsim/scene/cook.py @@ -0,0 +1,330 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Offline scene package cooker. + +This is intentionally not a DimOS runtime module. It prepares cooked scene +packages that runtime modules consume through normal config. 
+""" + +from __future__ import annotations + +import argparse +from dataclasses import asdict +import hashlib +import json +from pathlib import Path +from typing import Any + +from dimos.experimental.pimsim.scene.browser_collision import cook_browser_collision +from dimos.experimental.pimsim.scene.entity_collision import ( + COLLISION_DIR_NAME, + cook_entity_collision_hulls, +) +from dimos.experimental.pimsim.scene.inspect import inspect_scene_asset +from dimos.experimental.pimsim.scene.plan import build_scene_cook_plan +from dimos.experimental.pimsim.scene.sidecar import SceneCookSidecar +from dimos.experimental.pimsim.scene.visual_blender import cook_plan_visual_assets +from dimos.experimental.pimsim.scene.visual_glb import cook_browser_visual +from dimos.simulation.mujoco.collision_spec import CollisionSpec +from dimos.simulation.mujoco.scene_mesh_to_mjcf import load_or_bake +from dimos.simulation.scene_assets.mesh_scene import SceneMeshAlignment +from dimos.simulation.scene_assets.spec import ( + BrowserCollisionSpec, + BrowserVisualSpec, + MujocoSceneSpec, + SceneCookSpec, + ScenePackage, +) +from dimos.utils.data import get_data_dir +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + +SCENE_PACKAGE_DIR = get_data_dir("scene_packages") +_PACKAGE_KEY_LEN = 12 +_COOK_VERSION = 4 + + +def cook_scene_package( + source_path: str | Path, + *, + output_dir: str | Path | None = None, + alignment: SceneMeshAlignment | None = None, + collision_spec: CollisionSpec | None = None, + cook_sidecar: SceneCookSidecar | None = None, + visual_spec: BrowserVisualSpec | None = None, + browser_collision_spec: BrowserCollisionSpec | None = None, + mujoco_spec: MujocoSceneSpec | None = None, + rebake: bool = False, +) -> ScenePackage: + """Cook one source scene into a robot-agnostic package. + + The package contains browser artifacts (visual + collision GLBs, + semantic ``objects.json``), per-entity GLBs, and a scene-only MuJoCo + wrapper. 
Robots are attached at runtime via ``MjSpec.attach()`` inside + ``MujocoSimModule.start``; the cooker never touches robot MJCFs. + """ + source = Path(source_path).expanduser().resolve() + if not source.exists(): + raise FileNotFoundError(f"scene source not found: {source}") + + align = alignment or SceneMeshAlignment() + visual = visual_spec or BrowserVisualSpec() + browser_collision = browser_collision_spec or BrowserCollisionSpec() + mujoco = mujoco_spec or MujocoSceneSpec() + cook_spec = SceneCookSpec( + source_path=source, + alignment=align, + browser_visual=visual, + browser_collision=browser_collision, + mujoco=mujoco, + ) + sidecar = cook_sidecar or SceneCookSidecar.auto_discover(source) + + package_dir = ( + Path(output_dir).expanduser().resolve() + if output_dir is not None + else SCENE_PACKAGE_DIR / _package_key(cook_spec, sidecar) + ) + browser_dir = package_dir / "browser" + mujoco_dir = package_dir / "mujoco" + package_dir.mkdir(parents=True, exist_ok=True) + + stats: dict[str, Any] = { + "source": inspect_scene_asset(source).to_json_dict(), + "cook_spec": _cook_spec_json(cook_spec), + "cook_version": _COOK_VERSION, + } + if sidecar.path is not None or sidecar.interactables: + stats["authored_sidecar"] = sidecar.to_json_dict() + + plan = build_scene_cook_plan( + source, + sidecar=sidecar, + alignment=align, + output_dir=package_dir, + collision_spec=collision_spec, + ) + stats["cook_plan"] = plan.to_json_dict() + + entities = plan.entities_metadata() + if entities: + stats["interactables"] = { + "count": len(entities), + "ids": [entity["id"] for entity in entities], + "static_visual_filter": "plan/blender", + } + + visual_source = source + # Only invoke Blender when at least one entity actually extracts from + # the source mesh; pure-synthetic sidecars (manip rigs) don't need it. 
+ needs_blender = visual.enabled and any( + entity.visual_path is not None for entity in plan.entities + ) + if needs_blender: + visual_source = cook_plan_visual_assets( + source, + package_dir, + plan=plan, + rebake=rebake, + ) + + if mujoco.enabled: + hull_counts = _cook_entity_collision(entities, rebake=rebake) + if hull_counts: + stats["entity_collision"] = {"hulls_per_entity": hull_counts} + + visual_result = cook_browser_visual( + visual_source, + browser_dir, + spec=visual, + rebake=rebake, + ) + if visual_result is not None: + stats["browser_visual"] = { + "tool": visual_result.tool, + **visual_result.stats, + } + + browser_collision_result = cook_browser_collision( + source, + browser_dir, + alignment=SceneMeshAlignment(y_up=False), + spec=browser_collision, + collision_spec=plan.collision_spec, + rebake=rebake, + ) + if browser_collision_result is not None: + stats["browser_collision"] = browser_collision_result.stats + + mujoco_scene_path: Path | None = None + if mujoco.enabled: + mujoco_scene_path = load_or_bake( + scene_mesh_path=source, + alignment=align, + cache_root=mujoco_dir, + collision_spec=plan.collision_spec, + include_visual_mesh=mujoco.include_visual_mesh, + rebake=rebake, + ) + stats["mujoco"] = {"scene_path": str(mujoco_scene_path)} + + package = ScenePackage( + package_dir=package_dir, + source_path=source, + alignment=align, + visual_path=visual_result.path if visual_result else None, + browser_collision_path=browser_collision_result.path if browser_collision_result else None, + objects_path=browser_collision_result.objects_path if browser_collision_result else None, + mujoco_scene_path=mujoco_scene_path, + metadata_path=package_dir / "scene.meta.json", + entities=entities, + stats=stats, + ) + package.write_metadata() + logger.info("scene package cooked: %s", package.metadata_path) + return package + + +def _cook_entity_collision( + entities: list[dict[str, Any]], + *, + rebake: bool, +) -> dict[str, int]: + """Decompose every mesh 
entity's GLB into package collision hulls. + + Mutates the entity metadata in place, recording the hull files as + ``collision_paths`` so the runtime composer loads them from the + package instead of decomposing at boot. Returns hull counts by + entity id. + """ + hull_counts: dict[str, int] = {} + for entity in entities: + if entity.get("descriptor", {}).get("shape_hint") != "mesh": + continue + visual_path = entity.get("visual_path") + if not visual_path or not Path(visual_path).exists(): + logger.warning( + "entity %s: mesh entity has no cooked visual GLB; " + "no collision hulls (runtime falls back to AABB box)", + entity.get("id"), + ) + continue + hull_paths = cook_entity_collision_hulls( + visual_path, + Path(visual_path).parent / COLLISION_DIR_NAME, + rebake=rebake, + ) + if hull_paths: + entity["collision_paths"] = [str(path) for path in hull_paths] + hull_counts[str(entity.get("id"))] = len(hull_paths) + return hull_counts + + +def _package_key( + cook_spec: SceneCookSpec, + sidecar: SceneCookSidecar, +) -> str: + h = hashlib.sha256() + h.update(cook_spec.source_path.read_bytes()) + h.update(str(_COOK_VERSION).encode()) + h.update(json.dumps(_cook_spec_json(cook_spec), sort_keys=True).encode()) + h.update(json.dumps(sidecar.to_json_dict(), sort_keys=True).encode()) + return h.hexdigest()[:_PACKAGE_KEY_LEN] + + +def _cook_spec_json(cook_spec: SceneCookSpec) -> dict[str, Any]: + raw = asdict(cook_spec) + raw["source_path"] = str(cook_spec.source_path) + return raw + + +def _parse_xyz(value: str) -> tuple[float, float, float]: + parts = [float(part.strip()) for part in value.split(",")] + if len(parts) != 3: + raise argparse.ArgumentTypeError("expected comma-separated x,y,z") + return (parts[0], parts[1], parts[2]) + + +def cli_main() -> None: + parser = argparse.ArgumentParser( + description="Cook a scene asset into a robot-agnostic DimOS scene package.", + ) + parser.add_argument("source", type=Path) + parser.add_argument("--output-dir", type=Path) + 
parser.add_argument("--cook-spec", type=Path) + parser.add_argument("--scale", type=float, default=1.0) + parser.add_argument("--translation", type=_parse_xyz, default=(0.0, 0.0, 0.0)) + parser.add_argument("--rotation-zyx-deg", type=_parse_xyz, default=(0.0, 0.0, 0.0)) + parser.add_argument("--no-y-up", action="store_true") + parser.add_argument("--no-visual", action="store_true") + parser.add_argument( + "--visual-optimizer", + choices=("gltfpack", "blender", "copy"), + default="gltfpack", + ) + parser.add_argument("--visual-simplify-ratio", type=float, default=0.3) + parser.add_argument("--visual-simplify-error", type=float, default=0.02) + parser.add_argument("--visual-max-texture-size", type=int) + parser.add_argument( + "--visual-texture-format", + choices=("none", "webp", "ktx2"), + default="none", + ) + parser.add_argument("--no-browser-collision", action="store_true") + parser.add_argument("--browser-collision-target-faces", type=int, default=100_000) + parser.add_argument("--no-mujoco", action="store_true") + parser.add_argument("--include-mujoco-visual", action="store_true") + parser.add_argument("--rebake", action="store_true") + args = parser.parse_args() + + package = cook_scene_package( + args.source, + output_dir=args.output_dir, + alignment=SceneMeshAlignment( + scale=args.scale, + translation=args.translation, + rotation_zyx_deg=args.rotation_zyx_deg, + y_up=not args.no_y_up, + ), + cook_sidecar=SceneCookSidecar.from_json(args.cook_spec) if args.cook_spec else None, + visual_spec=BrowserVisualSpec( + enabled=not args.no_visual, + optimizer=args.visual_optimizer, + simplify_ratio=args.visual_simplify_ratio, + simplify_error=args.visual_simplify_error, + texture_format=( + None if args.visual_texture_format == "none" else args.visual_texture_format + ), + max_texture_size=args.visual_max_texture_size, + ), + browser_collision_spec=BrowserCollisionSpec( + enabled=not args.no_browser_collision, + target_faces=args.browser_collision_target_faces, + ), 
+ mujoco_spec=MujocoSceneSpec( + enabled=not args.no_mujoco, + include_visual_mesh=args.include_mujoco_visual, + ), + rebake=args.rebake, + ) + print(package.metadata_path) + + +if __name__ == "__main__": + cli_main() + + +__all__ = ["SCENE_PACKAGE_DIR", "cook_scene_package"] diff --git a/dimos/experimental/pimsim/scene/entity_collision.py b/dimos/experimental/pimsim/scene/entity_collision.py new file mode 100644 index 0000000000..1ecd6619b3 --- /dev/null +++ b/dimos/experimental/pimsim/scene/entity_collision.py @@ -0,0 +1,141 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Cook-time convex decomposition for scene-package entities. + +Mesh entities need convex collision geometry for MuJoCo's narrowphase. +The cooker decomposes each entity's ``visual.glb`` with CoACD (chair +legs / seat / back each get their own hull, so contacts are +chair-shaped) and writes the hulls into the package next to the visual: + +```text +entities// +├── visual.glb +└── mujoco_collision/ + ├── hull_000.obj + └── ... +``` + +The hull paths are recorded per entity as ``collision_paths`` in +``scene.meta.json`` and loaded verbatim by the runtime composer +(``dimos/simulation/mujoco/entity_scene.py``) — there is no runtime +decomposition and no per-machine cache; the package is self-contained. 
def cook_entity_collision_hulls(
    visual_mesh_path: str | Path,
    out_dir: str | Path,
    *,
    rebake: bool = False,
) -> list[Path]:
    """Decompose one entity mesh into convex hulls under ``out_dir``.

    Idempotent: existing ``hull_*.obj`` files are reused unless ``rebake``.
    Falls back to a single convex hull when CoACD is unavailable or fails
    on the mesh.

    Args:
        visual_mesh_path: Mesh file to decompose (read with open3d).
        out_dir: Directory that receives ``hull_NNN.obj`` files; created
            on demand.
        rebake: When true, ignore hulls already present in ``out_dir`` and
            regenerate them.

    Returns:
        Paths of the hull files that exist on disk after the call. ``[]``
        (with a warning) when open3d is missing, the mesh has no vertices,
        the fallback convex hull cannot be computed, or no hull could be
        written — the runtime composer then uses an AABB box.
    """
    mesh_path = Path(visual_mesh_path)
    out_dir = Path(out_dir)

    if not rebake:
        existing = sorted(out_dir.glob("hull_*.obj"))
        if existing:
            return existing

    try:
        import open3d as o3d  # type: ignore[import-untyped]
    except ImportError as exc:
        logger.warning("entity hulls: open3d unavailable (%s); skipping %s", exc, mesh_path)
        return []

    mesh = o3d.io.read_triangle_mesh(str(mesh_path))
    if not mesh.has_vertices():
        logger.warning("entity hulls: no vertices in %s; skipping", mesh_path)
        return []

    parts = _run_coacd(mesh, mesh_path)

    # Drop hulls from a previous bake so the directory only ever reflects
    # the decomposition produced by this call.
    out_dir.mkdir(parents=True, exist_ok=True)
    for stale in out_dir.glob("hull_*.obj"):
        stale.unlink()

    import numpy as np

    hull_meshes = []
    for vertices, triangles in parts:
        hull_mesh = o3d.geometry.TriangleMesh()
        hull_mesh.vertices = o3d.utility.Vector3dVector(np.asarray(vertices, dtype=np.float64))
        hull_mesh.triangles = o3d.utility.Vector3iVector(np.asarray(triangles, dtype=np.int32))
        hull_meshes.append(hull_mesh)

    if not hull_meshes:
        # CoACD produced nothing: fall back to one hull around the whole mesh.
        # Degenerate input (e.g. coplanar points) makes the hull computation
        # raise; treat that like an unreadable mesh instead of aborting the cook.
        try:
            fallback_hull, _ = mesh.compute_convex_hull()
        except RuntimeError as exc:
            logger.warning(
                "entity hulls: convex hull failed for %s (%s); skipping", mesh_path, exc
            )
            return []
        hull_meshes.append(fallback_hull)

    out_paths: list[Path] = []
    for i, hull_mesh in enumerate(hull_meshes):
        path = out_dir / f"hull_{i:03d}.obj"
        # write_triangle_mesh reports failure through its return value; only
        # record hulls that actually made it to disk.
        if not o3d.io.write_triangle_mesh(str(path), hull_mesh, write_vertex_normals=False):
            logger.warning("entity hulls: failed to write %s; skipping hull", path)
            continue
        out_paths.append(path)

    return out_paths


def _run_coacd(mesh: object, mesh_path: Path) -> list[tuple[object, object]]:
    """CoACD parts for an open3d mesh; ``[]`` means fall back to one hull.

    Any failure (including CoACD not being installed) is logged as a
    warning and reported as ``[]`` so the caller can use its fallback.
    """
    try:
        import coacd  # type: ignore[import-not-found, import-untyped]
        import numpy as np

        # Lower CoACD's log level once per process; the flag lives on the
        # function object so repeated calls skip the setup.
        if not getattr(_run_coacd, "_coacd_silenced", False):
            coacd.set_log_level("error")
            _run_coacd._coacd_silenced = True  # type: ignore[attr-defined]

        cm = coacd.Mesh(
            np.asarray(mesh.vertices, dtype=np.float64),  # type: ignore[attr-defined]
            np.asarray(mesh.triangles, dtype=np.int32),  # type: ignore[attr-defined]
        )
        parts = coacd.run_coacd(
            cm,
            threshold=_COACD_THRESHOLD,
            max_convex_hull=_COACD_MAX_HULLS,
            resolution=_COACD_RESOLUTION,
            mcts_iterations=_COACD_MCTS_ITERATIONS,
            mcts_nodes=_COACD_MCTS_NODES,
        )
        return list(parts)
    except Exception as exc:
        # Deliberate best-effort boundary: decomposition is optional and the
        # caller has a single-hull fallback.
        logger.warning(
            "entity hulls: CoACD failed for %s (%s); using single convex hull", mesh_path, exc
        )
        return []
def inspect_scene_asset(path: str | Path) -> SceneAssetStats:
    """Collect lightweight geometry/material counts for a scene file.

    The file extension (case-insensitive) selects the inspector: glTF/GLB,
    USD variants, and everything else via open3d.

    Raises:
        FileNotFoundError: if the resolved path does not exist.
    """
    asset = Path(path).expanduser().resolve()
    if not asset.exists():
        raise FileNotFoundError(f"scene asset not found: {asset}")

    extension = asset.suffix.lower()
    if extension in (".glb", ".gltf"):
        inspector = _inspect_gltf
    elif extension in (".usd", ".usda", ".usdc", ".usdz"):
        inspector = _inspect_usd
    else:
        inspector = _inspect_open3d
    return inspector(asset)
+ material = getattr(loaded.visual, "material", None) + material_count = 1 if material is not None else 0 + return SceneAssetStats( + path=str(path), + bytes=path.stat().st_size, + format=path.suffix.lower().lstrip("."), + mesh_count=1, + node_count=1, + material_count=material_count, + texture_count=_count_material_textures([material]), + vertex_count=len(loaded.vertices), + triangle_count=len(loaded.faces), + ) + + scene = loaded + mesh_count = len(getattr(scene, "geometry", {})) + node_count = len(getattr(scene.graph, "nodes_geometry", [])) + materials = [] + vertex_count = 0 + triangle_count = 0 + for geom in scene.geometry.values(): + if not isinstance(geom, trimesh.Trimesh): + continue + vertex_count += len(geom.vertices) + triangle_count += len(geom.faces) + materials.append(getattr(geom.visual, "material", None)) + material_keys = {repr(material) for material in materials if material is not None} + return SceneAssetStats( + path=str(path), + bytes=path.stat().st_size, + format=path.suffix.lower().lstrip("."), + mesh_count=mesh_count, + node_count=node_count, + material_count=len(material_keys), + texture_count=_count_material_textures(materials), + vertex_count=vertex_count, + triangle_count=triangle_count, + ) + + +def _count_material_textures(materials: list[Any]) -> int: + textures: set[int] = set() + for material in materials: + if material is None: + continue + for name in ( + "baseColorTexture", + "metallicRoughnessTexture", + "normalTexture", + "emissiveTexture", + "occlusionTexture", + "image", + ): + image = getattr(material, name, None) + if image is not None: + textures.add(id(image)) + return len(textures) + + +def _inspect_usd(path: Path) -> SceneAssetStats: + try: + from pxr import Usd, UsdGeom, UsdShade # type: ignore[import-not-found, import-untyped] + except ImportError as exc: + raise ImportError("inspecting USD assets requires usd-core") from exc + + stage = Usd.Stage.Open(str(path)) + if stage is None: + raise RuntimeError(f"could not 
def _inspect_open3d(path: Path) -> SceneAssetStats:
    """Inspect a mesh file through open3d's triangle-mesh reader.

    The file is reported as a single mesh/node; material and texture
    counts keep their defaults.

    Raises:
        RuntimeError: if the loaded mesh has no triangles.
    """
    mesh = o3d.io.read_triangle_mesh(str(path))
    triangle_total = len(mesh.triangles)
    if triangle_total == 0:
        raise RuntimeError(f"empty mesh: {path}")

    extension = path.suffix.lower().lstrip(".")
    return SceneAssetStats(
        path=str(path),
        bytes=path.stat().st_size,
        format=extension,
        mesh_count=1,
        node_count=1,
        vertex_count=len(mesh.vertices),
        triangle_count=triangle_total,
    )
@dataclass(frozen=True)
class EntityCookPlan:
    """Resolved cook-time record for one authored interactable.

    Bundles the authored spec with the values computed for it: safe id,
    matched prims, visual node patterns, bounds, pose and descriptor.
    """

    spec: InteractableSpec
    safe_id: str
    matched_prim_paths: tuple[str, ...]
    visual_node_patterns: tuple[str, ...]
    aabb_min: tuple[float, float, float]
    aabb_max: tuple[float, float, float]
    center: tuple[float, float, float]
    initial_quat: tuple[float, float, float, float]
    descriptor: dict[str, Any]
    visual_path: Path | None

    def to_metadata(self) -> dict[str, Any]:
        """Return the per-entity metadata dict for this plan."""
        authored = self.spec
        bounds = {
            "min": list(self.aabb_min),
            "max": list(self.aabb_max),
        }
        # Pose is the AABB centre plus the quaternion in (w, x, y, z) order.
        pose = {
            "x": self.center[0],
            "y": self.center[1],
            "z": self.center[2],
            "qw": self.initial_quat[0],
            "qx": self.initial_quat[1],
            "qy": self.initial_quat[2],
            "qz": self.initial_quat[3],
        }
        visual_ref = str(self.visual_path) if self.visual_path else None
        return {
            "id": authored.id,
            "tags": list(authored.tags),
            "source_prim_paths": list(authored.source_prim_paths),
            "matched_prim_paths": list(self.matched_prim_paths),
            "visual_node_patterns": list(self.visual_node_patterns),
            "remove_from_static": authored.remove_from_static,
            "spawn": authored.spawn,
            "synthetic": authored.is_synthetic,
            "aabb": bounds,
            "initial_pose": pose,
            "visual_path": visual_ref,
            "descriptor": self.descriptor,
            "physics": authored.physics,
            "visual": authored.visual,
        }

    def to_json_dict(self) -> dict[str, Any]:
        """Return the compact plan summary dict for this entity."""
        summary: dict[str, Any] = {"id": self.spec.id, "safe_id": self.safe_id}
        summary["matched_prim_paths"] = list(self.matched_prim_paths)
        summary["visual_node_patterns"] = list(self.visual_node_patterns)
        summary["aabb"] = {"min": list(self.aabb_min), "max": list(self.aabb_max)}
        summary["center"] = list(self.center)
        summary["synthetic"] = self.spec.is_synthetic
        summary["descriptor"] = self.descriptor
        summary["visual_path"] = str(self.visual_path) if self.visual_path else None
        summary["remove_from_static"] = self.spec.remove_from_static
        return summary
def build_scene_cook_plan(
    source_path: str | Path,
    *,
    sidecar: SceneCookSidecar,
    alignment: SceneMeshAlignment,
    output_dir: str | Path,
    collision_spec: CollisionSpec | None = None,
) -> SceneCookPlan:
    """Resolve the authored sidecar against the source scene.

    An explicit ``collision_spec`` takes precedence over the sidecar's
    collision policy. Without interactables the plan has no entities and
    the source scene is not loaded. Otherwise every interactable becomes
    an entity plan, and the collision spec is extended with skips for
    entities that are removed from the static scene.
    """
    resolved_source = Path(source_path).expanduser().resolve()
    collision_base = collision_spec if collision_spec else sidecar.collision

    interactables = sidecar.interactables
    if not interactables:
        return SceneCookPlan(
            source_path=resolved_source,
            alignment=alignment,
            sidecar=sidecar,
            collision_spec=collision_base,
            stats={"source_prims": 0, "entities": 0},
        )

    entities_root = Path(output_dir).expanduser().resolve() / "entities"

    # Source prims are only loaded when at least one interactable extracts
    # geometry from the scene; purely synthetic sidecars skip the load.
    prims: list[ScenePrimMesh] = []
    if any(item.source_prim_paths for item in interactables):
        prims = load_scene_prims(resolved_source, alignment=alignment)

    planned: list[EntityCookPlan] = []
    for item in interactables:
        if item.is_synthetic:
            planned.append(_build_synthetic_entity_plan(item, entities_root))
        else:
            planned.append(_build_matched_entity_plan(item, prims, entities_root))
    entities = tuple(planned)

    return SceneCookPlan(
        source_path=resolved_source,
        alignment=alignment,
        sidecar=sidecar,
        collision_spec=_collision_spec_with_entity_skips(collision_base, entities),
        entities=entities,
        stats={"source_prims": len(prims), "entities": len(entities)},
    )
def _build_synthetic_entity_plan(
    spec: InteractableSpec,
    entities_dir: Path,
) -> EntityCookPlan:
    """Resolve a synthetic entity: no source-prim extraction, primitive
    geometry, pose from the spec. Used for manip rigs, test cubes, props you
    want in the scene that aren't in the asset.

    Args:
        spec: Authored interactable; ``physics.extents`` is required because
            there is no source mesh to derive bounds from.
        entities_dir: Package ``entities`` directory. Unused here (synthetic
            entities have no ``visual.glb``); kept for signature parity with
            the matched-entity builder.

    Returns:
        Plan with no matched prims and no visual path. The AABB is centred
        on the authored pose and does not account for the pose rotation.

    Raises:
        ValueError: if ``physics.extents`` is missing or empty.
    """
    pose = spec.pose or {}
    center = (
        float(pose.get("x", 0.0)),
        float(pose.get("y", 0.0)),
        float(pose.get("z", 0.0)),
    )
    quat = (
        float(pose.get("qw", 1.0)),
        float(pose.get("qx", 0.0)),
        float(pose.get("qy", 0.0)),
        float(pose.get("qz", 0.0)),
    )
    extents_raw = spec.physics.get("extents")
    if not extents_raw:
        raise ValueError(
            f"synthetic interactable {spec.id!r}: physics.extents required "
            f"(no source mesh to derive bounds from)"
        )
    extents_np = np.asarray([float(v) for v in extents_raw], dtype=float)

    shape_hint, shape_extents = _resolve_shape(spec, extents_np)
    aabb_half = _synthetic_aabb_half_extents(shape_hint, extents_np)
    aabb_min = tuple(float(c - h) for c, h in zip(center, aabb_half, strict=True))
    aabb_max = tuple(float(c + h) for c, h in zip(center, aabb_half, strict=True))

    safe_id = _safe_entity_id(spec.id)
    descriptor = _make_descriptor(spec, shape_hint, shape_extents, visual_path=None)

    return EntityCookPlan(
        spec=spec,
        safe_id=safe_id,
        matched_prim_paths=(),
        visual_node_patterns=(),
        aabb_min=aabb_min,
        aabb_max=aabb_max,
        center=center,
        initial_quat=quat,
        descriptor=descriptor,
        visual_path=None,
    )


def _synthetic_aabb_half_extents(shape_hint: str, extents: np.ndarray) -> np.ndarray:
    """Per-axis AABB half extents for authored ``physics.extents``."""
    if shape_hint == "sphere" and len(extents) == 1:
        # A single value is the radius: the sphere bounds every axis equally
        # (previously only x was filled, giving a flat box).
        return np.full(3, extents[0], dtype=float)
    if shape_hint == "cylinder" and len(extents) == 2:
        # Follows the [radius, full height along z] layout that
        # ``_resolve_shape`` emits for cylinders.
        # NOTE(review): confirm the runtime composer reads height as full
        # height, not half height.
        radius, height = extents
        return np.array([radius, radius, height * 0.5], dtype=float)
    # Box-style full sizes are halved; any other layout keeps the original
    # behaviour of using the values as-is, zero-padded to three axes.
    half = extents / 2.0 if len(extents) == 3 else extents
    padded = np.zeros(3, dtype=float)
    padded[: len(half)] = half[:3]
    return padded
+ ] + return shape_hint, [] + + +def _make_descriptor( + spec: InteractableSpec, + shape_hint: str, + shape_extents: list[float], + visual_path: Path | None, +) -> dict[str, Any]: + descriptor: dict[str, Any] = { + "entity_id": spec.id, + "kind": spec.kind, + "shape_hint": shape_hint, + "extents": [float(value) for value in shape_extents], + "mass": float(spec.mass), + } + if visual_path is not None: + safe_id = _safe_entity_id(spec.id) + descriptor["mesh_ref"] = f"entities/{safe_id}/visual.glb" + else: + descriptor["mesh_ref"] = "" + rgba = spec.visual.get("rgba") if spec.visual else None + if isinstance(rgba, list | tuple) and len(rgba) == 4: + descriptor["rgba"] = [float(v) for v in rgba] + return descriptor + + +def _visual_node_patterns(prims: list[ScenePrimMesh]) -> tuple[str, ...]: + names: list[str] = [] + for prim in prims: + prim_path = prim.visual_node_name or prim.prim_path or prim.name + basename = prim_path.lstrip("/").rsplit("/", 1)[-1] + visual_name = _HASH_SUFFIX_RE.sub("", basename) + if visual_name not in names: + names.append(visual_name) + return tuple(names) + + +def _collision_spec_with_entity_skips( + collision_spec: CollisionSpec, + entities: tuple[EntityCookPlan, ...], +) -> CollisionSpec: + prim_overrides: dict[str, dict[str, Any]] = dict(collision_spec.prim_overrides) + for entity in entities: + if not entity.spec.remove_from_static: + continue + for prim_path in sorted(entity.matched_prim_paths): + prim_overrides.setdefault(prim_path, {"type": "skip"}) + return replace(collision_spec, prim_overrides=prim_overrides) + + +def _prim_sort_key(prim: ScenePrimMesh) -> tuple[str, str]: + return (prim.prim_path or prim.name, prim.name) + + +def _safe_entity_id(entity_id: str) -> str: + safe = "".join(c if c.isalnum() or c in {"-", "_", "."} else "_" for c in entity_id) + return safe or "entity" + + +__all__ = ["EntityCookPlan", "SceneCookPlan", "build_scene_cook_plan"] diff --git a/dimos/experimental/pimsim/scene/sidecar.py 
@dataclass(frozen=True)
class InteractableSpec:
    """One hand-authored runtime entity.

    Two flavours:
      * ``source_prim_paths`` set -> the patterns are matched against scene
        prims and geometry is extracted from the source mesh.
      * ``source_prim_paths`` empty + ``pose`` set -> synthetic entity with
        no scene-mesh source; geometry comes from ``physics.shape`` +
        ``physics.extents`` and the pose from the spec itself.
    """

    id: str
    source_prim_paths: tuple[str, ...] = ()
    pose: dict[str, float] | None = None
    remove_from_static: bool = True
    spawn: CookEntitySpawn = "initial"
    kind: CookEntityKind = "dynamic"
    mass: float = 1.0
    tags: tuple[str, ...] = ()
    physics: dict[str, Any] = field(default_factory=dict)
    visual: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_dict(cls, raw: dict[str, Any]) -> InteractableSpec:
        """Build a spec from one sidecar ``interactables`` entry.

        ``prim_paths`` is accepted as an alias when ``source_prim_paths`` is
        absent; a bare string for prims or tags is treated as a single item.

        Raises:
            ValueError: if neither prim patterns nor a pose are given.
        """
        if "source_prim_paths" in raw:
            patterns = raw["source_prim_paths"]
        else:
            patterns = raw.get("prim_paths", ())
        if isinstance(patterns, str):
            patterns = (patterns,)

        authored_pose = raw.get("pose")
        if authored_pose is None and not patterns:
            raise ValueError(
                f"interactable {raw.get('id')!r}: needs either source_prim_paths "
                f"(extract from scene) or pose (synthetic entity)"
            )

        labels = raw.get("tags", ())
        if isinstance(labels, str):
            labels = (labels,)

        return cls(
            id=str(raw["id"]),
            source_prim_paths=tuple(str(pattern) for pattern in patterns),
            pose=None if authored_pose is None else dict(authored_pose),
            remove_from_static=bool(raw.get("remove_from_static", True)),
            spawn=raw.get("spawn", "initial"),
            kind=raw.get("kind", "dynamic"),
            mass=float(raw.get("mass", 1.0)),
            tags=tuple(str(tag) for tag in labels),
            physics=dict(raw.get("physics", {})),
            visual=dict(raw.get("visual", {})),
        )

    @property
    def is_synthetic(self) -> bool:
        """True when the spec has a pose and no source prim patterns."""
        if self.source_prim_paths:
            return False
        return self.pose is not None

    def to_json_dict(self) -> dict[str, Any]:
        """Return the spec as a plain dict with tuples converted to lists."""
        payload = asdict(self)
        payload.update(
            source_prim_paths=list(self.source_prim_paths),
            tags=list(self.tags),
        )
        return payload

    def matches(self, prim: ScenePrimMesh) -> bool:
        """Return whether any source pattern matches ``prim``.

        Both the prim's visual node name and its prim path are tried (when
        non-empty), without the sanitized-name candidate.
        """
        for candidate in (prim.visual_node_name, prim.prim_path):
            if not candidate:
                continue
            for pattern in self.source_prim_paths:
                if match_prim_pattern(candidate, pattern, include_sanitized=False):
                    return True
        return False
def match_prim_pattern(
    prim_path: str,
    pattern: str,
    *,
    include_sanitized: bool = True,
) -> bool:
    """Case-sensitively glob-match ``pattern`` against forms of ``prim_path``.

    The forms tried are the path as given, the path without leading
    slashes, and its last ``/``-separated component. With
    ``include_sanitized`` the slash-stripped path with every
    non-alphanumeric character replaced by ``_`` is tried as well.
    """
    without_root = prim_path.lstrip("/")
    leaf = without_root.rsplit("/", 1)[-1]
    forms = [prim_path, without_root, leaf]
    if include_sanitized:
        forms.append("".join(ch if ch.isalnum() else "_" for ch in without_root))

    for form in forms:
        if fnmatch.fnmatchcase(form, pattern):
            return True
    return False
def test_cook_entity_collision_hulls_is_idempotent(tmp_path: Path) -> None:
    """A second cook reuses the hull files untouched; ``rebake`` still yields hulls."""
    pytest.importorskip("coacd")
    mesh_file = tmp_path / "visual.obj"
    _write_box_mesh(mesh_file)
    hull_dir = tmp_path / "mujoco_collision"

    def _mtimes(paths: list[Path]) -> list[int]:
        return [hull.stat().st_mtime_ns for hull in paths]

    initial = cook_entity_collision_hulls(mesh_file, hull_dir)
    initial_mtimes = _mtimes(initial)

    # Unchanged modification times show the files were reused, not rewritten.
    reused = cook_entity_collision_hulls(mesh_file, hull_dir)
    assert reused == initial
    assert _mtimes(reused) == initial_mtimes

    rebaked = cook_entity_collision_hulls(mesh_file, hull_dir, rebake=True)
    assert rebaked
    for hull in rebaked:
        assert hull.exists()
def _metadata(tmp_path: Path) -> dict[str, object]:
    """Build a raw ``scene.meta.json`` payload rooted at ``tmp_path``.

    All artifact paths are absolute strings under ``tmp_path`` and the
    artifact frames are the expected ``ARTIFACT_FRAMES``.
    """
    identity_alignment = {
        "scale": 1.0,
        "rotation_zyx_deg": [0.0, 0.0, 0.0],
        "translation": [0.0, 0.0, 0.0],
        "y_up": True,
    }
    artifact_files = {
        "browser_visual": "visual.glb",
        "browser_collision": "collision.glb",
        "objects": "objects.json",
        "mujoco_scene": "wrapper.xml",
    }
    payload: dict[str, object] = {
        "source_path": str(tmp_path / "source.glb"),
        "package_dir": str(tmp_path),
        "alignment": identity_alignment,
        "artifact_frames": ARTIFACT_FRAMES,
        "artifacts": {key: str(tmp_path / name) for key, name in artifact_files.items()},
        "stats": {},
    }
    return payload
_metadata(tmp_path) + raw["artifact_frames"] = { + "browser_visual": "dimos_world", + "browser_collision": "source", + "mujoco": "dimos_world", + } + metadata_path = tmp_path / "scene.meta.json" + metadata_path.write_text(json.dumps(raw)) + + with pytest.raises(ValueError, match="artifact frame mismatch"): + load_scene_package(metadata_path) + + +def test_entity_collision_paths_round_trip(tmp_path: Path) -> None: + raw = _metadata(tmp_path) + raw["entities"] = [ + { + "id": "chair", + "visual_path": "entities/chair/visual.glb", + "collision_paths": [ + "entities/chair/mujoco_collision/hull_000.obj", + "entities/chair/mujoco_collision/hull_001.obj", + ], + "descriptor": {"shape_hint": "mesh"}, + } + ] + metadata_path = tmp_path / "scene.meta.json" + metadata_path.write_text(json.dumps(raw)) + + package = load_scene_package(metadata_path) + + assert package.entities[0]["collision_paths"] == [ + str(tmp_path / "entities/chair/mujoco_collision/hull_000.obj"), + str(tmp_path / "entities/chair/mujoco_collision/hull_001.obj"), + ] + assert package.to_json_dict()["entities"][0]["collision_paths"] == [ + "entities/chair/mujoco_collision/hull_000.obj", + "entities/chair/mujoco_collision/hull_001.obj", + ] + + +def test_load_scene_package_accepts_expected_artifact_frames(tmp_path: Path) -> None: + metadata_path = tmp_path / "scene.meta.json" + metadata_path.write_text(json.dumps(_metadata(tmp_path))) + + package = load_scene_package(metadata_path) + + assert package.visual_path == tmp_path / "visual.glb" + assert package.browser_collision_path == tmp_path / "collision.glb" + assert package.objects_path == tmp_path / "objects.json" + assert package.mujoco_scene_path == tmp_path / "wrapper.xml" + + +def test_scene_package_metadata_uses_package_relative_paths(tmp_path: Path) -> None: + package = ScenePackage( + package_dir=tmp_path, + source_path=tmp_path / "source.glb", + alignment=SceneMeshAlignment(), + visual_path=tmp_path / "browser" / "visual.glb", + 
browser_collision_path=tmp_path / "browser" / "collision.glb", + objects_path=tmp_path / "browser" / "objects.json", + mujoco_scene_path=tmp_path / "mujoco" / "abc123" / "wrapper.xml", + entities=[ + { + "id": "chair_001", + "visual_path": str(tmp_path / "entities" / "chair_001" / "visual.glb"), + } + ], + ) + + metadata_path = package.write_metadata() + raw = json.loads(metadata_path.read_text()) + + assert raw["package_dir"] == "." + assert raw["artifacts"]["browser_visual"] == "browser/visual.glb" + assert raw["artifacts"]["browser_collision"] == "browser/collision.glb" + assert raw["artifacts"]["objects"] == "browser/objects.json" + assert raw["artifacts"]["mujoco_scene"] == "mujoco/abc123/wrapper.xml" + assert raw["entities"][0]["visual_path"] == "entities/chair_001/visual.glb" + + loaded = load_scene_package(metadata_path) + assert loaded.package_dir == tmp_path + assert loaded.visual_path == tmp_path / "browser" / "visual.glb" + assert loaded.mujoco_scene_path == tmp_path / "mujoco" / "abc123" / "wrapper.xml" + assert loaded.entities[0]["visual_path"] == str( + tmp_path / "entities" / "chair_001" / "visual.glb" + ) + + +def test_load_scene_package_tolerates_missing_objects_sidecar(tmp_path: Path) -> None: + raw = _metadata(tmp_path) + # Older cooked packages without the semantic sidecar should still load. 
+ raw["artifacts"].pop("objects") # type: ignore[union-attr] + metadata_path = tmp_path / "scene.meta.json" + metadata_path.write_text(json.dumps(raw)) + + package = load_scene_package(metadata_path) + + assert package.objects_path is None + + +def test_extract_scene_objects_emits_per_prim_aabb() -> None: + from dimos.experimental.pimsim.scene.browser_collision import extract_scene_objects + + triangles = np.array([[0, 1, 2]], dtype=np.int32) + prims = [ + ScenePrimMesh( + name="Sectional_seat", + prim_path="/Apt/Living/Sectional/seat", + vertices=np.array( + [[-1.0, -2.0, 0.0], [2.0, -2.0, 0.5], [-1.0, 1.0, 1.0]], + dtype=np.float32, + ), + triangles=triangles, + ), + ScenePrimMesh( + name="Empty_prim", + prim_path="/Apt/Living/Empty", + vertices=np.empty((0, 3), dtype=np.float32), + triangles=np.empty((0, 3), dtype=np.int32), + ), + ] + + objects = extract_scene_objects(prims) + + assert len(objects) == 1 # empty prim filtered + entry = objects[0] + assert entry["id"] == "Sectional_seat" + assert entry["prim_path"] == "/Apt/Living/Sectional/seat" + assert entry["aabb"]["min"] == [-1.0, -2.0, 0.0] + assert entry["aabb"]["max"] == [2.0, 1.0, 1.0] + + +def test_load_scene_package_preserves_packaged_entities(tmp_path: Path) -> None: + raw = _metadata(tmp_path) + raw["entities"] = [ + { + "id": "chair_016", + "descriptor": {"entity_id": "chair_016", "shape_hint": "box"}, + } + ] + metadata_path = tmp_path / "scene.meta.json" + metadata_path.write_text(json.dumps(raw)) + + package = load_scene_package(metadata_path) + + assert package.entities[0]["id"] == "chair_016" + + +def test_scene_cook_plan_maps_collision_prims_to_blender_visual_nodes( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + def fake_load_scene_prims( + path: str | Path, + alignment: SceneMeshAlignment | None = None, + ) -> list[ScenePrimMesh]: + del path, alignment + triangles = np.array([[0, 1, 2]], dtype=np.int32) + return [ + ScenePrimMesh( + name="Chair_seat", + 
prim_path="Chair_a1b2c3", + vertices=np.array( + [[-1.0, -1.0, 0.2], [-0.5, -1.0, 0.2], [-1.0, -0.5, 0.8]], + dtype=np.float32, + ), + triangles=triangles, + ), + ScenePrimMesh( + name="Chair.016_seat", + prim_path="Chair.016_a1b2c3", + vertices=np.array( + [[1.0, 2.0, 0.2], [2.0, 2.0, 0.2], [1.0, 3.0, 0.8]], + dtype=np.float32, + ), + triangles=triangles, + ), + ScenePrimMesh( + name="Chair.016_back", + prim_path="Chair.016_d4e5f6", + vertices=np.array( + [[1.0, 2.0, 0.8], [2.0, 3.0, 1.4], [1.5, 2.5, 1.2]], + dtype=np.float32, + ), + triangles=triangles, + ), + ] + + monkeypatch.setattr(plan_module, "load_scene_prims", fake_load_scene_prims) + sidecar = SceneCookSidecar.from_dict( + { + "interactables": [ + { + "id": "chair_000", + "source_prim_paths": ["Chair_*"], + "physics": {"shape": "box"}, + }, + { + "id": "chair_016", + "source_prim_paths": ["Chair.016_*"], + "physics": {"shape": "box"}, + }, + ] + } + ) + + plan = plan_module.build_scene_cook_plan( + tmp_path / "office.glb", + sidecar=sidecar, + alignment=SceneMeshAlignment(scale=2.0, y_up=False), + output_dir=tmp_path, + ) + + base_entity = plan.entities[0] + assert base_entity.matched_prim_paths == ("Chair_a1b2c3",) + assert base_entity.visual_node_patterns == ("Chair",) + assert base_entity.descriptor["mesh_ref"] == "entities/chair_000/visual.glb" + + entity = plan.entities[1] + assert entity.matched_prim_paths == ("Chair.016_a1b2c3", "Chair.016_d4e5f6") + assert entity.visual_node_patterns == ("Chair.016",) + assert entity.descriptor["mesh_ref"] == "entities/chair_016/visual.glb" + assert plan.collision_spec.resolve("Chair_a1b2c3")["type"] == "skip" + assert plan.collision_spec.resolve("Chair.016_a1b2c3")["type"] == "skip" + assert plan.collision_spec.resolve("Chair.001_a1b2c3")["type"] == "auto" + + +def test_synthetic_entity_uses_pose_and_extents(tmp_path: Path) -> None: + sidecar = SceneCookSidecar.from_dict( + { + "interactables": [ + { + "id": "manip_cube", + "pose": {"x": 0.0, "y": 0.75, "z": 
0.69}, + "kind": "dynamic", + "mass": 0.15, + "physics": {"shape": "box", "extents": [0.08, 0.08, 0.08]}, + "visual": {"rgba": [0.85, 0.20, 0.20, 1.0]}, + "tags": ["manipulation"], + }, + ] + } + ) + plan = plan_module.build_scene_cook_plan( + tmp_path / "office.glb", + sidecar=sidecar, + alignment=SceneMeshAlignment(), + output_dir=tmp_path, + ) + + entity = plan.entities[0] + assert entity.spec.is_synthetic + assert entity.matched_prim_paths == () + assert entity.visual_path is None + assert entity.center == (0.0, 0.75, 0.69) + assert entity.descriptor["shape_hint"] == "box" + assert entity.descriptor["extents"] == [0.08, 0.08, 0.08] + assert entity.descriptor["rgba"] == [0.85, 0.20, 0.20, 1.0] + assert entity.descriptor["mesh_ref"] == "" + + +def test_interactable_requires_prims_or_pose() -> None: + with pytest.raises(ValueError, match="source_prim_paths.*or pose"): + SceneCookSidecar.from_dict({"interactables": [{"id": "ghost"}]}) diff --git a/dimos/experimental/pimsim/scene/visual_blender.py b/dimos/experimental/pimsim/scene/visual_blender.py new file mode 100644 index 0000000000..0615d271e1 --- /dev/null +++ b/dimos/experimental/pimsim/scene/visual_blender.py @@ -0,0 +1,313 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Blender-backed visual compiler for authored scene plans."""

from __future__ import annotations

import json
from pathlib import Path
import shutil
import subprocess
import tempfile
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only referenced in annotations, which are lazy under
    # ``from __future__ import annotations``.
    from dimos.experimental.pimsim.scene.plan import SceneCookPlan

# Number of trailing output lines quoted when a subprocess fails.
_COMMAND_TAIL_LINES = 30
# Stored in the manifest; changing it invalidates previously cached cooks.
_VISUAL_PLAN_VERSION = 2

# Script executed inside Blender. It receives the source asset and the plan
# JSON as the last two command-line arguments.
_PLAN_VISUAL_SCRIPT = r"""
import fnmatch
import json
import math
import pathlib
import sys

import bpy
from mathutils import Matrix, Vector

source = pathlib.Path(sys.argv[-2])
plan_path = pathlib.Path(sys.argv[-1])
plan = json.loads(plan_path.read_text())
static_visual_path = pathlib.Path(plan["static_visual_path"])
suffix = source.suffix.lower()


def fail(message):
    raise RuntimeError(message)


def import_source():
    bpy.ops.object.select_all(action="SELECT")
    bpy.ops.object.delete()
    if suffix in {".usd", ".usda", ".usdc", ".usdz"}:
        bpy.ops.wm.usd_import(filepath=str(source))
    elif suffix in {".gltf", ".glb"}:
        bpy.ops.import_scene.gltf(filepath=str(source))
    elif suffix == ".obj":
        bpy.ops.wm.obj_import(filepath=str(source))
    elif suffix == ".stl":
        bpy.ops.wm.stl_import(filepath=str(source))
    elif suffix == ".ply":
        bpy.ops.wm.ply_import(filepath=str(source))
    else:
        fail(f"unsupported visual source suffix: {suffix}")


def alignment_matrix():
    align = plan["alignment"]
    yaw, pitch, roll = [math.radians(float(v)) for v in align["rotation_zyx_deg"]]
    cz, sz = math.cos(yaw), math.sin(yaw)
    cy, sy = math.cos(pitch), math.sin(pitch)
    cx, sx = math.cos(roll), math.sin(roll)
    rz = Matrix(((cz, -sz, 0.0), (sz, cz, 0.0), (0.0, 0.0, 1.0)))
    ry = Matrix(((cy, 0.0, sy), (0.0, 1.0, 0.0), (-sy, 0.0, cy)))
    rx = Matrix(((1.0, 0.0, 0.0), (0.0, cx, -sx), (0.0, sx, cx)))
    r = rz @ ry @ rx
    if bool(align["y_up"]):
        y_to_z = Matrix(((1.0, 0.0, 0.0), (0.0, 0.0, -1.0), (0.0, 1.0, 0.0)))
        r = r @ y_to_z
    return r, float(align["scale"]), Vector(tuple(float(v) for v in align["translation"]))


ALIGN_R, ALIGN_SCALE, ALIGN_T = alignment_matrix()


def object_candidates(obj):
    candidates = {obj.name, obj.name_full}
    if obj.parent is not None:
        candidates.add(obj.parent.name)
        candidates.add(obj.parent.name_full)
    return candidates


def matches(obj, pattern):
    return any(fnmatch.fnmatchcase(candidate, pattern) for candidate in object_candidates(obj))


def source_point_from_blender_world(point):
    if suffix in {".gltf", ".glb"}:
        # Blender imports glTF Y-up coordinates into its Z-up world as
        # (x, -z, y). The cook plan was resolved in the source glTF frame,
        # so convert back before applying SceneMeshAlignment.
        return Vector((point.x, point.z, -point.y))
    return point


def resolve_objects(entity):
    objects = []
    missing = []
    for pattern in entity["visual_node_patterns"]:
        matched = [obj for obj in bpy.context.scene.objects if obj.type == "MESH" and matches(obj, pattern)]
        if not matched:
            missing.append(pattern)
            continue
        for obj in matched:
            if obj not in objects:
                objects.append(obj)
    if missing:
        fail(f"entity {entity['id']} visual nodes not found in Blender import: {missing}")
    return objects


def aligned_local_point(source_world, center):
    source_point = source_point_from_blender_world(source_world)
    return (ALIGN_R @ (ALIGN_SCALE * source_point)) + ALIGN_T - center


def duplicate_for_entity(obj, center, suffix):
    dup = obj.copy()
    dup.data = obj.data.copy()
    dup.animation_data_clear()
    dup.name = f"{obj.name}__{suffix}"
    bpy.context.collection.objects.link(dup)
    for vertex in dup.data.vertices:
        source_world = obj.matrix_world @ vertex.co
        vertex.co = aligned_local_point(source_world, center)
    dup.matrix_world = Matrix.Identity(4)
    return dup


def export_entity(entity, objects):
    visual_path = pathlib.Path(entity["visual_path"])
    visual_path.parent.mkdir(parents=True, exist_ok=True)
    center = Vector(tuple(float(v) for v in entity["center"]))
    duplicates = [duplicate_for_entity(obj, center, entity["safe_id"]) for obj in objects]
    try:
        bpy.ops.object.select_all(action="DESELECT")
        for dup in duplicates:
            dup.select_set(True)
        bpy.context.view_layer.objects.active = duplicates[0]
        bpy.ops.export_scene.gltf(
            filepath=str(visual_path),
            export_format="GLB",
            use_selection=True,
            export_yup=False,
            export_apply=True,
        )
    finally:
        for dup in duplicates:
            bpy.data.objects.remove(dup, do_unlink=True)


def export_static_visual(objects_to_remove):
    for obj in sorted(objects_to_remove, key=lambda item: item.name):
        if obj.name in bpy.data.objects:
            bpy.data.objects.remove(obj, do_unlink=True)
    remaining = [obj for obj in bpy.context.scene.objects if obj.type == "MESH"]
    if not remaining:
        fail("static visual would contain no mesh objects after entity removal")
    static_visual_path.parent.mkdir(parents=True, exist_ok=True)
    bpy.ops.object.select_all(action="DESELECT")
    bpy.ops.export_scene.gltf(
        filepath=str(static_visual_path),
        export_format="GLB",
        export_yup=True,
        export_apply=True,
    )


import_source()
remove_from_static = set()
report = []
for entity in plan["entities"]:
    objects = resolve_objects(entity)
    export_entity(entity, objects)
    if entity["remove_from_static"]:
        remove_from_static.update(objects)
    report.append(
        {
            "id": entity["id"],
            "visual_nodes": [obj.name for obj in objects],
            "removed_from_static": entity["remove_from_static"],
        }
    )
export_static_visual(remove_from_static)
print("DIMOS_VISUAL_PLAN_REPORT=" + json.dumps(report, sort_keys=True))
"""


def cook_plan_visual_assets(
    source_path: str | Path,
    output_dir: str | Path,
    *,
    plan: SceneCookPlan,
    rebake: bool = False,
) -> Path:
    """Extract entity visuals and write a filtered static visual source GLB.

    The cook is skipped when ``rebake`` is false, every expected output file
    exists, and ``browser/visual_plan.json`` equals the plan that would be
    sent to Blender.

    Args:
        source_path: Scene asset handed to Blender.
        output_dir: Package directory; outputs go under ``browser/`` and the
            per-entity ``visual_path`` locations named by the plan.
        plan: Cook plan providing ``alignment`` and ``entities``.
        rebake: Run Blender even when the cached outputs look current.

    Returns:
        Path of ``browser/static_visual_source.glb`` inside the package.

    Raises:
        RuntimeError: Blender is not on PATH, the Blender run fails, or it
            finishes without producing every expected file.
    """
    source = Path(source_path).expanduser().resolve()
    package_dir = Path(output_dir).expanduser().resolve()
    browser_dir = package_dir / "browser"
    static_visual_source = browser_dir / "static_visual_source.glb"
    plan_manifest = browser_dir / "visual_plan.json"
    plan_json = _blender_plan_json(plan, static_visual_source)
    expected_paths = [
        static_visual_source,
        *(entity.visual_path for entity in plan.entities if entity.visual_path is not None),
    ]
    if (
        not rebake
        and all(path.exists() for path in expected_paths)
        and _manifest_matches(plan_manifest, plan_json)
    ):
        return static_visual_source

    blender = shutil.which("blender")
    if blender is None:
        raise RuntimeError("authored visual entity extraction requires Blender on PATH")

    # Invalidate the previous manifest first: if this cook is interrupted after
    # overwriting only some outputs, the next call must not see a cache hit.
    plan_manifest.unlink(missing_ok=True)

    # A temporary directory removes both helper files on every exit path,
    # including a failure while writing one of them.
    with tempfile.TemporaryDirectory(prefix="dimos-visual-plan-") as work_dir_raw:
        work_dir = Path(work_dir_raw)
        plan_path = work_dir / "plan.json"
        script_path = work_dir / "cook_visual_plan.py"
        plan_path.write_text(json.dumps(plan_json))
        script_path.write_text(_PLAN_VISUAL_SCRIPT)
        _run_command(
            [
                blender,
                "--background",
                "--factory-startup",
                # Blender exits 0 after a script exception unless this is set,
                # and it must precede ``--python`` because Blender handles its
                # arguments in order.
                "--python-exit-code",
                "1",
                "--python",
                str(script_path),
                "--",
                str(source),
                str(plan_path),
            ],
            "blender visual plan cook",
        )

    # Never record a manifest for a cook that did not write all its outputs.
    missing = [str(path) for path in expected_paths if not path.exists()]
    if missing:
        raise RuntimeError("blender visual plan cook did not produce: " + ", ".join(missing))

    plan_manifest.parent.mkdir(parents=True, exist_ok=True)
    plan_manifest.write_text(json.dumps(plan_json, indent=2, sort_keys=True) + "\n")
    return static_visual_source


def _blender_plan_json(plan: SceneCookPlan, static_visual_source: Path) -> dict[str, object]:
    """Build the JSON-serialisable plan consumed by ``_PLAN_VISUAL_SCRIPT``.

    The same dictionary is stored as the cache manifest, so it holds only
    plain JSON types (lists, strings, numbers, booleans).
    """
    return {
        "visual_plan_version": _VISUAL_PLAN_VERSION,
        "alignment": {
            "scale": plan.alignment.scale,
            "rotation_zyx_deg": list(plan.alignment.rotation_zyx_deg),
            "translation": list(plan.alignment.translation),
            "y_up": plan.alignment.y_up,
        },
        "static_visual_path": str(static_visual_source),
        "entities": [
            {
                "id": entity.spec.id,
                "safe_id": entity.safe_id,
                "visual_node_patterns": list(entity.visual_node_patterns),
                "center": list(entity.center),
                "visual_path": str(entity.visual_path),
                "remove_from_static": entity.spec.remove_from_static,
            }
            # Synthetic entities have no source mesh to extract; skip them
            # entirely so Blender doesn't try to match an empty pattern set.
            for entity in plan.entities
            if entity.visual_path is not None
        ],
    }


def _run_command(args: list[str], label: str) -> str:
    """Run *args* and return its combined stdout/stderr text.

    Raises:
        RuntimeError: The process exits non-zero; the message names *label*,
            the exit code and the last ``_COMMAND_TAIL_LINES`` output lines.
    """
    result = subprocess.run(
        args,
        check=False,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    output = result.stdout or ""
    if result.returncode != 0:
        raise RuntimeError(f"{label} failed with exit code {result.returncode}:\n{_tail(output)}")
    return output


def _manifest_matches(path: Path, expected: dict[str, object]) -> bool:
    """Return True only when *path* holds JSON equal to *expected*.

    A missing, unreadable, wrongly encoded or malformed manifest is treated as
    a cache miss, not an error.
    """
    try:
        return bool(json.loads(path.read_text()) == expected)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file. ValueError covers
        # json.JSONDecodeError and UnicodeDecodeError.
        return False


def _tail(output: str) -> str:
    """Return the last ``_COMMAND_TAIL_LINES`` lines of *output*."""
    return "\n".join(output.splitlines()[-_COMMAND_TAIL_LINES:])


__all__ = ["cook_plan_visual_assets"]


# --- patch boundary: header of the next file in this patch, kept from the original ---
# diff --git a/dimos/experimental/pimsim/scene/visual_glb.py b/dimos/experimental/pimsim/scene/visual_glb.py
# new file mode 100644
# index 0000000000..4a2027d10c
# --- /dev/null
# +++ b/dimos/experimental/pimsim/scene/visual_glb.py
# @@ -0,0 +1,363 @@
# Copyright 2025-2026 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cook browser visual assets for real-time browser rendering."""

from __future__ import annotations

from dataclasses import dataclass
import json
from pathlib import Path
import shutil
import subprocess
import tempfile
from typing import Any

from dimos.experimental.pimsim.scene.inspect import inspect_scene_asset
from dimos.simulation.scene_assets.spec import BrowserVisualSpec
from dimos.utils.logging_config import setup_logger

logger = setup_logger()

# Source suffixes the embedded Blender script knows how to import.
_BLENDER_INPUT_SUFFIXES = {
    ".usd",
    ".usda",
    ".usdc",
    ".usdz",
    ".gltf",
    ".glb",
    ".obj",
    ".stl",
    ".ply",
}
# Source suffixes passed to gltfpack without a Blender conversion step.
_GLTFPACK_INPUT_SUFFIXES = {".gltf", ".glb", ".obj"}
# Number of trailing output lines quoted when a subprocess fails.
_COMMAND_TAIL_LINES = 30

# Script executed inside Blender. Its last four command-line arguments are:
# source, target, simplify ratio, max texture size (0 disables resizing).
_BLENDER_SCRIPT = r"""
import pathlib
import sys

import bpy

source = pathlib.Path(sys.argv[-4])
target = pathlib.Path(sys.argv[-3])
simplify_ratio = float(sys.argv[-2])
max_texture_size = int(sys.argv[-1])
suffix = source.suffix.lower()

bpy.ops.object.select_all(action="SELECT")
bpy.ops.object.delete()

if suffix in {".usd", ".usda", ".usdc", ".usdz"}:
    bpy.ops.wm.usd_import(filepath=str(source))
elif suffix in {".gltf", ".glb"}:
    bpy.ops.import_scene.gltf(filepath=str(source))
elif suffix == ".obj":
    bpy.ops.wm.obj_import(filepath=str(source))
elif suffix == ".stl":
    bpy.ops.wm.stl_import(filepath=str(source))
elif suffix == ".ply":
    bpy.ops.wm.ply_import(filepath=str(source))
else:
    raise RuntimeError(f"unsupported visual source suffix: {suffix}")

for obj in list(bpy.context.scene.objects):
    if obj.type != "MESH":
        bpy.data.objects.remove(obj, do_unlink=True)

if max_texture_size > 0:
    for image in bpy.data.images:
        width, height = image.size
        largest = max(width, height)
        if largest <= max_texture_size:
            continue
        scale = max_texture_size / largest
        try:
            image.scale(max(1, int(width * scale)), max(1, int(height * scale)))
        except RuntimeError:
            # Blender cannot scale some generated or missing images; keep those
            # untouched instead of aborting the entire scene cook.
            pass

if 0.0 < simplify_ratio < 0.999:
    for obj in list(bpy.context.scene.objects):
        if obj.type != "MESH":
            continue
        bpy.ops.object.select_all(action="DESELECT")
        obj.select_set(True)
        bpy.context.view_layer.objects.active = obj
        modifier = obj.modifiers.new("dimos_decimate", "DECIMATE")
        modifier.ratio = simplify_ratio
        try:
            bpy.ops.object.modifier_apply(modifier=modifier.name)
        except RuntimeError:
            obj.modifiers.remove(modifier)

mesh_objects = [obj for obj in bpy.context.scene.objects if obj.type == "MESH"]
if len(mesh_objects) > 1:
    bpy.ops.object.select_all(action="DESELECT")
    for obj in mesh_objects:
        obj.select_set(True)
    bpy.context.view_layer.objects.active = mesh_objects[0]
    bpy.ops.object.join()

bpy.ops.export_scene.gltf(
    filepath=str(target),
    export_format="GLB",
    export_yup=True,
    export_apply=True,
)
"""


@dataclass(frozen=True)
class BrowserVisualCookResult:
    """Outcome of a browser visual cook."""

    # Location of the written (or cached) GLB.
    path: Path
    # Inspection statistics of that GLB, plus optional report and warnings.
    stats: dict[str, Any]
    # "cache", "copy", "blender" or "gltfpack".
    tool: str


def cook_browser_visual(
    source_path: str | Path,
    output_dir: str | Path,
    *,
    spec: BrowserVisualSpec | None = None,
    rebake: bool = False,
) -> BrowserVisualCookResult | None:
    """Write the browser visual GLB for a scene package.

    ``gltfpack`` is the default path because browser performance is dominated
    by draw calls, scene nodes, decoded texture memory, and shader/material
    switches. Blender is kept as a conversion fallback for formats gltfpack
    does not read directly.

    Args:
        source_path: Scene asset to optimise.
        output_dir: Directory receiving ``spec.output_name``; created if needed.
        spec: Cook settings; a default ``BrowserVisualSpec`` is used when omitted.
        rebake: Re-cook even when the output file already exists.

    Returns:
        The cook result, or ``None`` when the spec is disabled. An existing
        output is returned with ``tool="cache"`` unless ``rebake`` is set.

    Raises:
        RuntimeError: A required tool is missing, a tool fails, or the cooked
            asset exceeds the allowed vertex growth.
        ValueError: The spec names an unknown optimizer or texture setting.
    """
    visual_spec = spec or BrowserVisualSpec()
    if not visual_spec.enabled:
        return None

    source = Path(source_path).expanduser().resolve()
    out_dir = Path(output_dir).expanduser().resolve()
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / visual_spec.output_name
    if out_path.exists() and not rebake:
        return BrowserVisualCookResult(
            path=out_path,
            stats=inspect_scene_asset(out_path).to_json_dict(),
            tool="cache",
        )

    source_stats = inspect_scene_asset(source).to_json_dict()
    # Cook into a scratch directory and move the result into place only after
    # validation, so a rejected cook never replaces an existing output.
    with tempfile.TemporaryDirectory(prefix="dimos-visual-cook-") as temp_dir_raw:
        temp_dir = Path(temp_dir_raw)
        temp_out = temp_dir / out_path.name
        tool, report = _cook_visual(source, temp_out, visual_spec)
        stats = inspect_scene_asset(temp_out).to_json_dict()
        _validate_output(source_stats, stats, visual_spec)
        if report is not None:
            stats["optimizer_report"] = report
        shutil.move(str(temp_out), out_path)
    stats["path"] = str(out_path)

    warnings = _budget_warnings(stats, visual_spec)
    if warnings:
        stats["warnings"] = warnings
        for warning in warnings:
            logger.warning("browser visual budget: %s", warning)
    return BrowserVisualCookResult(path=out_path, stats=stats, tool=tool)


def _cook_visual(
    source: Path,
    target: Path,
    spec: BrowserVisualSpec,
) -> tuple[str, dict[str, Any] | None]:
    """Dispatch to the optimizer named by ``spec.optimizer``.

    Returns:
        ``(tool_name, report)``; only gltfpack can supply a report.

    Raises:
        RuntimeError: ``copy`` was requested for a non-GLB source.
        ValueError: The optimizer name is not recognised.
    """
    optimizer = spec.optimizer.lower()
    if optimizer == "copy":
        if source.suffix.lower() != ".glb":
            raise RuntimeError("copy visual optimizer requires a GLB source")
        shutil.copy2(source, target)
        return ("copy", None)
    if optimizer == "blender":
        _export_with_blender(
            source,
            target,
            simplify_ratio=spec.simplify_ratio,
            max_texture_size=spec.max_texture_size,
        )
        return ("blender", None)
    if optimizer == "gltfpack":
        return _export_with_gltfpack(source, target, spec)
    raise ValueError(f"unknown browser visual optimizer: {spec.optimizer}")


def _export_with_blender(
    source: Path,
    target: Path,
    *,
    simplify_ratio: float = 1.0,
    max_texture_size: int | None = None,
) -> None:
    """Convert *source* to a GLB at *target* by running ``_BLENDER_SCRIPT``.

    Raises:
        RuntimeError: Blender is not on PATH, the run fails, or Blender
            finishes without writing *target*.
    """
    blender = shutil.which("blender")
    if blender is None:
        raise RuntimeError(
            f"{source.suffix} visual export requires Blender on PATH. Install Blender."
        )

    # The temporary directory removes the helper script on every exit path.
    with tempfile.TemporaryDirectory(prefix="dimos-blender-export-") as work_dir_raw:
        script_path = Path(work_dir_raw) / "export_visual.py"
        script_path.write_text(_BLENDER_SCRIPT)
        _run_command(
            [
                blender,
                "--background",
                "--factory-startup",
                # Blender exits 0 after a script exception unless this is set,
                # and it must precede ``--python`` because Blender handles its
                # arguments in order.
                "--python-exit-code",
                "1",
                "--python",
                str(script_path),
                "--",
                str(source),
                str(target),
                str(simplify_ratio),
                # The script treats 0 as "do not resize textures".
                str(max_texture_size or 0),
            ],
            "blender",
        )
    if not target.exists():
        raise RuntimeError(f"blender finished without writing {target}")


def _gltfpack_texture_args(spec: BrowserVisualSpec) -> list[str]:
    """Translate the texture settings of *spec* into gltfpack arguments.

    Raises:
        ValueError: The texture format is unknown, or a texture size limit is
            given without a texture format.
    """
    args: list[str] = []
    if spec.texture_format == "webp":
        args.append("-tw")
    elif spec.texture_format == "ktx2":
        args.append("-tc")
    elif spec.texture_format is not None:
        raise ValueError(f"unknown browser texture format: {spec.texture_format}")
    if spec.max_texture_size is not None:
        if spec.texture_format is None:
            raise ValueError("max_texture_size requires texture_format='webp' or 'ktx2'")
        args.extend(["-tl", str(spec.max_texture_size)])
    return args


def _export_with_gltfpack(
    source: Path,
    target: Path,
    spec: BrowserVisualSpec,
) -> tuple[str, dict[str, Any] | None]:
    """Optimise *source* into *target* with gltfpack.

    Sources gltfpack cannot read are first converted to GLB with Blender.

    Returns:
        ``("gltfpack", report)`` where *report* is the parsed gltfpack JSON
        report, or ``None`` when it is missing or unreadable.

    Raises:
        RuntimeError: gltfpack is unavailable, the suffix is unsupported, or a
            tool fails.
        ValueError: The texture settings of *spec* are invalid.
    """
    command = _gltfpack_command()
    # Validate the spec before the Blender conversion so an invalid setting
    # fails immediately instead of after an expensive import/export.
    texture_args = _gltfpack_texture_args(spec)
    source_for_gltfpack = source
    with tempfile.TemporaryDirectory(prefix="dimos-gltfpack-source-") as temp_dir_raw:
        if source.suffix.lower() not in _GLTFPACK_INPUT_SUFFIXES:
            if source.suffix.lower() not in _BLENDER_INPUT_SUFFIXES:
                raise RuntimeError(f"unsupported visual source suffix: {source.suffix}")
            source_for_gltfpack = Path(temp_dir_raw) / "source.glb"
            _export_with_blender(source, source_for_gltfpack)

        report_path = target.with_suffix(".gltfpack.json")
        args = [
            *command,
            "-i",
            str(source_for_gltfpack),
            "-o",
            str(target),
            "-mm",
            "-si",
            str(spec.simplify_ratio),
            "-se",
            str(spec.simplify_error),
            "-r",
            str(report_path),
            *texture_args,
        ]

        output = _run_command(args, "gltfpack")
        if output and "Warning:" in output:
            logger.warning("gltfpack output:\n%s", _tail(output))
        report = _read_json(report_path)
    return ("gltfpack", report)


def _gltfpack_command() -> list[str]:
    """Return the argv prefix that launches gltfpack.

    Prefers a ``gltfpack`` executable on PATH and falls back to ``npx -y gltfpack``.

    Raises:
        RuntimeError: Neither ``gltfpack`` nor ``npx`` is on PATH.
    """
    gltfpack = shutil.which("gltfpack")
    if gltfpack is not None:
        return [gltfpack]
    npx = shutil.which("npx")
    if npx is not None:
        return [npx, "-y", "gltfpack"]
    raise RuntimeError(
        "browser visual optimization requires gltfpack. Install it with "
        "`npm install -g gltfpack` or use --visual-optimizer blender/copy."
    )


def _run_command(args: list[str], label: str) -> str:
    """Run *args* and return its combined stdout/stderr text.

    Raises:
        RuntimeError: The process exits non-zero; the message names *label*,
            the exit code and the last ``_COMMAND_TAIL_LINES`` output lines.
    """
    result = subprocess.run(
        args,
        check=False,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    output = result.stdout or ""
    if result.returncode != 0:
        raise RuntimeError(f"{label} failed with exit code {result.returncode}:\n{_tail(output)}")
    return output


def _read_json(path: Path) -> dict[str, Any] | None:
    """Return the JSON object stored at *path*, or ``None``.

    The optimizer report is optional: a missing, unreadable, wrongly encoded
    or malformed file, or JSON that is not an object, yields ``None``.
    """
    try:
        data = json.loads(path.read_text())
    except FileNotFoundError:
        return None
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.warning("failed to parse optimizer report: %s", path)
        return None
    return data if isinstance(data, dict) else None


def _validate_output(
    source_stats: dict[str, Any],
    output_stats: dict[str, Any],
    spec: BrowserVisualSpec,
) -> None:
    """Reject a cook whose vertex count grew beyond ``spec.max_vertex_growth_ratio``.

    The check is skipped when either vertex count is missing or not positive.

    Raises:
        RuntimeError: The output has more vertices than the ratio allows.
    """
    source_vertices = int(source_stats.get("vertex_count") or 0)
    output_vertices = int(output_stats.get("vertex_count") or 0)
    if source_vertices <= 0 or output_vertices <= 0:
        return
    max_vertices = int(source_vertices * spec.max_vertex_growth_ratio)
    if output_vertices > max_vertices:
        raise RuntimeError(
            "browser visual cook increased vertex count from "
            f"{source_vertices} to {output_vertices}; refusing to write worse asset"
        )


def _tail(output: str) -> str:
    """Return the last ``_COMMAND_TAIL_LINES`` lines of *output*."""
    return "\n".join(output.splitlines()[-_COMMAND_TAIL_LINES:])


def _budget_warnings(stats: dict[str, Any], spec: BrowserVisualSpec) -> list[str]:
    """List one message per render budget in *spec* that *stats* exceeds.

    The node budget uses ``node_count`` when present and non-zero, otherwise
    ``mesh_count``.
    """
    warnings: list[str] = []
    mesh_count = int(stats.get("node_count") or stats.get("mesh_count") or 0)
    material_count = int(stats.get("material_count") or 0)
    texture_count = int(stats.get("texture_count") or 0)
    vertex_count = int(stats.get("vertex_count") or 0)
    if mesh_count > spec.max_meshes:
        warnings.append(f"{mesh_count} render nodes exceeds target {spec.max_meshes}")
    if material_count > spec.max_materials:
        warnings.append(f"{material_count} materials exceeds target {spec.max_materials}")
    if texture_count > spec.max_textures:
        warnings.append(f"{texture_count} textures exceeds target {spec.max_textures}")
    if vertex_count > spec.max_vertices:
        warnings.append(f"{vertex_count} vertices exceeds target {spec.max_vertices}")
    return warnings


__all__ = ["BrowserVisualCookResult", "cook_browser_visual"]


# --- patch boundary: header of the next file in this patch, kept from the original ---
# diff --git a/dimos/simulation/mujoco/collision_spec.py b/dimos/simulation/mujoco/collision_spec.py
# new file mode 100644
# index 0000000000..a11427352b
# --- /dev/null
# +++ b/dimos/simulation/mujoco/collision_spec.py
# @@ -0,0 +1,968 @@
# Copyright 2025-2026 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Opening of the next file's module docstring (it continues on the following
# original line):
#
# """Per-prim collision-shape decision-making for ``bake_scene_mjcf``.
#
# The bake's job is to turn each USD/glTF/OBJ prim into one or more MuJoCo
# ``<geom>``s. This module separates the *decision* (what shape to emit)
# from the *emission* (the OBJ/MJCF writing). Three layers cooperate:
#
# 1. **Generic geometric heuristics** — applied to every prim regardless of
#    source.
Tiny-prim skip, aspect-ratio box override, near-convex check. + Safe defaults; no scene-specific knowledge. + +2. **Primitive auto-fit** — try OBB box / Ritter sphere / PCA cylinder / + PCA capsule. Accept the best fit if + ``hull_volume / primitive_volume >= fill_threshold``. Geometric only. + +3. **Sidecar overrides** — a JSON file (``.collision.json`` next + to the source mesh, or explicit path) with ``fnmatch`` patterns over + USD prim paths. Lets users skip lamps, force cylinders on pillars, + tune CoACD per pattern. Whoever produces this file (a human, a + future UE-side extractor, an LLM…) doesn't matter to the bake — the + sidecar is the contract. + +The dispatcher ``emit_for_prim()`` walks: sidecar override → generic +heuristics → primitive auto-fit → CoACD fallback, and returns a list +of ``GeomEmission`` describing every ``<geom>`` the wrapper should +include for the prim. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +import fnmatch +import json +from pathlib import Path +from typing import Any, Literal + +import numpy as np +from scipy.spatial import ConvexHull, QhullError # type: ignore[import-untyped] + +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + +PrimitiveFit = dict[str, Any] +OverrideConfig = dict[str, Any] + + +# --------------------------------------------------------------------------- # +# Sidecar spec dataclasses # +# --------------------------------------------------------------------------- # + + +@dataclass +class CollisionSpec: + """User-facing collision configuration loaded from ``.collision.json``. + + Patterns in ``prim_overrides`` are matched with ``fnmatch`` (Unix-shell + globs) against the USD prim path of each prim — e.g. ``/Root/SM_Pillar_*``. + First-match wins; iteration order of the dict is preserved (Python 3.7+). 
+ + Each override value is a dict with at minimum ``"type"``: + + - ``"box"`` / ``"sphere"`` / ``"cylinder"`` / ``"capsule"`` / ``"plane"``: + force the corresponding primitive. Auto-fit picks the parameters + unless explicit ``"size"`` / ``"pos"`` / ``"quat"`` is provided. + - ``"hull"``: force single convex hull, no CoACD. + - ``"mesh"`` / ``"decimate"``: emit this prim as a mesh geom. Optional + ``"target_faces"`` simplifies the source mesh before MJCF emission. + - ``"decompose"``: force CoACD even if auto-fit would have accepted a + primitive. Optional ``"max_hulls"`` overrides ``coacd_max_hulls``. + - ``"skip"``: emit no collision geom. Visual mesh still drawn. + - ``"auto"``: same as the global default (useful to scope a pattern + back to default behaviour inside a wider override). + + Optional override keys: + + - ``"friction"``: list ``[slide, spin, roll]``. + - ``"min_thickness"``: for ``"box"`` overrides, minimum full + thickness in metres along world Z. Useful for authored walkable + floor slabs exported as zero-thickness sheets. + - ``"preserve"``: with ``"min_thickness"``, which world-Z face to + keep fixed while expanding: ``"top"``, ``"bottom"``, or + ``"center"``. Defaults to ``"center"``. + - ``"max_hulls"``: per-pattern CoACD cap. + - ``"target_faces"``: per-pattern triangle target for ``mesh`` / + ``decimate`` outputs, or a post-process cap for hull outputs. + """ + + #: Fallback policy when no pattern matches. ``"auto"`` runs the full + #: heuristics→primitive→CoACD pipeline. ``"hull"`` always emits one + #: convex hull. ``"skip"`` emits nothing (visual only). + default: Literal["auto", "hull", "skip"] = "auto" + + #: A primitive is accepted in auto-fit if + #: ``hull_volume / primitive_volume >= fill_threshold``. Higher = + #: stricter (more prims fall through to CoACD). + fill_threshold: float = 0.85 + + #: Prims whose largest extent is below this (metres) emit no geom. + #: Catches trim/fasteners that the robot can't meaningfully contact. 
+ tiny_prim_extent_m: float = 0.03 + + #: If one axis is ``>= aspect_ratio_box`` times the smaller two, the + #: prim is forced to a box even if auto-fit fill ratio is borderline. + #: Catches wall panels, floor slabs, doors. + aspect_ratio_box: float = 20.0 + + #: If mesh's hull is this close to its actual mesh volume, use one + #: hull and skip CoACD entirely (mesh is already near-convex). + near_convex_threshold: float = 0.9 + + #: CoACD concavity threshold (URLab default). Lower = finer + #: decomposition (more sub-hulls). + coacd_threshold: float = 0.05 + + #: Hard cap on hulls per CoACD invocation. -1 = unlimited. + coacd_max_hulls: int = 64 + + #: Only run decomposition when the prim's single-hull volume exceeds + #: this (m³). Smaller furniture-scale prims use one hull regardless. + shell_volume_m3: float = 2.0 + + #: Preserve large non-rectangular sheet footprints with thin triangle + #: prisms. This helps moderate indoor scenes with angular floors, but + #: is disabled by ``bake_scene_mjcf`` for very large scenes unless + #: explicitly overridden. + enable_sheet_prisms: bool = True + + #: Scene-level guard used by ``bake_scene_mjcf``. Above this many + #: source prims, sheet prisms can explode the geom count; use sidecar + #: overrides for specific floors instead. + sheet_prism_max_scene_prims: int = 2500 + + #: ``USD-path-glob -> override-dict``. See class docstring. + prim_overrides: dict[str, OverrideConfig] = field(default_factory=dict) + + #: Split suspicious scene-graph nodes that are really many disconnected + #: tiny meshes spread over a large area before running primitive fitting. + split_disconnected_components: bool = True + + #: Minimum component count before a prim is considered grouped clutter. + split_min_components: int = 8 + + #: Combined prim extent must be this much larger than the median component + #: extent before splitting. This avoids splitting normal multi-part props. 
+ split_extent_ratio: float = 4.0 + + #: A prim must span at least this far before the cooker spends time + #: checking disconnected components. This keeps normal props cheap. + split_prim_min_extent_m: float = 5.0 + + #: The prim must also be slab-like by axis ratio before splitting. + #: This targets the path that can otherwise emit one giant box. + split_axis_ratio: float = 20.0 + + #: Components smaller than this largest extent are dropped after splitting. + #: This catches leaves, cups, bottles, and other decorative clutter that can + #: destabilize navigation collision while being too small to matter. + split_component_min_extent_m: float = 0.15 + + #: Very small triangle islands are dropped after splitting. + split_component_min_faces: int = 16 + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> CollisionSpec: + """Build a collision spec from decoded JSON. + + Unknown keys are ignored to keep authored sidecars forwards-compatible. + """ + known = { + "default", + "fill_threshold", + "tiny_prim_extent_m", + "aspect_ratio_box", + "near_convex_threshold", + "coacd_threshold", + "coacd_max_hulls", + "shell_volume_m3", + "enable_sheet_prisms", + "sheet_prism_max_scene_prims", + "prim_overrides", + "split_disconnected_components", + "split_min_components", + "split_extent_ratio", + "split_prim_min_extent_m", + "split_axis_ratio", + "split_component_min_extent_m", + "split_component_min_faces", + } + kwargs = {k: v for k, v in raw.items() if k in known} + # Ignore "$schema" and any future top-level keys silently. + return cls(**kwargs) + + @classmethod + def from_json(cls, path: Path | str) -> CollisionSpec: + """Load a sidecar. 
Unknown keys are ignored to keep the format forwards-compatible.""" + path = Path(path) + raw = json.loads(path.read_text()) + return cls.from_dict(raw) + + @classmethod + def auto_discover(cls, scene_path: Path | str) -> CollisionSpec: + """Return the sidecar next to ``scene_path`` if it exists, else defaults.""" + scene_path = Path(scene_path) + sidecar = scene_path.with_suffix(".collision.json") + if sidecar.exists(): + logger.info(f"loading collision sidecar: {sidecar}") + return cls.from_json(sidecar) + return cls() + + def resolve(self, prim_path: str) -> OverrideConfig: + """Find the matching override for ``prim_path`` (USD path). + + Returns a dict with at least ``"type"``. Falls back to + ``{"type": self.default}`` when no pattern matches. + """ + stripped = prim_path.lstrip("/") + sanitized = "".join(c if c.isalnum() else "_" for c in stripped) + candidates = (prim_path, stripped, sanitized) + for pattern, override in self.prim_overrides.items(): + if any(fnmatch.fnmatchcase(candidate, pattern) for candidate in candidates): + # Pattern's "auto" defers to global default. + if override.get("type") == "auto": + return {**override, "type": self.default} + return override + return {"type": self.default} + + +# --------------------------------------------------------------------------- # +# Emission record # +# --------------------------------------------------------------------------- # + + +@dataclass +class GeomEmission: + """One MuJoCo ```` to emit, in dimos world frame. + + Either ``mesh_path`` is set (mesh-type geom — also emits a + ```` asset) or the primitive parameters (``size``, ``pos``, + ``quat``) are set. + """ + + name: str + purpose: Literal["collision", "visual"] + kind: Literal["mesh", "box", "sphere", "cylinder", "capsule", "plane"] + + #: For ``kind="mesh"``: absolute path to the OBJ. 
def _matrix_to_quat_wxyz(R: np.ndarray) -> tuple[float, float, float, float]:
    """3x3 right-handed rotation → quaternion ``(w, x, y, z)``.

    Branches on the trace and then on the largest diagonal entry, so the
    divisor ``s`` is built from the dominant term and the conversion stays
    well-defined at ``trace == -1``.
    """
    R = np.asarray(R, dtype=np.float64)
    tr = R[0, 0] + R[1, 1] + R[2, 2]
    if tr > 0:
        s = 0.5 / np.sqrt(tr + 1.0)
        w = 0.25 / s
        x = (R[2, 1] - R[1, 2]) * s
        y = (R[0, 2] - R[2, 0]) * s
        z = (R[1, 0] - R[0, 1]) * s
    elif (R[0, 0] > R[1, 1]) and (R[0, 0] > R[2, 2]):
        s = 2.0 * np.sqrt(1.0 + R[0, 0] - R[1, 1] - R[2, 2])
        w = (R[2, 1] - R[1, 2]) / s
        x = 0.25 * s
        y = (R[0, 1] + R[1, 0]) / s
        z = (R[0, 2] + R[2, 0]) / s
    elif R[1, 1] > R[2, 2]:
        s = 2.0 * np.sqrt(1.0 + R[1, 1] - R[0, 0] - R[2, 2])
        w = (R[0, 2] - R[2, 0]) / s
        x = (R[0, 1] + R[1, 0]) / s
        y = 0.25 * s
        z = (R[1, 2] + R[2, 1]) / s
    else:
        s = 2.0 * np.sqrt(1.0 + R[2, 2] - R[0, 0] - R[1, 1])
        w = (R[1, 0] - R[0, 1]) / s
        x = (R[0, 2] + R[2, 0]) / s
        y = (R[1, 2] + R[2, 1]) / s
        z = 0.25 * s
    return (float(w), float(x), float(y), float(z))


def _quat_z_to(axis: np.ndarray) -> tuple[float, float, float, float]:
    """Quaternion ``(w, x, y, z)`` that rotates ``+Z`` onto ``axis``.

    Used for cylinder/capsule placement — MuJoCo's primitive long-axis
    is local +Z; we orient by aligning Z to the prim's PCA principal
    direction.

    ``axis`` does not need to be normalised. A zero-length or non-finite
    axis has no direction, so the identity quaternion is returned instead
    of a non-unit or NaN result.
    """
    identity = (1.0, 0.0, 0.0, 0.0)
    direction = np.asarray(axis, dtype=np.float64)
    norm = float(np.linalg.norm(direction))
    if not np.isfinite(norm) or norm < 1e-12:
        return identity

    z = np.array([0.0, 0.0, 1.0])
    a = direction / norm
    d = float(np.dot(z, a))
    if d > 0.99999:
        return identity
    if d < -0.99999:
        # 180° about any axis perpendicular to Z; use X.
        return (0.0, 1.0, 0.0, 0.0)
    # Half-angle form: w = cos(θ/2) = s/2 and xyz = (z × a) / s with
    # s = sqrt(2 (1 + cos θ)); the result is unit length by construction.
    cross = np.cross(z, a)
    s = float(np.sqrt(2.0 * (1.0 + d)))
    xyz = cross / s
    return (s * 0.5, float(xyz[0]), float(xyz[1]), float(xyz[2]))
#: Lower bound on any primitive's half-extent / radius. MuJoCo rejects
#: a ``<geom>`` with any size component <= 0, but truly flat prims (road
#: tiles, ceiling panels) can come out of the OBB / cylinder fit with one
#: axis at exactly 0. Clamping to 1 mm yields a valid geom that's still
#: physically reasonable as a thin slab.
_MIN_SIZE_M = 1e-3
_SHEET_PRISM_THICKNESS_M = 0.03
_SHEET_BOX_FILL_MIN = 0.85
_SHEET_BOX_FILL_MAX = 1.15
_HORIZONTAL_BOX_MAX_THICKNESS_M = 0.05
_SHEET_PRISM_MIN_FOOTPRINT_AREA_M2 = 2.0
_SHEET_PRISM_MAX_TRIANGLES = 1024


def _fit_aabb_box(vertices: np.ndarray) -> PrimitiveFit:
    """Axis-aligned bounding box. Identity quat.

    Half-extents are clamped to ``_MIN_SIZE_M`` so flat inputs still
    produce a strictly positive size.
    """
    mn, mx = vertices.min(0), vertices.max(0)
    half_ext = np.maximum((mx - mn) / 2.0, _MIN_SIZE_M)
    center = (mx + mn) / 2.0
    return {
        "type": "box",
        "size": tuple(map(float, half_ext)),
        "pos": tuple(map(float, center)),
        "quat": (1.0, 0.0, 0.0, 0.0),
        "volume": float(np.prod(2.0 * half_ext)),
    }


def _fit_obb_box(vertices: np.ndarray) -> PrimitiveFit:
    """Oriented bounding box via PCA.

    Tighter than AABB when the prim is rotated relative to world axes
    (most UE props are world-aligned, so OBB ≈ AABB, but rotated assets
    benefit). With fewer than two vertices there is no covariance to
    decompose, so the AABB fit is returned instead.
    """
    if len(vertices) < 2:
        # np.cov of a single sample is all-NaN; the AABB is the only
        # meaningful answer for a lone point.
        return _fit_aabb_box(vertices)
    centroid = vertices.mean(0)
    centered = vertices - centroid
    _, evecs = np.linalg.eigh(np.cov(centered.T))
    # Ensure right-handed.
    if np.linalg.det(evecs) < 0:
        evecs[:, 0] = -evecs[:, 0]
    local = centered @ evecs
    mn, mx = local.min(0), local.max(0)
    half_ext = np.maximum((mx - mn) / 2.0, _MIN_SIZE_M)
    local_center = (mx + mn) / 2.0
    world_center = centroid + evecs @ local_center
    return {
        "type": "box",
        "size": tuple(map(float, half_ext)),
        "pos": tuple(map(float, world_center)),
        "quat": _matrix_to_quat_wxyz(evecs),
        "volume": float(np.prod(2.0 * half_ext)),
    }


def _fit_sphere(vertices: np.ndarray) -> PrimitiveFit:
    """Centroid + farthest-vertex. Looser than Welzl/Ritter but fine for
    fill-ratio comparison."""
    centroid = vertices.mean(0)
    r = max(float(np.linalg.norm(vertices - centroid, axis=1).max()), _MIN_SIZE_M)
    return {
        "type": "sphere",
        "size": (r,),
        "pos": tuple(map(float, centroid)),
        "quat": (1.0, 0.0, 0.0, 0.0),
        "volume": float((4.0 / 3.0) * np.pi * r**3),
    }


def _fit_cylinder(vertices: np.ndarray) -> PrimitiveFit:
    """Cylinder along the PCA principal axis.

    ``size`` is ``(radius, half_height)``; both are clamped to
    ``_MIN_SIZE_M``. With fewer than two vertices the axis defaults to
    world +Z with an identity quaternion.
    """
    centroid = vertices.mean(0)
    centered = vertices - centroid
    if len(vertices) < 2:
        # No covariance for a single sample; pick +Z so the fit stays finite.
        axis = np.array([0.0, 0.0, 1.0])
        quat = (1.0, 0.0, 0.0, 0.0)
    else:
        _, evecs = np.linalg.eigh(np.cov(centered.T))
        axis = evecs[:, -1]  # largest variance
        quat = _quat_z_to(axis)
    proj = centered @ axis
    lo, hi = proj.min(), proj.max()
    half_h = max(float((hi - lo) / 2.0), _MIN_SIZE_M)
    pos = centroid + axis * float((hi + lo) / 2.0)
    # radius = max perpendicular distance from the axis line
    perp = centered - np.outer(proj, axis)
    r = max(float(np.linalg.norm(perp, axis=1).max()), _MIN_SIZE_M)
    return {
        "type": "cylinder",
        "size": (r, half_h),
        "pos": tuple(map(float, pos)),
        "quat": quat,
        "volume": float(np.pi * r * r * 2.0 * half_h),
    }


def _fit_capsule(vertices: np.ndarray) -> PrimitiveFit:
    """Capsule along PCA principal axis. MuJoCo capsule half-height is
    the *cylindrical* portion only; total length = 2*(half_h + r)."""
    cyl = _fit_cylinder(vertices)
    r, h = cyl["size"]
    # Shrink the cylindrical part by one radius so the hemispherical caps
    # end where the fitted cylinder ended.
    new_h = max(float(h - r), _MIN_SIZE_M)
    vol = float(np.pi * r * r * 2.0 * new_h) + float((4.0 / 3.0) * np.pi * r**3)
    return {
        "type": "capsule",
        "size": (r, new_h),
        "pos": cyl["pos"],
        "quat": cyl["quat"],
        "volume": vol,
    }


def _hull_volume(vertices: np.ndarray) -> float | None:
    """Convex-hull volume in m³, or ``None`` if qhull rejects the points."""
    try:
        return float(ConvexHull(vertices).volume)
    except (QhullError, ValueError):
        return None


def _mesh_volume(vertices: np.ndarray, triangles: np.ndarray) -> float:
    """Absolute mesh volume (divergence theorem over origin-based tetrahedra).

    The signed per-triangle contributions are summed and the absolute
    value is returned, so the result is non-negative regardless of
    winding. For non-closed inputs it is only a coarse estimate.
    """
    v0 = vertices[triangles[:, 0]]
    v1 = vertices[triangles[:, 1]]
    v2 = vertices[triangles[:, 2]]
    return float(abs(np.sum(np.einsum("ij,ij->i", v0, np.cross(v1, v2))) / 6.0))
def _is_tiny(extent: np.ndarray, threshold_m: float) -> bool:
    """True when the prim's largest extent is strictly below ``threshold_m``."""
    return bool(np.max(extent) < threshold_m)


def _is_slab(extent: np.ndarray, aspect_ratio: float) -> bool:
    """Wall / floor / door / panel / beam test on the extent vector.

    True when the smallest extent is effectively zero, or when the
    largest extent is at least ``aspect_ratio`` times the smallest.
    """
    ordered = np.sort(extent)
    smallest, largest = ordered[0], ordered[2]
    if smallest < 1e-6:
        return True
    return bool(largest / smallest >= aspect_ratio)


def _sheet_footprint_stats(
    vertices: np.ndarray,
    triangles: np.ndarray,
    thin_axis: int,
) -> tuple[float, float] | None:
    """Return ``(projected_aabb_area, projected_triangle_fill)`` for a sheet.

    The mesh is projected along ``thin_axis``; the fill is the summed
    projected triangle area divided by the projected bounding-rectangle
    area. ``None`` when that rectangle is degenerate.
    """
    kept_axes = [ax for ax in range(3) if ax != thin_axis]
    flat = vertices[:, kept_axes]
    span = flat.max(axis=0) - flat.min(axis=0)
    rect_area = float(span[0] * span[1])
    if rect_area < 1e-9:
        return None

    corners = flat[triangles]
    u = corners[:, 1] - corners[:, 0]
    v = corners[:, 2] - corners[:, 0]
    # 2D cross product magnitude is twice each projected triangle's area.
    tri_area = 0.5 * np.abs(u[:, 0] * v[:, 1] - u[:, 1] * v[:, 0]).sum()
    return rect_area, float(tri_area / rect_area)


def _is_boxlike_sheet(
    vertices: np.ndarray,
    triangles: np.ndarray,
    thin_axis: int,
) -> bool:
    """Whether a thin mesh roughly fills its projected bounding rectangle.

    A single primitive box is only acceptable when the source sheet's
    projected triangle area is close to the projected AABB area. Low
    ratios mean an L-shape / beam strip / holes; high ratios usually mean
    overlapping, folded, or angled sheets inside one prim.
    """
    stats = _sheet_footprint_stats(vertices, triangles, thin_axis)
    if stats is None:
        return False
    fill = stats[1]
    return _SHEET_BOX_FILL_MIN <= fill <= _SHEET_BOX_FILL_MAX


def _should_emit_triangle_prisms(
    vertices: np.ndarray,
    triangles: np.ndarray,
    thin_axis: int,
) -> bool:
    """Use exact-ish triangle prisms only for large horizontal sheets.

    This avoids placing huge slabs over angular floors and roof strips,
    without exploding tiny decorative meshes into thousands of geoms.
    Requires the thin axis to be world Z, at most
    ``_SHEET_PRISM_MAX_TRIANGLES`` triangles, and a projected footprint of
    at least ``_SHEET_PRISM_MIN_FOOTPRINT_AREA_M2``.
    """
    if thin_axis != 2 or len(triangles) > _SHEET_PRISM_MAX_TRIANGLES:
        return False
    stats = _sheet_footprint_stats(vertices, triangles, thin_axis)
    if stats is None:
        return False
    return stats[0] >= _SHEET_PRISM_MIN_FOOTPRINT_AREA_M2


def _thin_sheet_hulls(
    vertices: np.ndarray,
    triangles: np.ndarray,
    thickness: float = _SHEET_PRISM_THICKNESS_M,
) -> list[tuple[np.ndarray, np.ndarray]]:
    """Represent a thin non-rectangular sheet as convex triangle prisms.

    Each source triangle is extruded by ``thickness / 2`` along both
    directions of its own normal. Triangles with non-finite vertices or a
    near-zero normal are skipped. Every returned prism shares the same
    face-index array.
    """
    # Prism vertex layout: 0-2 are the offset-up triangle, 3-5 the
    # offset-down copy; two caps plus two triangles per side quad.
    prism_faces = np.asarray(
        [
            [0, 1, 2],
            [5, 4, 3],
            [0, 3, 4],
            [0, 4, 1],
            [1, 4, 5],
            [1, 5, 2],
            [2, 5, 3],
            [2, 3, 0],
        ],
        dtype=np.int32,
    )
    half_thickness = thickness * 0.5

    prisms: list[tuple[np.ndarray, np.ndarray]] = []
    for corner_ids in triangles:
        corners = vertices[corner_ids].astype(np.float64)
        if not np.isfinite(corners).all():
            continue
        normal = np.cross(corners[1] - corners[0], corners[2] - corners[0])
        length = float(np.linalg.norm(normal))
        if length < 1e-9:
            continue
        shift = normal / length * half_thickness
        solid = np.vstack((corners + shift, corners - shift)).astype(np.float32)
        prisms.append((solid, prism_faces))

    return prisms
@dataclass
class PrimDecision:
    """What the dispatcher decided for one prim. Consumed by the bake
    which materialises ``GeomEmission`` records and writes OBJs."""

    #: ``"skip"`` (no collision), ``"primitive"`` (one ``<geom>`` with
    #: kind ∈ {box, sphere, cylinder, capsule, plane}), or ``"hulls"``
    #: (one or more mesh ``<geom>``s from convex-hull decomposition).
    mode: Literal["skip", "primitive", "hulls"]

    #: For ``"primitive"``: the fit dict (``type``, ``size``, ``pos``,
    #: ``quat``, ``volume``, ``fill_ratio``).
    primitive: PrimitiveFit | None = None

    #: For ``"hulls"``: list of ``(vertices, triangles)`` ready to write.
    hulls: list[tuple[np.ndarray, np.ndarray]] = field(default_factory=list)

    #: For diagnostics: which rule fired.
    reason: str = ""

    #: Optional friction override from sidecar.
    friction: tuple[float, float, float] | None = None

    #: Optional per-mesh triangle cap from sidecar. The bake applies this
    #: before writing OBJ assets for mesh geoms.
    target_faces: int | None = None


def decide_for_prim(
    vertices: np.ndarray,
    triangles: np.ndarray,
    prim_path: str,
    spec: CollisionSpec,
) -> PrimDecision:
    """Resolve sidecar + heuristics + auto-fit for a single prim.

    Writes nothing to disk. The caller (bake) materialises the
    decision: writes hull OBJs to disk, emits MJCF lines.

    Order of precedence: explicit ``skip`` → empty-prim guard → tiny-prim
    guard (auto only) → explicit primitive → explicit hull / decompose /
    mesh → slab heuristics → primitive auto-fit → near-convex single hull
    → small-shell single hull → CoACD. An unrecognised override ``type``
    is logged and handled exactly like ``"auto"``.

    Args:
        vertices: ``(N, 3)`` float, world-frame after ``SceneMeshAlignment``.
        triangles: ``(M, 3)`` int vertex indices.
        prim_path: USD-style prim path used as sidecar key.
        spec: parsed sidecar.
    """
    override = spec.resolve(prim_path)
    kind = override.get("type", spec.default)
    friction = override.get("friction")
    if friction is not None:
        friction = tuple(float(x) for x in friction)
    target_faces = _target_faces(override)

    # 0. Explicit "skip" — short-circuit.
    if kind == "skip":
        return PrimDecision(mode="skip", reason="sidecar:skip", friction=friction)

    # 0b. A prim without vertices has no extent to reason about; every
    #     fit below would fail on the empty reduction.
    if len(vertices) == 0:
        return PrimDecision(mode="skip", reason="empty-prim", friction=friction)

    # 0c. Unknown types used to fall through to the auto pipeline while
    #     bypassing the tiny-prim guard. Normalise them so "auto" really
    #     is the only kind that reaches step 4.
    known_kinds = (
        "auto",
        "box",
        "sphere",
        "cylinder",
        "capsule",
        "plane",
        "hull",
        "decompose",
        "mesh",
        "decimate",
    )
    if kind not in known_kinds:
        logger.warning(f"unknown collision type {kind!r} for {prim_path}; treating as 'auto'")
        kind = "auto"

    extent = vertices.max(0) - vertices.min(0)

    # 1. Tiny-prim guard (applies to "auto" path; explicit overrides win).
    if kind == "auto" and _is_tiny(extent, spec.tiny_prim_extent_m):
        return PrimDecision(mode="skip", reason="tiny-prim", friction=friction)

    # 2. Explicit primitive in sidecar — fit if size/pos not provided.
    if kind in ("box", "sphere", "cylinder", "capsule", "plane"):
        fit = _resolve_explicit_primitive(vertices, kind, override)
        fit["fill_ratio"] = float("nan")  # unknown — user asserted this shape
        return PrimDecision(
            mode="primitive", primitive=fit, reason=f"sidecar:{kind}", friction=friction
        )

    # 3. Explicit hull / decompose paths.
    if kind == "hull":
        return PrimDecision(
            mode="hulls",
            hulls=[(vertices, triangles)],  # signal: single-hull, no decomp
            reason="sidecar:hull",
            friction=friction,
            target_faces=target_faces,
        )
    if kind == "decompose":
        max_h = int(override.get("max_hulls", spec.coacd_max_hulls))
        hulls = _coacd_decompose(vertices, triangles, spec.coacd_threshold, max_h)
        return PrimDecision(
            mode="hulls",
            hulls=hulls,
            reason="sidecar:decompose",
            friction=friction,
            target_faces=target_faces,
        )
    if kind in {"mesh", "decimate"}:
        return PrimDecision(
            mode="hulls",
            hulls=[(vertices, triangles)],
            reason=f"sidecar:{kind}",
            friction=friction,
            target_faces=target_faces,
        )

    # 4. From here on: kind == "auto". Generic heuristics first.

    # 4a. Aspect-ratio: slab/beam → force box (fill ratio may be
    #     marginal because of moulding/profile, but a box collision is
    #     the right physical answer for walls and slabs). Non-rectangular
    #     sheets are emitted as triangle prisms so we don't fill holes or
    #     angular roof/floor outlines with one huge invisible slab.
    if _is_slab(extent, spec.aspect_ratio_box):
        thin_axis = int(np.argmin(extent))
        if (
            spec.enable_sheet_prisms
            and not _is_boxlike_sheet(vertices, triangles, thin_axis)
            and _should_emit_triangle_prisms(vertices, triangles, thin_axis)
        ):
            hulls = _thin_sheet_hulls(vertices, triangles)
            if hulls:
                return PrimDecision(
                    mode="hulls",
                    hulls=hulls,
                    reason=f"thin-sheet:triangle-prisms({len(hulls)})",
                    friction=friction,
                )

        if _is_flat_horizontal_box(extent, thin_axis):
            fit = _fit_aabb_box(vertices)
            reason = "aspect-ratio:horizontal-slab"
        else:
            fit = _fit_obb_box(vertices)
            reason = "aspect-ratio:slab"
        fit["fill_ratio"] = float("nan")
        return PrimDecision(mode="primitive", primitive=fit, reason=reason, friction=friction)

    # 4b. Need hull volume for the rest.
    hull_vol = _hull_volume(vertices)
    if hull_vol is None:
        return PrimDecision(mode="skip", reason="degenerate (qhull rejected)", friction=friction)

    # 4c. Try primitive auto-fit. Ratios outside (0, 1.5] are treated as
    #     unreliable and never accepted.
    auto_fit = _best_primitive_fit(vertices, hull_vol)
    if auto_fit is not None and 0.0 < auto_fit["fill_ratio"] <= 1.5:
        if auto_fit["fill_ratio"] >= spec.fill_threshold:
            return PrimDecision(
                mode="primitive",
                primitive=auto_fit,
                reason=f"auto:{auto_fit['type']}({auto_fit['fill_ratio']:.2f})",
                friction=friction,
            )

    # 4d. Near-convex shortcut: skip CoACD, single hull.
    mesh_vol = _mesh_volume(vertices, triangles)
    if hull_vol > 0 and mesh_vol / hull_vol > spec.near_convex_threshold:
        return PrimDecision(
            mode="hulls",
            hulls=[(vertices, triangles)],
            reason=f"near-convex({mesh_vol / hull_vol:.2f})",
            friction=friction,
        )

    # 4e. Small concave prim → single hull (matches today's behaviour
    #     for furniture-scale things; faster than CoACD).
    if hull_vol < spec.shell_volume_m3:
        return PrimDecision(
            mode="hulls",
            hulls=[(vertices, triangles)],
            reason="small-shell:single-hull",
            friction=friction,
        )

    # 4f. Large concave shell → CoACD.
    hulls = _coacd_decompose(vertices, triangles, spec.coacd_threshold, spec.coacd_max_hulls)
    return PrimDecision(
        mode="hulls",
        hulls=hulls,
        reason=f"coacd:{len(hulls)}",
        friction=friction,
    )
def _apply_box_min_thickness(
    fit: PrimitiveFit,
    vertices: np.ndarray,
    override: OverrideConfig,
) -> None:
    """Thicken a box fit in place so its Z size reaches ``min_thickness``.

    Does nothing when the override has no ``"min_thickness"`` or when the
    box's third half-extent already meets it. Otherwise the third
    half-extent becomes ``max(min_thickness / 2, _MIN_SIZE_M)`` and the
    centre is shifted according to ``"preserve"`` (``"top"``, ``"bottom"``
    or ``"center"``/``"centre"``; default ``"center"``). The face to keep
    comes from the fit itself when the override supplied ``"pos"`` or
    ``"size"``, else from the vertex Z range.

    Raises:
        ValueError: if ``"preserve"`` is not a recognised value. This is
            checked whenever ``"min_thickness"`` is set, so a sidecar typo
            is reported regardless of the prim's geometry.
    """
    raw_min_thickness = override.get("min_thickness")
    if raw_min_thickness is None:
        return

    # Validate up front: previously a bad value was only noticed for
    # prims that actually needed thickening.
    preserve = str(override.get("preserve", "center")).lower()
    if preserve not in {"top", "bottom", "center", "centre"}:
        raise ValueError("box min_thickness preserve must be one of: top, bottom, center")

    min_half_z = max(float(raw_min_thickness) * 0.5, _MIN_SIZE_M)
    size = np.asarray(fit["size"], dtype=np.float64)
    pos = np.asarray(fit["pos"], dtype=np.float64)
    if size.shape[0] < 3 or pos.shape[0] < 3 or size[2] >= min_half_z:
        return

    old_half_z = float(size[2])
    size[2] = min_half_z

    # An explicit pos/size means the authored box, not the mesh, defines
    # where the preserved face sits.
    explicit_box = "pos" in override or "size" in override
    if preserve == "top":
        top_z = pos[2] + old_half_z if explicit_box else float(np.max(vertices[:, 2]))
        pos[2] = top_z - min_half_z
    elif preserve == "bottom":
        bottom_z = pos[2] - old_half_z if explicit_box else float(np.min(vertices[:, 2]))
        pos[2] = bottom_z + min_half_z
    # "center" / "centre": the centre stays where it is.

    fit["size"] = tuple(map(float, size))
    fit["pos"] = tuple(map(float, pos))


def _target_faces(override: OverrideConfig) -> int | None:
    """Read the triangle target from an override.

    Uses ``"target_faces"``, falling back to ``"max_faces"``. Returns
    ``None`` when neither is set or the value is not positive; otherwise
    the value as an int, raised to at least 4.
    """
    raw = override.get("target_faces", override.get("max_faces"))
    if raw is None:
        return None
    target_faces = int(raw)
    if target_faces <= 0:
        return None
    return max(4, target_faces)
+ parts = coacd.run_coacd( + mesh, + threshold=threshold, + max_convex_hull=max_hulls, + resolution=500, + mcts_iterations=30, + mcts_nodes=10, + ) + out: list[tuple[np.ndarray, np.ndarray]] = [] + for v, t in parts: + v = np.asarray(v, dtype=np.float32) + t = np.asarray(t, dtype=np.int32) + if len(v) >= 4 and len(t) >= 1: + out.append((v, t)) + return out + + +__all__ = [ + "CollisionSpec", + "GeomEmission", + "PrimDecision", + "decide_for_prim", +] diff --git a/dimos/simulation/mujoco/scene_mesh_to_mjcf.py b/dimos/simulation/mujoco/scene_mesh_to_mjcf.py new file mode 100644 index 0000000000..67af2a04c2 --- /dev/null +++ b/dimos/simulation/mujoco/scene_mesh_to_mjcf.py @@ -0,0 +1,1003 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Bake a scene mesh into an MJCF wrapper around a robot MJCF. + +The bake walks every prim returned by :func:`load_scene_prims` and asks +:func:`dimos.simulation.mujoco.collision_spec.decide_for_prim` what to +emit for it. The dispatcher returns one of three modes: + +- ``"primitive"`` -- a single MuJoCo primitive ``<geom>`` (box / sphere / + cylinder / capsule / plane). Used when the prim is approximately + prismatic (auto-fit) or when a sidecar override forces it. +- ``"hulls"`` -- one or more mesh ``<geom>``s. Either a single convex + hull (small / near-convex prims) or a CoACD decomposition (genuine + concave shells: stairs, planters). +- ``"skip"`` -- no collision geom at all.
Used for sidecar-tagged + decoration (lamps, signs) and prims smaller than a threshold. + +Hulls produced by either path are validated with :func:`_valid_hull` +(matrix-rank coplanarity check + scipy ``Qt`` qhull pre-flight); when a +hull is invalid we fall back to a thin OBB box via +:func:`_fallback_box_geom` rather than dropping the geometry, so the +robot doesn't sink through holes. + +When ``include_visual_mesh=True`` the bake additionally writes the +prim's original triangles as a non-colliding visual geom (group 2, +``contype=0 conaffinity=0``). UE's USD exporter culls hidden faces on +static meshes (a floor slab ships with only top + bottom face pairs, +no sides) -- we route visual writes through :func:`_write_visual_obj`, +which substitutes the prim's convex hull when it isn't watertight, so +the viewer renders solid geometry instead of see-through slabs. + +Per-prim work is fanned across worker processes since each prim's +decision is independent and CoACD calls dominate wall time. Standalone +CLI bakes use forked processes; in an already-threaded DimOS runtime we +use ``forkserver`` so workers do not inherit the parent process's active +threads. + +Output is cached at ``~/.cache/dimos/scene_meshes//`` keyed on +the SHA256 of (source mesh, robot MJCF, alignment, meshdir, sidecar +spec, visual flag, schema version). :func:`load_or_bake` is the +recommended entry point -- it handles a three-tier cache: + + 1. ``compiled.mjb`` exists -> load directly (~1 s) + 2. ``wrapper.xml`` + OBJs exist -> compile XML, save ``.mjb`` + 3. 
Nothing exists -> full bake, then compile + save ``.mjb`` +""" + +from __future__ import annotations + +import argparse +from concurrent.futures import ProcessPoolExecutor, as_completed +from dataclasses import asdict, dataclass, replace +import hashlib +import multiprocessing +import os +from pathlib import Path +import time +from typing import Any + +import numpy as np +import open3d as o3d # type: ignore[import-untyped] + +from dimos.simulation.mujoco.collision_spec import ( + CollisionSpec, + decide_for_prim, +) +from dimos.simulation.scene_assets.mesh_scene import ( + SceneMeshAlignment, + ScenePrimMesh, + load_scene_prims, + split_disconnected_scene_prims, +) +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + + +CACHE_DIR = Path.home() / ".cache" / "dimos" / "scene_meshes" + + +# Scene-only wrapper -- no robot include. Robots are attached at runtime +# via ``MjSpec.attach()`` (see ``MujocoSimModule.start``), keeping the +# cooked package robot-agnostic. ``meshdir="."`` resolves the cooked +# scene OBJs that sit alongside this file. +# +# The dummy ```` on dimos_scene bypasses MuJoCo's auto- +# computation of body inertia from geom volumes -- the body is static +# (no joint) so the values don't affect physics, but without this any +# zero-volume visual mesh (road tiles, ceiling panels, flat slabs) +# triggers ``Error: mesh volume is too small`` at compile time. +_WRAPPER_TEMPLATE = """\ + + + + + + + +{asset_meshes} + + + + +{scene_geoms} + + + +""" + +# ``inertia="shell"`` makes MuJoCo compute mesh inertia from surface +# area instead of enclosed volume -- robust to non-watertight visual +# meshes from art tools. Safe for closed CoACD hulls too, so we apply +# it universally for one fewer code path. +_ASSET_LINE = ' ' + +# Collision (group 3) -- actually collides. Keep it opaque so MuJoCo +# depth renders treat the scene as solid for lidar/camera simulation. 
def _resolve_existing_file(path: str | Path, label: str) -> Path:
    """Expand and resolve ``path``, insisting that it names a regular file.

    Args:
        path: user-supplied location; ``~`` is expanded and the result is
            made absolute.
        label: human-readable noun used at the start of error messages.

    Returns:
        The resolved path.

    Raises:
        FileNotFoundError: nothing exists at the resolved location.
        ValueError: something exists there but it is not a regular file.
    """
    candidate = Path(path).expanduser().resolve()
    if candidate.is_file():
        return candidate
    # Distinguish "wrong kind of thing" from "nothing there at all".
    if candidate.exists():
        raise ValueError(f"{label} must be a file, got: {candidate}")
    raise FileNotFoundError(f"{label} not found: {candidate}")
+ + The wrapper is robot-agnostic: it declares the cooked scene as the + ``dimos_scene`` static body and nothing else. Robots are attached at + runtime via ``MjSpec.attach()`` inside ``MujocoSimModule.start``. + + Args: + scene_mesh_path: ``.usdz`` / ``.usda`` / ``.glb`` / ``.obj`` / + etc. Anything ``mesh_scene.load_scene_prims`` accepts. + alignment: scale / translation / rotation / y-up swap to bake + into world frame before any geom is emitted. + cache_root: override the cache root (defaults to + ``~/.cache/dimos/scene_meshes``). + collision_spec: per-prim policy. ``None`` auto-discovers a + sidecar ``.collision.json`` next to the source, or + falls back to ``CollisionSpec()`` defaults (auto-fit + primitives, CoACD on large concave shells). + include_visual_mesh: also emit a non-colliding visual geom for + every prim showing its original triangles. The viewer + renders these instead of the collision hulls -- much nicer + for visual debugging, but doubles disk usage. When ``True`` + non-watertight prim meshes are substituted with their convex + hull so they don't appear see-through. + rebake: ignore an existing ``wrapper.xml`` in the computed cache + directory and regenerate the scene collision geometry. + + Returns: + Path to the scene-only wrapper MJCF. Load with + ``mujoco.MjSpec.from_file`` and attach a robot via ``attach()``. 
+ """ + scene_mesh_path = _resolve_existing_file(scene_mesh_path, "scene mesh") + align = alignment or SceneMeshAlignment() + spec = collision_spec or CollisionSpec.auto_discover(scene_mesh_path) + + cache_key = _cache_key( + scene_mesh_path, + align, + spec=spec, + include_visual_mesh=include_visual_mesh, + ) + root = (cache_root or CACHE_DIR).expanduser() + cache_dir = root / cache_key + wrapper_path = cache_dir / "wrapper.xml" + + if not rebake and _cache_hit(wrapper_path): + logger.info(f"bake_scene_mjcf: cache hit at {cache_dir}") + return wrapper_path + + cache_dir.mkdir(parents=True, exist_ok=True) + + logger.info(f"bake_scene_mjcf: loading + aligning {scene_mesh_path}") + prims = load_scene_prims(scene_mesh_path, alignment=align) + if spec.split_disconnected_components: + prims, split_stats = split_disconnected_scene_prims( + prims, + min_components=spec.split_min_components, + extent_ratio=spec.split_extent_ratio, + prim_min_extent=spec.split_prim_min_extent_m, + axis_ratio=spec.split_axis_ratio, + min_component_extent=spec.split_component_min_extent_m, + min_component_faces=spec.split_component_min_faces, + can_split=lambda prim: ( + spec.resolve(prim.prim_path or prim.name).get("type", spec.default) == "auto" + ), + ) + if split_stats["split_prims"]: + logger.info( + "bake_scene_mjcf: split %s disconnected prims into %s kept " + "components; dropped %s tiny components", + split_stats["split_prims"], + split_stats["emitted_components"], + split_stats["dropped_components"], + ) + logger.info(f"bake_scene_mjcf: {len(prims)} prims to process") + if spec.enable_sheet_prisms and len(prims) > spec.sheet_prism_max_scene_prims: + logger.info( + "bake_scene_mjcf: disabling thin-sheet triangle prisms for " + f"{len(prims)}-prim scene; use a collision sidecar to opt in" + ) + spec = replace(spec, enable_sheet_prisms=False) + scene_center, scene_extent = _scene_bounds(prims) + + artifacts = _bake_prims( + prims, + cache_dir, + spec=spec, + 
include_visual_mesh=include_visual_mesh, + ) + if not artifacts.geom_lines: + raise RuntimeError( + "bake_scene_mjcf: every prim got skipped or produced only " + "degenerate hulls; nothing left to collide against. Check " + "the source mesh and alignment." + ) + + logger.info( + f"bake_scene_mjcf: {artifacts.n_primitive} primitive geoms, " + f"{artifacts.n_hulls_total} hull geoms, " + f"{artifacts.n_box_fallbacks} box fallbacks, " + f"{artifacts.n_visuals} visual passthrough meshes, " + f"{artifacts.n_skipped} skipped, " + f"{artifacts.n_degenerate_dropped} degenerate hulls dropped" + ) + # Top-10 decision reasons -- useful when tuning a sidecar. + for reason, n in sorted(artifacts.decision_reasons.items(), key=lambda kv: -kv[1])[:10]: + logger.info(f" reason {reason:32s} {n}") + + _write_wrapper( + wrapper_path=wrapper_path, + cache_key=cache_key, + asset_lines=artifacts.asset_lines, + geom_lines=artifacts.geom_lines, + statistic_center=scene_center, + statistic_extent=scene_extent, + ) + return wrapper_path + + +def load_or_bake( + scene_mesh_path: str | Path, + alignment: SceneMeshAlignment | None = None, + cache_root: Path | None = None, + collision_spec: CollisionSpec | None = None, + include_visual_mesh: bool = False, + rebake: bool = False, +) -> Path: + """Return the cached or freshly baked scene-only wrapper MJCF. + + Robots are attached at runtime via ``MjSpec``; no ``compiled.mjb`` is + produced at cook time. The cache key is over the source mesh, + alignment, collision spec, and schema version -- robot-agnostic. 
def _cache_key(
    scene_mesh_path: Path,
    alignment: SceneMeshAlignment,
    *,
    spec: CollisionSpec,
    include_visual_mesh: bool,
) -> str:
    """Derive the cache directory name from every input that shapes a bake.

    The digest covers, in order: the schema-version salt, the source
    file's ``path:size:mtime_ns`` signature, the alignment fields, the
    JSON-serialised collision spec, and the visual-mesh flag. No robot
    information is hashed, so the key is robot-agnostic.

    Returns:
        The first ``_CACHE_KEY_LEN`` hex characters of the SHA256 digest.
    """
    import json

    stat = scene_mesh_path.stat()
    visual_flag = b"1" if include_visual_mesh else b"0"
    # Hashing the concatenation is equivalent to feeding the pieces to
    # ``update`` one after another.
    pieces = (
        _CACHE_SCHEMA_VERSION.encode(),
        f"{scene_mesh_path}:{stat.st_size}:{stat.st_mtime_ns}".encode(),
        repr(sorted(asdict(alignment).items())).encode(),
        json.dumps(asdict(spec), sort_keys=True).encode(),
        b"visual=" + visual_flag,
    )
    digest = hashlib.sha256(b"".join(pieces)).hexdigest()
    return digest[:_CACHE_KEY_LEN]
+ """ + asset_lines: list[str] = [] + geom_lines: list[str] = [] + n_primitive = 0 + n_hulls_total = 0 + n_box_fallbacks = 0 + n_skipped = 0 + n_visuals = 0 + n_degenerate = 0 + reasons: dict[str, int] = {} + + work_items = [(prim, cache_dir, spec, include_visual_mesh) for prim in prims] + n_workers = max(1, (os.cpu_count() or 4) - 1) + if _native_thread_count() > 1: + n_workers = min(n_workers, 8) + start_method = ( + "forkserver" if "forkserver" in multiprocessing.get_all_start_methods() else "spawn" + ) + else: + start_method = "fork" + logger.info( + f"_bake_prims: fanning {len(prims)} prims across {n_workers} workers ({start_method})" + ) + + t0 = time.time() + mp_ctx = multiprocessing.get_context(start_method) + executor = ProcessPoolExecutor(max_workers=n_workers, mp_context=mp_ctx) + + progress_every = 25 if len(prims) <= 500 else 250 + with executor as ex: + futures = [ex.submit(_process_one_prim, item) for item in work_items] + done = 0 + for fut in as_completed(futures): + a_lines, g_lines, mode, reason, counters = fut.result() + asset_lines.extend(a_lines) + geom_lines.extend(g_lines) + reasons[reason] = reasons.get(reason, 0) + 1 + if mode == "primitive": + n_primitive += 1 + elif mode == "skip": + n_skipped += 1 + n_hulls_total += counters["hulls"] + n_box_fallbacks += counters["box_fallbacks"] + n_visuals += counters["visuals"] + n_degenerate += counters["degenerate"] + done += 1 + if done % progress_every == 0 or done == len(prims): + elapsed = time.time() - t0 + eta = elapsed * (len(prims) - done) / max(done, 1) + logger.info( + f" prim {done}/{len(prims)} " + f"({100 * done / len(prims):.0f}%) " + f"elapsed={elapsed:.0f}s eta={eta:.0f}s " + f"hulls_so_far={n_hulls_total}" + ) + + return _BakeArtifacts( + asset_lines=asset_lines, + geom_lines=geom_lines, + n_primitive=n_primitive, + n_hulls_total=n_hulls_total, + n_box_fallbacks=n_box_fallbacks, + n_skipped=n_skipped, + n_visuals=n_visuals, + n_degenerate_dropped=n_degenerate, + 
def _emit_primitive_geom(
    prim_name: str,
    fit: dict[str, Any] | None,
    friction_attr: str,
) -> str | None:
    """Render one ``PrimDecision.primitive`` dict to MJCF text.

    Args:
        prim_name: base name of the prim; the geom is named
            ``{prim_name}_col``.
        fit: primitive description with a ``type`` (``box`` / ``sphere`` /
            ``cylinder`` / ``capsule`` / ``plane``), a ``pos``, a ``size``
            and an optional ``quat``. A missing or ``None`` ``quat`` is
            rendered as the identity ``1 0 0 0``.
        friction_attr: text substituted into the template's ``friction``
            slot.

    Returns:
        The formatted geom line, or ``None`` when ``fit`` is ``None``,
        names an unknown ``type``, or lacks ``pos`` / ``size``. Returning
        ``None`` (instead of raising ``KeyError``) is what lets a
        malformed sidecar override be skipped rather than abort the bake.
    """
    if fit is None:
        return None

    templates = {
        "box": _COL_BOX_LINE,
        "sphere": _COL_SPHERE_LINE,
        "cylinder": _COL_CYL_LINE,
        "capsule": _COL_CAP_LINE,
        "plane": _COL_PLANE_LINE,
    }
    kind = fit.get("type")
    # Non-string kinds (e.g. an unhashable list from a bad sidecar) are
    # treated the same as an unknown kind.
    template = templates.get(kind) if isinstance(kind, str) else None
    pos = fit.get("pos")
    size = fit.get("size")
    if template is None or pos is None or size is None:
        return None

    quat = fit.get("quat")
    # ``str.format`` ignores unused keyword arguments, so the sphere
    # template (which has no orientation slot) can share this call.
    return template.format(
        name=f"{prim_name}_col",
        pos=_fmt_vec(np.asarray(pos)),
        quat="1 0 0 0" if quat is None else _fmt_vec(np.asarray(quat)),
        size=_fmt_vec(np.asarray(size)),
        friction=friction_attr,
    )
+ + Four layers: + 1. trivial -- < 4 vertices or < 4 faces. + 2. extent -- all-axis ``> 5 mm`` (matches MuJoCo's mj_loadXML + coplanarity tolerance for ~100mm-wide hulls). + 3. rank -- centred vertex matrix must have rank 3 (catches + coplanar hulls the extent check misses, e.g. a T-shaped + hull whose XY extent is large but Z is zero). + 4. scipy ConvexHull pre-flight with ``Qt`` -- same options + MuJoCo uses; if scipy can't build it, mj_loadXML can't either. + """ + if len(v) < 4 or len(f) < 4: + return False + extent = v.max(axis=0) - v.min(axis=0) + if (extent < _DEGENERATE_EPS).any(): + return False + if float(extent.min()) < _MIN_HULL_EXTENT_M: + return False + centered = v.astype(np.float64) - v.astype(np.float64).mean(axis=0) + if np.linalg.matrix_rank(centered, tol=_DEGENERATE_EPS) < 3: + return False + try: + from scipy.spatial import ConvexHull, QhullError # type: ignore[import-untyped] + + ConvexHull(v, qhull_options="Qt") + except (QhullError, ValueError): + return False + return True + + +def _fallback_box_geom(name: str, vertices: np.ndarray, friction_attr: str = "") -> str | None: + """Emit a thin OBB box geom for vertices that can't form a valid hull. + + The thickness floor (``_FALLBACK_BOX_THICKNESS_M = 3 cm``) keeps the + box thick enough that the robot can stand on it without falling + through. Returns ``None`` for prims too small to bother (< 25 cm + largest extent or < 0.05 m^2 face area) -- those fall through to + the degenerate counter. 
def _oriented_box(
    vertices: np.ndarray,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Fit an oriented bounding box, degrading to the axis-aligned one.

    trimesh's ``bounding_box_oriented`` is tried first. If it raises for
    any reason, or yields a non-finite center / rotation / extent, the
    axis-aligned bounding box with an identity rotation is returned.

    Returns:
        ``(center, rotation, extent)`` where ``rotation`` is 3x3 and
        ``extent`` holds full side lengths (not half sizes).
    """
    try:
        import trimesh  # type: ignore[import-untyped]

        cloud = trimesh.Trimesh(vertices=vertices, faces=np.empty((0, 3), dtype=np.int32))
        box = cloud.bounding_box_oriented
        pose = np.asarray(box.primitive.transform, dtype=np.float64)
        side_lengths = np.asarray(box.primitive.extents, dtype=np.float64)
        axes = pose[:3, :3]
        origin = pose[:3, 3]
        # Flip one axis when the frame is left-handed so the matrix is a
        # proper rotation (determinant +1).
        if np.linalg.det(axes) < 0:
            axes[:, 0] *= -1.0
        usable = (
            np.isfinite(origin).all()
            and np.isfinite(axes).all()
            and np.isfinite(side_lengths).all()
        )
        if usable:
            return origin, axes, np.abs(side_lengths)
    except Exception:
        pass

    lower = vertices.min(axis=0)
    upper = vertices.max(axis=0)
    return (lower + upper) * 0.5, np.eye(3), upper - lower
def _scene_bounds(prims: list[ScenePrimMesh]) -> tuple[np.ndarray, float]:
    """Compute a center point and framing radius for the aligned scene.

    The caller passes both values on to the wrapper writer as the
    ``statistic`` center / extent. Prims whose vertex array is not a
    non-empty ``(N, 3)`` array are ignored, as are individual rows
    containing non-finite values.

    Returns:
        ``(center, extent)``. ``center`` is the midpoint of the scene's
        axis-aligned bounds; ``extent`` is half the bounds diagonal padded
        by 10 %, never less than 1.0. With no usable vertices the result
        is the origin and 1.0.
    """
    lower: np.ndarray | None = None
    upper: np.ndarray | None = None
    for prim in prims:
        points = np.asarray(prim.vertices, dtype=np.float64)
        well_formed = points.ndim == 2 and points.shape[1] == 3 and len(points) > 0
        if not well_formed:
            continue
        points = points[np.isfinite(points).all(axis=1)]
        if not len(points):
            continue
        prim_lo = points.min(axis=0)
        prim_hi = points.max(axis=0)
        lower = prim_lo if lower is None else np.minimum(lower, prim_lo)
        upper = prim_hi if upper is None else np.maximum(upper, prim_hi)

    if lower is None or upper is None:
        return np.zeros(3, dtype=np.float64), 1.0

    padded_radius = float(np.linalg.norm(upper - lower) * 0.5 * 1.1)
    return (lower + upper) * 0.5, max(padded_radius, 1.0)
def _convex_hull_mesh(vertices: np.ndarray) -> tuple[np.ndarray, np.ndarray] | None:
    """Build the convex hull of ``vertices`` as a compact triangle mesh.

    Args:
        vertices: point cloud indexed as ``(N, 3)``.

    Returns:
        ``(hull_vertices, hull_faces)`` as ``float32`` / ``int32`` arrays,
        where ``hull_vertices`` keeps only the input points that lie on
        the hull (in their original order) and ``hull_faces`` indexes
        into it. ``None`` when qhull cannot build a hull (too few points,
        coplanar input, invalid values).
    """
    # Import outside the ``try`` so a missing scipy surfaces as an
    # ImportError instead of an unbound ``QhullError`` in the except clause.
    from scipy.spatial import ConvexHull, QhullError  # type: ignore[import-untyped]

    try:
        hull = ConvexHull(vertices.astype(np.float64))
    except (QhullError, ValueError):
        return None

    faces = np.asarray(hull.simplices, dtype=np.int32)
    # ``used`` is the sorted set of referenced input indices and
    # ``inverse`` maps every face corner to its position in ``used`` --
    # the old-to-new index remap, done in one vectorised call.
    used, inverse = np.unique(faces.reshape(-1), return_inverse=True)
    remapped_faces = np.asarray(inverse).reshape(faces.shape).astype(np.int32)
    return vertices[used].astype(np.float32), remapped_faces
def _write_wrapper(
    *,
    wrapper_path: Path,
    cache_key: str,
    asset_lines: list[str],
    geom_lines: list[str],
    statistic_center: np.ndarray,
    statistic_extent: float,
) -> None:
    """Fill in the wrapper template and write it to ``wrapper_path``.

    Args:
        wrapper_path: destination file for the rendered wrapper.
        cache_key: used to build the model name ``scene_{cache_key}``.
        asset_lines: pre-rendered asset entries, joined with newlines.
        geom_lines: pre-rendered geom entries, joined with newlines.
        statistic_center: scene center, formatted with ``_fmt_vec``.
        statistic_extent: scene extent; also sets the far clip value,
            which is 20x the extent with a floor of 10000.
    """
    extent = float(statistic_extent)
    far_clip = max(extent * 20.0, 10000.0)
    fields = {
        "model_name": f"scene_{cache_key}",
        "statistic_center": _fmt_vec(statistic_center),
        "statistic_extent": f"{extent:.9g}",
        "visual_zfar": f"{far_clip:.9g}",
        "asset_meshes": "\n".join(asset_lines),
        "scene_geoms": "\n".join(geom_lines),
    }
    wrapper_path.write_text(_WRAPPER_TEMPLATE.format(**fields))
    logger.info(f"_write_wrapper: wrote {wrapper_path}")
Default auto-discovers " + "``.collision.json`` next to the source.", + ) + p.add_argument( + "--visual", + action="store_true", + help="emit visual passthrough meshes (group 2). Off by default -- " + "saves disk and render cost, but the MuJoCo viewer only shows " + "collision shapes without it.", + ) + p.add_argument( + "--rebake", + action="store_true", + help="ignore cached wrapper.xml and OBJs; do a full re-bake.", + ) + p.add_argument( + "--view", + action="store_true", + help="launch the MuJoCo native viewer after baking (blocks, scene only — no robot).", + ) + args = p.parse_args() + + try: + scene_path = _resolve_existing_file(args.scene, "scene mesh") + except (FileNotFoundError, ValueError) as exc: + p.error(str(exc)) + + align = SceneMeshAlignment(scale=args.scale, y_up=not args.no_y_up) + spec = ( + CollisionSpec.from_json(args.collision_spec) + if args.collision_spec is not None + else CollisionSpec.auto_discover(scene_path) + ) + + wrapper = bake_scene_mjcf( + scene_mesh_path=scene_path, + alignment=align, + collision_spec=spec, + include_visual_mesh=args.visual, + rebake=args.rebake, + ) + print(f"wrapper: {wrapper}") + + if args.view: + import mujoco # type: ignore[import-untyped] + import mujoco.viewer # type: ignore[import-untyped] + + viewer: Any = mujoco.viewer + model = mujoco.MjModel.from_xml_path(str(wrapper)) + print(f"loaded: {model.nbody} bodies, {model.ngeom} geoms, {model.nmesh} meshes") + print("\nlaunching MuJoCo viewer (scene only — no robot attached)") + viewer.launch(model) + + +if __name__ == "__main__": + cli_main() + + +__all__ = ["bake_scene_mjcf", "load_or_bake"] diff --git a/dimos/simulation/mujoco/test_collision_spec.py b/dimos/simulation/mujoco/test_collision_spec.py new file mode 100644 index 0000000000..670f8a9037 --- /dev/null +++ b/dimos/simulation/mujoco/test_collision_spec.py @@ -0,0 +1,65 @@ +# Copyright 2025-2026 Dimensional Inc. 
def _flat_square_floor() -> tuple[np.ndarray, np.ndarray]:
    """Return a zero-thickness 2 x 2 square in the z = 0 plane.

    The square is centered on the origin and split into two triangles
    that share the 0-2 diagonal.
    """
    outline = [(-1.0, -1.0), (1.0, -1.0), (1.0, 1.0), (-1.0, 1.0)]
    vertices = np.array([(x, y, 0.0) for x, y in outline], dtype=np.float64)
    triangles = np.array([(0, 1, 2), (0, 2, 3)], dtype=np.int32)
    return vertices, triangles
decision.primitive["pos"][2] == pytest.approx(0.0) diff --git a/dimos/simulation/scene_assets/mesh_scene.py b/dimos/simulation/scene_assets/mesh_scene.py new file mode 100644 index 0000000000..ba16f7f396 --- /dev/null +++ b/dimos/simulation/scene_assets/mesh_scene.py @@ -0,0 +1,710 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Load a 3D scene asset into DimOS world-frame geometry. + +Supports: + * ``.glb`` / ``.gltf`` / ``.obj`` / ``.ply`` / ``.stl`` — via Open3D's + ``read_triangle_mesh``. + * ``.usdz`` / ``.usd`` / ``.usdc`` — via ``pxr.Usd`` (install ``usd-core``). + +Returned meshes are in DimOS world frame, with optional scale, +Y-up-to-Z-up rotation, Euler rotation, and translation applied. + +This loader is intentionally physics/viewer agnostic. MuJoCo collision +baking, browser collision baking, ray-casting, and asset inspection all +share the same source transform instead of each subsystem guessing its own +coordinate convention. 
def _world_rotation(alignment: SceneMeshAlignment) -> np.ndarray:
    """Build the single 3x3 matrix that rotates source points into world frame.

    ``alignment.rotation_zyx_deg`` supplies the Z, Y and X angles (in
    degrees, in that order); the result is ``Rz @ Ry @ Rx``. When
    ``alignment.y_up`` is set, a fixed Y-up-to-Z-up swap is applied to the
    points first (i.e. it is the right-most factor).
    """
    angles = np.radians(alignment.rotation_zyx_deg)
    (cos_z, sin_z), (cos_y, sin_y), (cos_x, sin_x) = (
        (np.cos(angles[axis]), np.sin(angles[axis])) for axis in range(3)
    )

    about_z = np.array(
        [[cos_z, -sin_z, 0], [sin_z, cos_z, 0], [0, 0, 1]],
        dtype=np.float64,
    )
    about_y = np.array(
        [[cos_y, 0, sin_y], [0, 1, 0], [-sin_y, 0, cos_y]],
        dtype=np.float64,
    )
    about_x = np.array(
        [[1, 0, 0], [0, cos_x, -sin_x], [0, sin_x, cos_x]],
        dtype=np.float64,
    )
    euler = about_z @ about_y @ about_x

    if not alignment.y_up:
        return euler

    # Sends the source +Y axis to world +Z (and source +Z to world -Y).
    y_up_to_z_up = np.array(
        [[1, 0, 0], [0, 0, -1], [0, 1, 0]],
        dtype=np.float64,
    )
    return euler @ y_up_to_z_up
def _color_from_displaycolor(
    mesh: Any,
    n_verts: int,
    face_counts: np.ndarray,
    face_verts: np.ndarray,
) -> np.ndarray | None:
    """Per-vertex RGB from ``primvars:displayColor`` if present and valued.

    Handles the four standard interpolations: ``constant`` / ``vertex`` /
    ``uniform`` / ``faceVarying``. Returns ``None`` when the primvar
    isn't authored with a value (Sketchfab USDZ exports typically declare
    the primvar but leave it empty — colors live on the bound material),
    or when the value count does not match what the interpolation requires.

    Args:
        mesh: a ``UsdGeom.Mesh`` wrapping the prim to inspect.
        n_verts: number of points on the mesh.
        face_counts: vertex count of every face.
        face_verts: flattened per-face-corner vertex indices.

    Returns:
        ``(n_verts, 3)`` RGB array, or ``None`` if no usable color exists.
    """
    from pxr import UsdGeom  # type: ignore[import-not-found, import-untyped]

    pv = UsdGeom.PrimvarsAPI(mesh.GetPrim()).GetPrimvar("displayColor")
    if not pv or not pv.HasValue():
        return None
    # ``Get()`` returns only the raw value table of an *indexed* primvar; the
    # element count then never matches the interpolation, so the colors were
    # rejected (or, on a coincidental match, assigned to the wrong elements).
    # ``ComputeFlattened()`` applies the indices and is equivalent to
    # ``Get()`` for non-indexed primvars.
    raw = pv.ComputeFlattened()
    if raw is None:
        return None
    colors = np.asarray(raw, dtype=np.float32)
    if colors.ndim != 2 or colors.shape[1] != 3 or colors.size == 0:
        return None
    interp = pv.GetInterpolation()
    n_corners = len(face_verts)

    if interp == UsdGeom.Tokens.constant:
        return np.tile(colors[0:1], (n_verts, 1))

    if interp == UsdGeom.Tokens.vertex and len(colors) == n_verts:
        return colors

    if (
        interp == UsdGeom.Tokens.uniform
        and len(colors) == len(face_counts)
        # Expanding one color per face must produce exactly one value per
        # face corner, otherwise the scatter below fails on shape mismatch.
        and int(np.sum(face_counts)) == n_corners
    ):
        per_fv = np.repeat(colors, face_counts, axis=0)
        return _average_per_face_vertex(per_fv, face_verts, n_verts)

    if interp == UsdGeom.Tokens.faceVarying and len(colors) == n_corners:
        return _average_per_face_vertex(colors, face_verts, n_verts)

    return None
def _color_from_material(
    prim: Any, material_color_cache: dict[str, np.ndarray | None]
) -> np.ndarray | None:
    """Look up one RGB color for ``prim`` from its bound material.

    The bound material is resolved through ``UsdShade.MaterialBindingAPI``
    and its literal ``diffuseColor`` is read by ``_resolve_diffuse_color``.
    Textures are never sampled, so a texture-driven material yields ``None``
    and the caller moves on to its next strategy.

    ``material_color_cache`` maps material path to the resolved color
    (``None`` included) and is filled in place, so prims sharing a material
    walk the shader graph only once.
    """
    from pxr import UsdShade  # type: ignore[import-not-found, import-untyped]

    material = UsdShade.MaterialBindingAPI(prim).ComputeBoundMaterial()[0]
    if not material:
        return None

    cache_key = str(material.GetPath())
    if cache_key not in material_color_cache:
        material_color_cache[cache_key] = _resolve_diffuse_color(material)
    return material_color_cache[cache_key]


def _resolve_diffuse_color(material: Any) -> np.ndarray | None:
    """Read a literal ``diffuseColor`` from a material's surface shader.

    Returns a flat ``(3,)`` float32 array, or ``None`` when the material has
    no surface shader, no ``diffuseColor`` input, no authored value, a value
    that is not three components, or an input driven by a ``UsdUVTexture``.
    """
    from pxr import UsdShade  # type: ignore[import-not-found, import-untyped]

    surface = material.ComputeSurfaceSource("")[0]
    if not surface:
        return None

    diffuse = surface.GetInput("diffuseColor")
    if not diffuse:
        return None

    # A texture-driven input has no usable literal; images are not sampled.
    if diffuse.HasConnectedSource():
        upstream = diffuse.GetConnectedSource()[0]
        if upstream:
            upstream_shader = UsdShade.Shader(upstream.GetPrim())
            if upstream_shader and upstream_shader.GetIdAttr().Get() == "UsdUVTexture":
                return None

    literal = diffuse.Get()
    if literal is None:
        return None

    rgb = np.asarray(literal, dtype=np.float32).reshape(-1)
    return rgb if rgb.size == 3 else None
+ """ + try: + from pxr import Usd, UsdGeom # type: ignore[import-not-found, import-untyped] + except ImportError as e: + raise ImportError("loading .usdz/.usd requires usd-core: `uv pip install usd-core`") from e + + stage = Usd.Stage.Open(str(path)) + if stage is None: + raise RuntimeError(f"could not open USD stage: {path}") + + all_pts: list[np.ndarray] = [] + all_tris: list[np.ndarray] = [] + all_colors: list[np.ndarray] = [] + any_color = False + vtx_offset = 0 + material_color_cache: dict[str, np.ndarray | None] = {} + + for prim in stage.Traverse(): + if not prim.IsA(UsdGeom.Mesh): + continue + mesh = UsdGeom.Mesh(prim) + pts_attr = mesh.GetPointsAttr().Get() + if pts_attr is None or len(pts_attr) == 0: + continue + pts = np.asarray(pts_attr, dtype=np.float32) + face_verts = np.asarray(mesh.GetFaceVertexIndicesAttr().Get(), dtype=np.int32) + face_counts = np.asarray(mesh.GetFaceVertexCountsAttr().Get(), dtype=np.int32) + + # Bake the prim's local-to-world transform into the points so the + # composite scene comes out in stage-root coordinates. + xform = UsdGeom.Xformable(prim).ComputeLocalToWorldTransform(Usd.TimeCode.Default()) + m = np.asarray(xform, dtype=np.float64).T # USD matrices are row-major + pts_h = np.hstack([pts, np.ones((len(pts), 1), dtype=np.float32)]) + pts_world = (m @ pts_h.T).T[:, :3].astype(np.float32) + + # Per-prim color resolution. Try in order: + # 1. ``primvars:displayColor`` (vertex / faceVarying / uniform / constant) + # 2. Bound material's ``inputs:diffuseColor`` (UsdPreviewSurface — what + # Sketchfab USDZ uses, with one constant color per material). + # 3. Neutral grey fallback. 
+ prim_colors = _color_from_displaycolor(mesh, len(pts), face_counts, face_verts) + if prim_colors is None: + mat_color = _color_from_material(prim, material_color_cache) + if mat_color is not None: + prim_colors = np.tile(mat_color[None, :], (len(pts), 1)) + if prim_colors is not None: + any_color = True + else: + prim_colors = np.full((len(pts), 3), 0.7, dtype=np.float32) + + # USD allows quads / n-gons; fan-triangulate so o3d gets pure tris. + tris: list[tuple[int, int, int]] = [] + cursor = 0 + for n in face_counts: + for k in range(1, n - 1): + tris.append( + ( + int(face_verts[cursor]) + vtx_offset, + int(face_verts[cursor + k]) + vtx_offset, + int(face_verts[cursor + k + 1]) + vtx_offset, + ) + ) + cursor += n + + if not tris: + continue + all_pts.append(pts_world) + all_tris.append(np.asarray(tris, dtype=np.int32)) + all_colors.append(prim_colors) + vtx_offset += len(pts_world) + + if not all_pts: + raise RuntimeError(f"no Mesh prims with triangles found in {path}") + + pts = np.concatenate(all_pts, axis=0).astype(np.float64) + tris = np.concatenate(all_tris, axis=0) + + mesh = o3d.geometry.TriangleMesh() + mesh.vertices = o3d.utility.Vector3dVector(pts) + mesh.triangles = o3d.utility.Vector3iVector(tris) + if any_color: + colors = np.concatenate(all_colors, axis=0).astype(np.float64) + mesh.vertex_colors = o3d.utility.Vector3dVector(np.clip(colors, 0.0, 1.0)) + return mesh + + +def load_scene_mesh( + path: str | Path, + alignment: SceneMeshAlignment | None = None, +) -> o3d.geometry.TriangleMesh: + """Load a scene mesh from disk and apply alignment to put it in dimos world frame. + + Args: + path: file path. Supported extensions: ``.usdz``, ``.usd``, ``.usdc``, + ``.glb``, ``.gltf``, ``.obj``, ``.ply``, ``.stl``. + alignment: scale / rotation / translation to apply. + + Returns: + an ``open3d.geometry.TriangleMesh`` in dimos world frame with vertex + normals computed. 
def _load_gltf_geometry(path: Path) -> tuple[np.ndarray, np.ndarray]:
    """Flatten a glTF/GLB into scene-root ``(vertices, faces)`` arrays.

    GEOMETRY-ONLY load, used by floor-z probing and ray-casting; it does not
    need PBR materials. ``trimesh.load(path, force="mesh")`` would flatten the
    scene by decompressing every embedded texture and sampling per-vertex
    colors. For a scene with hundreds of 4K PBR textures, that allocates
    ~10 GB transiently and OOMs 32 GB boxes. Instead the file is opened in
    Scene mode and the instance graph is walked, applying each instance's
    world transform and concatenating the results.

    Raises:
        RuntimeError: the file holds no triangles.
    """
    import trimesh

    loaded: Any = trimesh.load(str(path))
    if isinstance(loaded, trimesh.Trimesh):
        verts = np.asarray(loaded.vertices, dtype=np.float64)
        faces = np.asarray(loaded.faces, dtype=np.int64)
        # Match the other loaders, which all refuse to return an empty mesh.
        if len(faces) == 0:
            raise RuntimeError(f"glTF loaded but contains no triangles: {path}")
        return verts, faces

    verts_chunks: list[np.ndarray] = []
    faces_chunks: list[np.ndarray] = []
    v_off = 0
    for node_name in loaded.graph.nodes_geometry:
        xform, geom_name = loaded.graph[node_name]
        geom = loaded.geometry.get(geom_name)
        if not isinstance(geom, trimesh.Trimesh) or len(geom.faces) == 0:
            continue
        v_local = np.asarray(geom.vertices, dtype=np.float64)
        f_local = np.asarray(geom.faces, dtype=np.int64)
        m = np.asarray(xform, dtype=np.float64)
        v_h = np.hstack([v_local, np.ones((len(v_local), 1), dtype=np.float64)])
        verts_chunks.append((m @ v_h.T).T[:, :3])
        # Re-base this instance's indices onto the concatenated vertex array.
        faces_chunks.append(f_local + v_off)
        v_off += len(v_local)
    if not verts_chunks:
        raise RuntimeError(f"glTF loaded but no Trimesh instances found: {path}")
    return np.concatenate(verts_chunks, axis=0), np.concatenate(faces_chunks, axis=0)


def load_scene_mesh(
    path: str | Path,
    alignment: SceneMeshAlignment | None = None,
) -> o3d.geometry.TriangleMesh:
    """Load a scene mesh from disk and apply alignment to put it in dimos world frame.

    Args:
        path: file path. Supported extensions: ``.usdz``, ``.usd``, ``.usdc``,
            ``.usda`` (via ``pxr``), ``.glb``, ``.gltf`` (via ``trimesh``);
            anything else is handed to Open3D's ``read_triangle_mesh``
            (``.obj``, ``.ply``, ``.stl``, ...).
        alignment: scale / rotation / translation to apply, in that order,
            all about the origin. Defaults to ``SceneMeshAlignment()``.

    Returns:
        an ``open3d.geometry.TriangleMesh`` in dimos world frame with vertex
        normals computed.

    Raises:
        FileNotFoundError: ``path`` does not exist.
        RuntimeError: the file loads but contains no triangles.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"scene mesh not found: {path}")
    suffix = path.suffix.lower()
    if suffix in {".usdz", ".usd", ".usdc", ".usda"}:
        mesh = _load_usd_mesh(path)
    elif suffix in {".glb", ".gltf"}:
        verts_world, faces_world = _load_gltf_geometry(path)
        mesh = o3d.geometry.TriangleMesh()
        mesh.vertices = o3d.utility.Vector3dVector(verts_world)
        mesh.triangles = o3d.utility.Vector3iVector(faces_world.astype(np.int32))
    else:
        mesh = o3d.io.read_triangle_mesh(str(path))
        if len(mesh.triangles) == 0:
            raise RuntimeError(f"o3d.io.read_triangle_mesh returned an empty mesh for {path}")

    align = alignment or SceneMeshAlignment()
    if align.scale != 1.0:
        mesh.scale(align.scale, center=np.zeros(3))
    rot = _world_rotation(align)
    if not np.allclose(rot, np.eye(3)):
        mesh.rotate(rot, center=np.zeros(3))
    if any(align.translation):
        mesh.translate(np.asarray(align.translation, dtype=np.float64))

    mesh.compute_vertex_normals()
    return mesh
def floor_z_under_origin(
    scene_mesh_path: str | Path,
    alignment: SceneMeshAlignment | None = None,
) -> float:
    """Return the first scene surface under world ``x=0, y=0``.

    A single ray is cast straight down (``-Z``) from just above the top of the
    aligned mesh, so the result is the *highest* surface on the vertical line
    through the origin. Falls back to the mesh bbox minimum when the origin
    ray misses.

    Args:
        scene_mesh_path: scene file, loaded through ``load_scene_mesh``.
        alignment: alignment forwarded to ``load_scene_mesh``.

    Returns:
        world-frame ``z`` of the hit, or the bounding-box minimum ``z``.
    """
    import open3d.core as o3c  # type: ignore[import-untyped]

    mesh = load_scene_mesh(scene_mesh_path, alignment=alignment)
    bbox = mesh.get_axis_aligned_bounding_box()
    # Start above the mesh itself instead of at a fixed z=1000: a scene whose
    # top is higher than 1000 units (e.g. millimetre-scaled or translated)
    # would otherwise start the ray inside or below the geometry. Rounded to
    # float32 so the subtraction uses the same origin the ray was cast from.
    origin_z = float(np.float32(float(bbox.max_bound[2]) + 1.0))

    scene = make_raycasting_scene(mesh)
    rays = o3c.Tensor(
        np.array([[0.0, 0.0, origin_z, 0.0, 0.0, -1.0]], dtype=np.float32),
        dtype=o3c.Dtype.Float32,
    )
    t_hit = float(scene.cast_rays(rays)["t_hit"].numpy()[0])
    if np.isfinite(t_hit):
        return origin_z - t_hit
    return float(bbox.min_bound[2])


def make_raycasting_scene(
    mesh: o3d.geometry.TriangleMesh,
) -> o3d.t.geometry.RaycastingScene:
    """Wrap a legacy ``TriangleMesh`` in an Open3D ``RaycastingScene``.

    The mesh is converted with ``TriangleMesh.from_legacy`` and added as the
    scene's only geometry.
    """
    scene = o3d.t.geometry.RaycastingScene()
    scene.add_triangles(o3d.t.geometry.TriangleMesh.from_legacy(mesh))
    return scene
@dataclass
class ScenePrimMesh:
    """Geometry of a single scene part, kept separate from its siblings.

    ``load_scene_prims`` returns one of these per source part (a USD ``Mesh``
    prim or a glTF geometry instance) so that MuJoCo can treat each as its own
    (approximately convex) collision shape. Formats without per-part structure
    come back as a single-element list holding the whole mesh.
    """

    name: str
    """Sanitized identifier that is safe as an MJCF asset name — usually the
    source path with every non-alphanumeric character replaced."""

    vertices: np.ndarray
    """``(N, 3)`` float32 positions in world frame, alignment already applied."""

    triangles: np.ndarray
    """``(M, 3)`` int32 indices into ``vertices``."""

    prim_path: str | None = None
    """Scene-graph path in the source file, if the format has one."""

    visual_node_name: str | None = None
    """Stable source node name for visual extraction, if available."""
def split_disconnected_scene_prims(
    prims: list[ScenePrimMesh],
    *,
    min_components: int,
    extent_ratio: float,
    prim_min_extent: float,
    axis_ratio: float,
    min_component_extent: float,
    min_component_faces: int,
    can_split: Callable[[ScenePrimMesh], bool] | None = None,
) -> tuple[list[ScenePrimMesh], dict[str, int]]:
    """Split scene-graph nodes that are disconnected prop clusters.

    Some game exports group many small disconnected objects under one node
    (for example leaves, cups, bottles). Primitive fitting sees only the
    combined bounds and can turn the group into one scene-scale slab. This
    helper keeps normal connected props intact, but splits suspicious wide
    clusters so tiny decorative islands can be dropped and larger islands can
    be fitted independently.

    A prim is split only if it passes every gate, checked in this order:
    ``can_split`` (when given) accepts it; it has at least
    ``2 * min_component_faces`` triangles; its largest extent reaches
    ``prim_min_extent``; all three extents are non-degenerate and
    ``max / min`` reaches ``axis_ratio``; it has at least ``min_components``
    connected components; and its largest extent is at least ``extent_ratio``
    times the median component extent. Otherwise it is passed through as-is.

    Once a prim is split, components with fewer than ``min_component_faces``
    triangles or a largest extent below ``min_component_extent`` are dropped.
    If *every* component is dropped, the prim contributes nothing to the
    result; the original is not re-emitted.

    Returns:
        ``(prims, stats)``, where ``stats`` counts ``source_prims``,
        ``split_prims``, ``emitted_components`` and ``dropped_components``.
    """
    import trimesh

    result: list[ScenePrimMesh] = []
    stats = {
        "source_prims": len(prims),
        "split_prims": 0,
        "emitted_components": 0,
        "dropped_components": 0,
    }

    for prim in prims:
        if can_split is not None and not can_split(prim):
            result.append(prim)
            continue
        # Too few faces to yield two components worth keeping.
        if len(prim.triangles) < max(min_component_faces * 2, 1):
            result.append(prim)
            continue
        prim_extent = np.ptp(prim.vertices, axis=0)
        if float(prim_extent.max()) < prim_min_extent:
            result.append(prim)
            continue
        # Flat or line-like prims (a degenerate axis) are never split.
        positive_extent = prim_extent[prim_extent > 1e-6]
        if (
            len(positive_extent) < 3
            or float(positive_extent.max() / positive_extent.min()) < axis_ratio
        ):
            result.append(prim)
            continue

        mesh = trimesh.Trimesh(vertices=prim.vertices, faces=prim.triangles, process=False)
        parts = mesh.split(only_watertight=False)
        if len(parts) < min_components:
            result.append(prim)
            continue

        component_extents = np.array(
            [np.ptp(np.asarray(part.vertices), axis=0).max() for part in parts],
            dtype=np.float64,
        )
        median_component_extent = float(np.median(component_extents))
        if median_component_extent <= 0.0:
            result.append(prim)
            continue
        # Only clusters much wider than their typical island are suspicious.
        if float(prim_extent.max()) / median_component_extent < extent_ratio:
            result.append(prim)
            continue

        emitted = 0
        dropped = 0
        for index, part in enumerate(parts):
            vertices = np.asarray(part.vertices, dtype=np.float32)
            triangles = np.asarray(part.faces, dtype=np.int32)
            component_extent = float(np.ptp(vertices, axis=0).max()) if len(vertices) else 0.0
            if len(triangles) < min_component_faces or component_extent < min_component_extent:
                dropped += 1
                continue
            # NOTE(review): components do not inherit ``visual_node_name``
            # from the source prim — confirm that is intended downstream.
            result.append(
                ScenePrimMesh(
                    name=f"{prim.name}_part{index:04d}",
                    vertices=vertices,
                    triangles=triangles,
                    prim_path=(
                        f"{prim.prim_path}/component_{index:04d}"
                        if prim.prim_path is not None
                        else f"{prim.name}/component_{index:04d}"
                    ),
                )
            )
            emitted += 1

        stats["split_prims"] += 1
        stats["emitted_components"] += emitted
        stats["dropped_components"] += dropped

    return result, stats
+ m = np.asarray(xform, dtype=np.float64) + pts_h = np.hstack([pts_local, np.ones((len(pts_local), 1), dtype=np.float64)]) + pts_stage = (m @ pts_h.T).T[:, :3] + + # Scene-root → dimos world via SceneMeshAlignment. + pts_world = (R @ (s * pts_stage).T).T + T + + stable_node = _stable_trimesh_node_name(str(node_name)) + stable_prim_path = _unique_stable_name( + f"{stable_node}_{geom_name}", + prim_path_counts, + ) + clean = _unique_stable_name(_sanitize_scene_name(stable_prim_path), name_counts) + prims.append( + ScenePrimMesh( + name=clean, + vertices=pts_world.astype(np.float32), + triangles=faces, + prim_path=stable_prim_path, + visual_node_name=stable_node, + ) + ) + return sorted(prims, key=lambda prim: prim.prim_path or prim.name) + + +def _stable_trimesh_node_name(node_name: str) -> str: + """Drop random duplicate suffixes that trimesh adds to glTF nodes.""" + return _TRIMESH_DUPLICATE_SUFFIX_RE.sub("", node_name) + + +def _sanitize_scene_name(raw: str) -> str: + return "".join(c if c.isalnum() else "_" for c in raw) + + +def _unique_stable_name(raw: str, counts: dict[str, int]) -> str: + count = counts.get(raw, 0) + counts[raw] = count + 1 + if count == 0: + return raw + return f"{raw}__{count:03d}" + + +def load_scene_prims( + path: str | Path, + alignment: SceneMeshAlignment | None = None, +) -> list[ScenePrimMesh]: + """Load a USD/USDZ scene as one ``ScenePrimMesh`` per Mesh prim. + + Per-prim splitting is what MuJoCo wants for non-trivial scenes: + each prim's convex hull approximates the prim well, while the + convex hull of the *whole* scene is its bounding box. Falls back + to a single ScenePrimMesh for non-USD inputs (a single ``.obj`` or + ``.glb`` doesn't carry per-part semantics in our loader). + + Same alignment rules as ``load_scene_mesh``. 
def load_scene_prims(
    path: str | Path,
    alignment: SceneMeshAlignment | None = None,
) -> list[ScenePrimMesh]:
    """Load a scene as a list of per-part ``ScenePrimMesh`` objects.

    Per-prim splitting is what MuJoCo wants for non-trivial scenes:
    each prim's convex hull approximates the prim well, while the
    convex hull of the *whole* scene is its bounding box.

    * ``.usdz`` / ``.usd`` / ``.usdc`` / ``.usda``: one entry per ``Mesh``
      prim with triangles.
    * ``.glb`` / ``.gltf``: one entry per geometry instance (see
      ``_load_glb_prims``).
    * anything else (``.obj`` / ``.ply`` / ``.stl`` ...): a single entry
      named ``"scene"`` holding the whole mesh.

    Same alignment rules as ``load_scene_mesh`` (scale, then rotation, then
    translation). USD prims with no points, no authored topology, malformed
    topology or no triangles are skipped.

    Raises:
        ImportError: a USD file was given but ``usd-core`` is not installed.
        RuntimeError: the USD stage cannot be opened or has no usable prim.
    """
    path = Path(path)
    align = alignment or SceneMeshAlignment()
    suffix = path.suffix.lower()

    if suffix in {".glb", ".gltf"}:
        return _load_glb_prims(path, align)

    if suffix not in {".usdz", ".usd", ".usdc", ".usda"}:
        # Non-USD, non-glTF (e.g. .obj/.ply/.stl): one part, whole mesh.
        whole = load_scene_mesh(path, alignment=align)
        return [
            ScenePrimMesh(
                name="scene",
                vertices=np.asarray(whole.vertices, dtype=np.float32),
                triangles=np.asarray(whole.triangles, dtype=np.int32),
                prim_path="scene",
            )
        ]

    try:
        from pxr import Usd, UsdGeom  # type: ignore[import-not-found, import-untyped]
    except ImportError as e:
        raise ImportError("loading .usdz/.usd requires usd-core: `uv pip install usd-core`") from e

    stage = Usd.Stage.Open(str(path))
    if stage is None:
        raise RuntimeError(f"could not open USD stage: {path}")

    R = _world_rotation(align)
    T = np.asarray(align.translation, dtype=np.float64)
    s = float(align.scale)

    prims: list[ScenePrimMesh] = []
    for prim in stage.Traverse():
        if not prim.IsA(UsdGeom.Mesh):
            continue
        usd_mesh = UsdGeom.Mesh(prim)
        pts_attr = usd_mesh.GetPointsAttr().Get()
        if pts_attr is None or len(pts_attr) == 0:
            continue
        idx_attr = usd_mesh.GetFaceVertexIndicesAttr().Get()
        cnt_attr = usd_mesh.GetFaceVertexCountsAttr().Get()
        # Points without authored topology would otherwise crash in
        # ``np.asarray(None, dtype=int32)``.
        if idx_attr is None or cnt_attr is None:
            continue
        pts = np.asarray(pts_attr, dtype=np.float64)
        face_verts = np.asarray(idx_attr, dtype=np.int32)
        face_counts = np.asarray(cnt_attr, dtype=np.int32)

        # Skip malformed topology instead of raising IndexError mid-scene or
        # emitting triangles that reference non-existent vertices.
        if np.any(face_counts < 0) or int(face_counts.sum()) > len(face_verts):
            continue
        if face_verts.size and (face_verts.min() < 0 or face_verts.max() >= len(pts)):
            continue

        # Triangulate any quads / n-gons (vertex indices stay local to this prim).
        tris: list[tuple[int, int, int]] = []
        cursor = 0
        for n in face_counts:
            for k in range(1, n - 1):
                tris.append(
                    (
                        int(face_verts[cursor]),
                        int(face_verts[cursor + k]),
                        int(face_verts[cursor + k + 1]),
                    )
                )
            cursor += n
        if not tris:
            continue

        # Local → stage-root via the USD prim's accumulated transform. USD
        # uses the row-vector convention, hence the transpose.
        xform = UsdGeom.Xformable(prim).ComputeLocalToWorldTransform(Usd.TimeCode.Default())
        m = np.asarray(xform, dtype=np.float64).T
        pts_h = np.hstack([pts, np.ones((len(pts), 1), dtype=np.float64)])
        pts_stage = (m @ pts_h.T).T[:, :3]

        # Stage-root → dimos world via SceneMeshAlignment (scale → rot → trans).
        pts_world = (R @ (s * pts_stage).T).T + T

        # MJCF asset names: strip the leading slash, swap remaining
        # path separators / dots for underscores. Sanitized prim paths can
        # collide; suffix the running index so each name is unique.
        prim_path = str(prim.GetPath())
        clean = _sanitize_scene_name(prim_path.lstrip("/"))
        prims.append(
            ScenePrimMesh(
                name=f"{clean}__{len(prims)}",
                vertices=pts_world.astype(np.float32),
                triangles=np.asarray(tris, dtype=np.int32),
                prim_path=prim_path,
            )
        )

    if not prims:
        raise RuntimeError(f"no Mesh prims with triangles found in {path}")
    return prims
+scene = [ + "open3d>=0.18.0", + "trimesh>=4.0.0", + "coacd>=1.0.0", + "usd-core>=23.11", +] + all = [ - "dimos[agents,apriltag,base,cpu,cuda,drone,manipulation,misc,perception,sim,unitree,visualization,web]", + "dimos[agents,apriltag,base,cpu,cuda,drone,manipulation,misc,perception,scene,sim,unitree,visualization,web]", ] [dependency-groups] @@ -548,6 +558,13 @@ module = [ "websocket", "xarm.*", "ament_index_python.*", + "coacd", + "open3d", + "open3d.*", + "pxr", + "pxr.*", + "trimesh", + "trimesh.*", ] ignore_missing_imports = true diff --git a/uv.lock b/uv.lock index a22bbafc62..10f8a26e12 100644 --- a/uv.lock +++ b/uv.lock @@ -1256,6 +1256,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/56/f3/4da9d5c5308ef2019ab65a8a9f519ac95004446902d01e859f9ac6b8cdd6/cmeel_zlib-1.3.1-0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1e36ac8dccca22ff1f6e4df428ae5597f6288d9e6f85b08c9b767dc63e90fb55", size = 285662, upload-time = "2025-02-11T12:20:37.298Z" }, ] +[[package]] +name = "coacd" +version = "1.0.11" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/3b/434beab9e1754ca0ae3a619b383243dd85aa65d03a4dc7333c8296c97a92/coacd-1.0.11-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:adbc58259a721cc5ede24cc8c2671a95e75a8a52dc1ee4d953d80d236b192da9", size = 3299264, upload-time = "2026-05-04T19:23:36.183Z" }, + { url = "https://files.pythonhosted.org/packages/cb/a7/06f63baa29198f681ba60848e3271cc625eddd61cd4e59071f56ffce9362/coacd-1.0.11-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e376fadb22790444c7253f0cee9104a1af01ec965488c1318e84e3b2dbf1e2a3", size = 2573832, upload-time = 
"2026-05-04T19:23:38.008Z" }, + { url = "https://files.pythonhosted.org/packages/0e/b4/057c78f7b16b87871cfcecc6febe70522e77a2a33f8788960377152c224d/coacd-1.0.11-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a60f700a52e5b40462e508c14bb756cd63ce7e6a95ff72ae0b1592be1dbb0106", size = 2640170, upload-time = "2026-05-04T19:23:39.322Z" }, + { url = "https://files.pythonhosted.org/packages/00/83/c50472ce98175fddd86d4aba861fea89f30350a5487880b3a81d34915a85/coacd-1.0.11-cp39-abi3-win_amd64.whl", hash = "sha256:4de22f70d1a3fa8c44698c8006a223fe5fb0ee84b76adecf3726cf2003e9145f", size = 1465079, upload-time = "2026-05-04T19:23:41.604Z" }, +] + [[package]] name = "coal" version = "3.0.2" @@ -2015,6 +2030,7 @@ agents = [ all = [ { name = "a750-control", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "chromadb" }, + { name = "coacd" }, { name = "cupy-cuda12x", marker = "platform_machine == 'x86_64'" }, { name = "dimos-viewer" }, { name = "drake", version = "1.45.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'aarch64' and sys_platform == 'darwin'" }, @@ -2044,6 +2060,7 @@ all = [ { name = "onnxruntime" }, { name = "onnxruntime-gpu", marker = "platform_machine == 'x86_64'" }, { name = "open-clip-torch" }, + { name = "open3d" }, { name = "openai" }, { name = "opencv-contrib-python" }, { name = "pillow" }, @@ -2070,6 +2087,7 @@ all = [ { name = "trimesh" }, { name = "ultralytics" }, { name = "unitree-webrtc-connect" }, + { name = "usd-core" }, { name = "uvicorn" }, { name = "viser", extra = ["urdf"] }, { name = "xacro" }, @@ -2163,6 +2181,12 @@ perception = [ { name = "transformers", extra = ["torch"] }, { name = "ultralytics" }, ] +scene = [ + { name = "coacd" }, + { name = "open3d" }, + { name = "trimesh" }, + { name = "usd-core" }, +] sim = [ { name = "mujoco" }, { name = "playground" }, @@ -2405,12 +2429,13 @@ requires-dist = [ { name = "a750-control", marker = "platform_machine == 
'x86_64' and sys_platform == 'linux' and extra == 'manipulation'" }, { name = "annotation-protocol", specifier = ">=1.4.0" }, { name = "bleak", specifier = ">=3.0.2" }, + { name = "coacd", marker = "extra == 'scene'", specifier = ">=1.0.0" }, { name = "chromadb", marker = "extra == 'perception'", specifier = ">=1.0.0" }, { name = "cryptography", specifier = ">=46.0.5" }, { name = "cupy-cuda12x", marker = "platform_machine == 'x86_64' and extra == 'cuda'", specifier = "==13.6.0" }, { name = "cyclonedds", marker = "extra == 'dds'", specifier = ">=0.10.5" }, { name = "cyclonedds", marker = "extra == 'unitree-dds'", specifier = ">=0.10.5" }, - { name = "dimos", extras = ["agents", "apriltag", "base", "cpu", "cuda", "drone", "manipulation", "misc", "perception", "sim", "unitree", "visualization", "web"], marker = "extra == 'all'" }, + { name = "dimos", extras = ["agents", "apriltag", "base", "cpu", "cuda", "drone", "manipulation", "misc", "perception", "scene", "sim", "unitree", "visualization", "web"], marker = "extra == 'all'" }, { name = "dimos", extras = ["agents", "web", "perception", "visualization"], marker = "extra == 'base'" }, { name = "dimos", extras = ["base", "mapping"], marker = "extra == 'unitree'" }, { name = "dimos", extras = ["unitree"], marker = "extra == 'unitree-dds'" }, @@ -2450,6 +2475,7 @@ requires-dist = [ { name = "onnxruntime-gpu", marker = "platform_machine == 'x86_64' and extra == 'cuda'", specifier = ">=1.17.1" }, { name = "open-clip-torch", marker = "extra == 'misc'", specifier = "==3.2.0" }, { name = "open3d", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'", specifier = ">=0.18.0" }, + { name = "open3d", marker = "extra == 'scene'", specifier = ">=0.18.0" }, { name = "open3d-unofficial-arm", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'", specifier = ">=0.19.0.post9" }, { name = "openai", marker = "extra == 'agents'" }, { name = "opencv-contrib-python", marker = "extra == 'apriltag'", specifier = 
"==4.10.0.84" }, @@ -2495,11 +2521,13 @@ requires-dist = [ { name = "torchreid", marker = "extra == 'misc'", specifier = "==0.2.5" }, { name = "transformers", extras = ["torch"], marker = "extra == 'perception'", specifier = ">=4.53.0,<4.54" }, { name = "trimesh", marker = "extra == 'manipulation'" }, + { name = "trimesh", marker = "extra == 'scene'", specifier = ">=4.0.0" }, { name = "typer", specifier = ">=0.19.2,<1" }, { name = "typing-extensions", marker = "python_full_version < '3.11'", specifier = ">=4.0" }, { name = "ultralytics", marker = "extra == 'perception'", specifier = ">=8.3.70" }, { name = "unitree-sdk2py-dimos", marker = "extra == 'unitree-dds'", specifier = ">=1.0.2" }, { name = "unitree-webrtc-connect", marker = "extra == 'unitree'", specifier = ">=2.1.2" }, + { name = "usd-core", marker = "extra == 'scene'", specifier = ">=23.11" }, { name = "uvicorn", marker = "extra == 'web'", specifier = ">=0.34.0" }, { name = "viser", extras = ["urdf"], marker = "extra == 'manipulation'", specifier = ">=1.0.29" }, { name = "websocket-client", specifier = ">=1.8" }, @@ -2507,7 +2535,7 @@ requires-dist = [ { name = "xarm-python-sdk", marker = "extra == 'manipulation'", specifier = ">=1.17.0" }, { name = "xarm-python-sdk", marker = "extra == 'misc'", specifier = ">=1.17.0" }, ] -provides-extras = ["misc", "visualization", "agents", "web", "perception", "unitree", "unitree-dds", "manipulation", "cpu", "cuda", "sim", "mapping", "drone", "dds", "base", "apriltag", "all"] +provides-extras = ["misc", "visualization", "agents", "web", "perception", "unitree", "unitree-dds", "manipulation", "cpu", "cuda", "sim", "mapping", "drone", "dds", "base", "apriltag", "scene", "all"] [package.metadata.requires-dev] autofix = [{ name = "ruff", specifier = "==0.14.3" }] @@ -6860,23 +6888,23 @@ name = "open3d" version = "0.19.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "addict", marker = "platform_machine != 'aarch64' or sys_platform != 
'linux'" }, - { name = "configargparse", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, - { name = "dash", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, - { name = "flask", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, - { name = "matplotlib", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, - { name = "nbformat", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and platform_machine != 'aarch64') or (python_full_version < '3.11' and sys_platform != 'linux')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and platform_machine != 'aarch64') or (python_full_version >= '3.11' and sys_platform != 'linux')" }, - { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and platform_machine != 'aarch64') or (python_full_version < '3.11' and sys_platform != 'linux')" }, - { name = "pandas", version = "3.0.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and platform_machine != 'aarch64') or (python_full_version >= '3.11' and sys_platform != 'linux')" }, - { name = "pillow", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, - { name = "pyquaternion", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, - { name = "pyyaml", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, - { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and platform_machine != 'aarch64') or (python_full_version < '3.11' and sys_platform != 'linux')" }, - { name = "scikit-learn", version = "1.8.0", source = { registry 
= "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and platform_machine != 'aarch64') or (python_full_version >= '3.11' and sys_platform != 'linux')" }, - { name = "tqdm", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, - { name = "werkzeug", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, + { name = "addict" }, + { name = "configargparse" }, + { name = "dash" }, + { name = "flask" }, + { name = "matplotlib" }, + { name = "nbformat" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "pandas", version = "3.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pillow" }, + { name = "pyquaternion" }, + { name = "pyyaml" }, + { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "tqdm" }, + { name = "werkzeug" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/5c/4b/91e8a4100adf0ccd2f7ad21dd24c2e3d8f12925396528d0462cfb1735e5a/open3d-0.19.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:f7128ded206e07987cc29d0917195fb64033dea31e0d60dead3629b33d3c175f", size = 103086005, upload-time = "2025-01-08T07:25:56.755Z" }, @@ -7305,16 +7333,17 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version < '3.11' and platform_machine == 'x86_64' and sys_platform == 'darwin'", "python_full_version < '3.11' and 
platform_machine != 'x86_64' and sys_platform == 'darwin'", + "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version < '3.11' and platform_machine == 'x86_64' and sys_platform == 'win32'", "python_full_version < '3.11' and platform_machine != 'x86_64' and sys_platform == 'win32'", "python_full_version < '3.11' and platform_machine == 'x86_64' and sys_platform != 'darwin' and sys_platform != 'win32'", "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 'x86_64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", ] dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and platform_machine != 'aarch64') or (python_full_version < '3.11' and sys_platform != 'linux')" }, - { name = "python-dateutil", marker = "(python_full_version < '3.11' and platform_machine != 'aarch64') or (python_full_version < '3.11' and sys_platform != 'linux')" }, - { name = "pytz", marker = "(python_full_version < '3.11' and platform_machine != 'aarch64') or (python_full_version < '3.11' and sys_platform != 'linux')" }, - { name = "tzdata", marker = "(python_full_version < '3.11' and platform_machine != 'aarch64') or (python_full_version < '3.11' and sys_platform != 'linux')" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "python-dateutil", marker = "python_full_version < '3.11'" }, + { name = "pytz", marker = "python_full_version < '3.11'" }, + { name = "tzdata", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = 
"sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = "2025-09-29T23:34:51.853Z" } wheels = [ @@ -7378,6 +7407,9 @@ resolution-markers = [ "python_full_version == '3.13.*' and platform_machine != 'x86_64' and sys_platform == 'darwin'", "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'darwin'", "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'darwin'", + "python_full_version >= '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version == '3.13.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version >= '3.14' and platform_machine == 'x86_64' and sys_platform == 'win32'", "python_full_version >= '3.14' and platform_machine != 'x86_64' and sys_platform == 'win32'", "python_full_version == '3.13.*' and platform_machine == 'x86_64' and sys_platform == 'win32'", @@ -7392,14 +7424,15 @@ resolution-markers = [ "(python_full_version == '3.12.*' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", "python_full_version == '3.11.*' and platform_machine == 'x86_64' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and platform_machine != 'x86_64' and sys_platform == 'darwin'", + "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version == '3.11.*' and platform_machine == 'x86_64' and sys_platform == 'win32'", "python_full_version == '3.11.*' and platform_machine != 'x86_64' and sys_platform == 'win32'", "python_full_version == '3.11.*' and platform_machine == 'x86_64' and sys_platform != 'darwin' and sys_platform != 
'win32'", "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 'x86_64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", ] dependencies = [ - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and platform_machine != 'aarch64') or (python_full_version >= '3.11' and sys_platform != 'linux')" }, - { name = "python-dateutil", marker = "(python_full_version >= '3.11' and platform_machine != 'aarch64') or (python_full_version >= '3.11' and sys_platform != 'linux')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "python-dateutil", marker = "python_full_version >= '3.11'" }, { name = "tzdata", marker = "(python_full_version >= '3.11' and sys_platform == 'emscripten') or (python_full_version >= '3.11' and sys_platform == 'win32')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/de/da/b1dc0481ab8d55d0f46e343cfe67d4551a0e14fcee52bd38ca1bd73258d8/pandas-3.0.0.tar.gz", hash = "sha256:0facf7e87d38f721f0af46fe70d97373a37701b1c09f7ed7aeeb292ade5c050f", size = 4633005, upload-time = "2026-01-21T15:52:04.726Z" } @@ -8859,8 +8892,8 @@ name = "pyquaternion" version = "0.9.9" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and platform_machine != 'aarch64') or (python_full_version < '3.11' and sys_platform != 'linux')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and platform_machine != 'aarch64') or (python_full_version >= '3.11' and sys_platform != 'linux')" }, + { name = "numpy", 
version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/3d092aa20efaedacb89c3221a92c6491be5b28f618a2c36b52b53e7446c2/pyquaternion-0.9.9.tar.gz", hash = "sha256:b1f61af219cb2fe966b5fb79a192124f2e63a3f7a777ac3cadf2957b1a81bea8", size = 15530, upload-time = "2020-10-05T01:31:30.327Z" } wheels = [ @@ -11403,6 +11436,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, ] +[[package]] +name = "usd-core" +version = "26.5" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/a0/639e148c16a0ec201cc4848aa3da4aba8805e17a2d9e2398eec399fd3051/usd_core-26.5-cp310-none-macosx_10_15_universal2.whl", hash = "sha256:d6a3a567e313841b7390ea7a930bf5aef08bdb912974c725becd725d83edb0f9", size = 39723088, upload-time = "2026-04-24T20:17:23.663Z" }, + { url = "https://files.pythonhosted.org/packages/d7/26/6cb620a64f3fafa38b84008d916eee47c70e5313c5d88c9087edf4d57522/usd_core-26.5-cp310-none-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85a1484024cdcefd77aac32a3b98e698f655e01951d62cc4d3fb3826e232400c", size = 28820064, upload-time = "2026-04-24T20:17:27.161Z" }, + { url = "https://files.pythonhosted.org/packages/00/d7/7814c95ca0b13a26313e5256472f90cfa2ab7f7cf3103b0d3611d41156e6/usd_core-26.5-cp310-none-win_amd64.whl", hash = "sha256:dff985cbfe24870a5dfe1c578acd918a358cd1680a17777d83b55d50f5560c18", size = 13450099, upload-time = "2026-04-24T20:17:29.994Z" }, + { url = 
"https://files.pythonhosted.org/packages/39/3a/adf7a4043e70974b84d3a572f928ffdd1176a070595cd17f028062622ade/usd_core-26.5-cp311-none-macosx_10_15_universal2.whl", hash = "sha256:b5416a108080311632b975da71b4ea480757ac6e7ea19b30bcd0eed6a3b6081f", size = 39723550, upload-time = "2026-04-24T20:17:32.975Z" }, + { url = "https://files.pythonhosted.org/packages/e2/7f/575b0ddc2a3effa1dc1f50ed67ae0def8f9ed961c69bfbb89a0a1c9ceaf8/usd_core-26.5-cp311-none-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:60076c97f0de2611dc39d2d25826e3b22a2b0e391c73806b4a072d69929f329e", size = 28825210, upload-time = "2026-04-24T20:17:37.136Z" }, + { url = "https://files.pythonhosted.org/packages/9f/51/9fb7c817f1ee7aff02adde8ec4805ff4add06482e036fe0914ab8e9cdbc5/usd_core-26.5-cp311-none-win_amd64.whl", hash = "sha256:1ff2031095ecdc2f9ff4e245114e6ab7001f7dec8fe75436b5beb72e1a280f57", size = 13450734, upload-time = "2026-04-24T20:17:39.641Z" }, + { url = "https://files.pythonhosted.org/packages/8d/cc/04870cc3ae8e1b3a4e168efea47e389cfab6ab4f619005da2443a10390d4/usd_core-26.5-cp312-none-macosx_10_15_universal2.whl", hash = "sha256:a9df2864e84b83ffc9cc0f2777a49170180f84f2b679bcd014d72036a51d057c", size = 39775789, upload-time = "2026-04-24T20:17:43.025Z" }, + { url = "https://files.pythonhosted.org/packages/77/62/963d3aba966539917d01e4a2169a1c07f7b3df087fc16ee39fc764214969/usd_core-26.5-cp312-none-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:caa2447252aeada8c158faacd4d448f29cf1617aeccef5bb954734b93c8f3f62", size = 28743527, upload-time = "2026-04-24T20:17:46.631Z" }, + { url = "https://files.pythonhosted.org/packages/3b/b0/645ae6e27a9768e570c1044efd6d2369c10c5c2412669314b3d6cd914803/usd_core-26.5-cp312-none-win_amd64.whl", hash = "sha256:6d887b010c756508d2e1f770626201f1f4ba5227c052c1135ba9c19932c4da8e", size = 13494028, upload-time = "2026-04-24T20:17:49.599Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/cd/128de2e16d597eb0868dde7cc837a908b28ec2a0d90d4697714b6770449b/usd_core-26.5-cp313-none-macosx_10_15_universal2.whl", hash = "sha256:ce5e90a6795b93d7e744694e5209ea2f1754f9d596e67a89f0cc3590e9fff578", size = 39776038, upload-time = "2026-04-24T20:17:52.535Z" }, + { url = "https://files.pythonhosted.org/packages/f1/10/88838fd371592cfc3d972547ab4361e2deef5891d89c22a509de0e6696ce/usd_core-26.5-cp313-none-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd4d3de388e6dfec91fa5ee9fa29800d43ebe86cbf7a10380ec02b15386fca67", size = 28743992, upload-time = "2026-04-24T20:17:55.995Z" }, + { url = "https://files.pythonhosted.org/packages/62/05/da8f44024e0f947c13da3bdae0d4ac6c04cb86de92a6f1b9bf03e6bb8ae8/usd_core-26.5-cp313-none-win_amd64.whl", hash = "sha256:b077ea37dfeb15ca6b24ca33b65c2fe9b1656138e1fda74e4eae9793a149a7d5", size = 13494201, upload-time = "2026-04-24T20:17:59.015Z" }, + { url = "https://files.pythonhosted.org/packages/3d/57/01cc4e412feaad5aaee09d09ead2afbd5b4022e3d3b5461adcbf726ca3f8/usd_core-26.5-cp314-none-macosx_10_15_universal2.whl", hash = "sha256:5b0acd9a1d804cb73d58815365ccb141727f635f4e6764609fade3bf4ef5cbba", size = 39927684, upload-time = "2026-04-24T20:18:01.828Z" }, + { url = "https://files.pythonhosted.org/packages/fd/0d/5b87f5d7c3501bd5296b0bba7ba8a3eaf639ded53b9a17e910ee3363dfc0/usd_core-26.5-cp314-none-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:755c469ec762f3b69d87f5e8af8f8098e4c107bf4c15ce570a042ac2fc2dbb76", size = 28776483, upload-time = "2026-04-24T20:18:05.082Z" }, + { url = "https://files.pythonhosted.org/packages/5a/48/d29a4649df00455174a5979fc8291021199bb2115d623378364b58055bb5/usd_core-26.5-cp314-none-win_amd64.whl", hash = "sha256:7654b5dfef6e7177849aa7e69962feb82a5312ad08469983214aae5821601296", size = 14043860, upload-time = "2026-04-24T20:18:07.896Z" }, +] + [[package]] name = "uuid-utils" version = "0.14.0" From eefa3b0af1c7edbb785b92a13ddf7d47f84961f4 
Mon Sep 17 00:00:00 2001 From: Nabla7 Date: Fri, 26 Jun 2026 04:49:11 +0200 Subject: [PATCH 2/6] Refine scene cooking workflow --- dimos/experimental/pimsim/scene/README.md | 178 +++++++++ .../pimsim/scene/browser_collision.py | 36 +- dimos/experimental/pimsim/scene/command.py | 157 ++++++++ dimos/experimental/pimsim/scene/cook.py | 177 ++++++++- .../pimsim/scene/entity_collision.py | 3 - dimos/experimental/pimsim/scene/inspect.py | 3 - dimos/experimental/pimsim/scene/plan.py | 227 ++++++++++- dimos/experimental/pimsim/scene/sidecar.py | 85 ++++- .../experimental/pimsim/scene/source_asset.py | 328 ++++++++++++++++ .../pimsim/scene/test_source_asset.py | 109 ++++++ dimos/experimental/pimsim/scene/test_spec.py | 125 ++++++ .../pimsim/scene/test_visual_glb.py | 135 +++++++ .../pimsim/scene/visual_blender.py | 53 +-- dimos/experimental/pimsim/scene/visual_glb.py | 355 ++++++++++++++++-- dimos/simulation/mujoco/collision_spec.py | 15 +- dimos/simulation/mujoco/scene_mesh_to_mjcf.py | 85 ++++- dimos/simulation/scene_assets/mesh_scene.py | 63 ++-- dimos/simulation/scene_assets/spec.py | 12 + 18 files changed, 1982 insertions(+), 164 deletions(-) create mode 100644 dimos/experimental/pimsim/scene/command.py create mode 100644 dimos/experimental/pimsim/scene/source_asset.py create mode 100644 dimos/experimental/pimsim/scene/test_source_asset.py create mode 100644 dimos/experimental/pimsim/scene/test_visual_glb.py diff --git a/dimos/experimental/pimsim/scene/README.md b/dimos/experimental/pimsim/scene/README.md index a2e52079cb..74aa8a31f3 100644 --- a/dimos/experimental/pimsim/scene/README.md +++ b/dimos/experimental/pimsim/scene/README.md @@ -21,7 +21,10 @@ data/scene_packages// ├── scene.meta.json manifest: alignment, artifact paths, entities, stats ├── mujoco// │ ├── wrapper.xml scene-only MJCF, no robot +│ ├── wrapper.mjb optional scene-only compiled MuJoCo model │ └── *.obj static collision assets +├── mujoco/composed/ +│ └── _.mjb optional robot+scene compiled MuJoCo 
model ├── entities// │ ├── visual.glb per-entity visual, in entity-local frame │ └── mujoco_collision/ cook-time convex hulls @@ -34,6 +37,51 @@ data/scene_packages// Packages are content-hash keyed on the source mesh, alignment, sidecar, and cook schema version. Changing one of those inputs creates a new cooked output. +## MuJoCo Artifact Split + +There are two MuJoCo loading modes. Keep the distinction explicit when reviewing +or testing scene packages. + +### Scene Package XML + +This is the default path: + +```text +scene.meta.json -> mujoco//wrapper.xml + entities -> runtime attaches robot -> compile MjModel +``` + +Use it for normal scenes and for robot-agnostic packages. The package contains +the world only. At runtime `MujocoSimModule` loads `wrapper.xml`, attaches the +requested robot MJCF, adds dynamic entities from `scene.meta.json`, and compiles +one in-memory model. This keeps one cooked package usable by many robots and +spawn points. + +Tradeoff: MuJoCo XML compile cost is paid at startup. That is fine for office- +scale scenes. It is not fine for product-dense scenes with tens of thousands of +individual geoms. + +### Composed Binary MJB + +This is the fast-load path for huge scenes: + +```text +wrapper.xml + robot MJCF + selected spawn/entities -> composed/_.mjb +``` + +Use it when XML compile time dominates startup, such as the supermarket scene +with thousands of shelf products. The `.mjb` already contains the robot, scene, +spawn pose, static collision, and any runtime entities chosen for that build. +MuJoCo loads that binary model directly. + +Tradeoff: a composed `.mjb` is not robot-agnostic. Build one per robot model, +spawn/entity configuration, and meaningful scene revision. You also cannot edit +it with `MjSpec` after loading it; if the robot, spawn, or dynamic-entity set +changes, rebuild the binary from the XML package. 
+ +The scene-only `wrapper.mjb` produced by `--compile-mujoco-binary` is useful for +profiling and cache validation, but it does not replace runtime robot attach. +For G1 WBC testing, use a composed robot+scene `.mjb`. + ## Spec And Backends `dimos/simulation/scene_assets/spec.py` is the shared scene-package contract. It @@ -84,6 +132,121 @@ systems load their artifacts. The DimOS office scene is the reference example below. +## Authored Blender Sources + +The cooker accepts `.blend` files as authored scene sources. A Blender file is +not a concrete mesh asset: it can contain view layers, disabled source +collections, Geometry Nodes, procedural instances, cameras, lights, text, and +other authoring data. Before the normal cook starts, DimOS runs Blender +headlessly and normalizes the evaluated dependency graph into a GLB: + +```text +scene.blend -> evaluated depsgraph GLB -> normal scene cook +``` + +The normalizer walks `depsgraph.object_instances`, so Geometry Nodes and +instanced collection content are realized as concrete mesh nodes instead of +being dropped by a plain Blender glTF export. Direct mesh formats (`.glb`, +`.gltf`, `.obj`, `.ply`, `.stl`, `.usd`, `.usda`, `.usdc`, `.usdz`) skip this +step. 
+ +To inspect a `.blend` scene with the same geometry the cooker will see: + +```bash +python - <<'PY' +from pathlib import Path + +import numpy as np + +from dimos.experimental.pimsim.scene.source_asset import prepare_scene_source +from dimos.simulation.scene_assets.mesh_scene import SceneMeshAlignment, load_scene_prims + +prepared = prepare_scene_source(Path("data/my_scene/source.blend")) +print(prepared.to_json_dict()) + +for prim in load_scene_prims(prepared.cook_path, alignment=SceneMeshAlignment()): + name = prim.visual_node_name or prim.prim_path or prim.name + if "Floor" not in name: + continue + lo = np.min(prim.vertices, axis=0) + hi = np.max(prim.vertices, axis=0) + print(name, lo.round(4).tolist(), hi.round(4).tolist()) +PY +``` + +Then cook the `.blend` directly: + +```bash +python -m dimos.experimental.pimsim.scene.cook \ + data/my_scene/source.blend \ + --cook-spec data/my_scene/source.cook.json \ + --output-dir data/scene_packages/my_scene \ + --rebake +``` + +The sidecar still targets normalized mesh prim names. Inspect first, then write +the sidecar against the names printed from `load_scene_prims(prepared.cook_path)`. + +To also emit a scene-only MuJoCo binary for profiling or cache validation, add +`--compile-mujoco-binary`: + +```bash +python -m dimos.experimental.pimsim.scene.cook \ + data/my_scene/source.blend \ + --cook-spec data/my_scene/source.cook.json \ + --output-dir data/scene_packages/my_scene \ + --compile-mujoco-binary \ + --rebake +``` + +That writes `mujoco//wrapper.mjb`. It is still scene-only; it does not +include a robot. + +## Tool Diagnostics + +Scene cooking shells out to production asset tools. Their output is streamed +through the DimOS logger with a command label, elapsed-time heartbeats, and a +tail of recent output on failure. Long Blender imports should therefore show +which stage is running: source normalization, authored visual extraction, +browser visual import, decimation, join, or export. 
+ +`gltfpack` is the preferred browser visual optimizer for GLB inputs, but install +a native meshoptimizer `gltfpack` binary when using texture compression. The +Node/npx package can optimize geometry, but it is built without WebP/KTX texture +compression support. If a cook fails immediately after requesting +`--visual-texture-format webp` or `ktx2`, put a native `gltfpack` on `PATH` or +use `--visual-texture-format none`. + +The default visual cook is conservative: DimOS passes `-noq` to gltfpack so the +output GLB has no required quantization extension. This produces larger files, +but keeps the artifact loadable by generic GLB consumers such as Rerun. Use +`--visual-quantize` only when the target viewer supports `KHR_mesh_quantization`. +With uncompressed textures, DimOS also rewrites embedded images to ordinary +8-bit PNG payloads after gltfpack. This keeps the source materials textured +while avoiding renderer-specific failures from high-bit-depth PNGs or required +texture-compression extensions. Use `--no-visual-texture-normalization` only +when a downstream viewer needs the original embedded texture encoding. +The final GLB sanitize pass also demotes `KHR_texture_transform` from +`extensionsRequired` to `extensionsUsed`, since Rerun rejects it as required but +can still load the asset when the transform is treated as optional. + +If `gltfpack` exits with only `unreachable`, that is an internal optimizer +crash. Re-run with the native binary first. If it still fails, diagnose with +`--visual-optimizer blender` or `copy`, or split the visual asset into smaller +source chunks before optimizing. + +For product-dense authored scenes, glTF GPU instancing can keep browser visuals +small. The supermarket source has tens of thousands of repeated product +instances; without instancing, an optimizer can turn compact source meshes into +millions of duplicated output triangles. 
Keep the default off when the package +must render in generic Rerun viewers, and enable it explicitly with +`--visual-gpu-instancing` only for viewers that support +`EXT_mesh_gpu_instancing`. +If preserving the authored node instancing matters more than reducing draw +calls, use `--visual-optimizer copy` after the filtered static visual source has +been generated. That keeps repeated assets as normal glTF nodes instead of +flattening them into a giant mesh. + ## Office Example The office source mesh lives in data: @@ -227,6 +390,21 @@ At runtime, `MujocoSimModule`: The robot MJCF must stay robot-only: no office floor, no scene walls, no furniture, no manipulation rig. Scene geometry belongs in the cooked package. +For a large scene where XML compile is too slow, load a composed binary model +instead of the scene package metadata: + +```bash +python -m dimos.robot.cli.dimos \ + --simulation mujoco \ + --scene /home/pim/Desktop/dimos-scene-cooking-part2/data/scene_packages/supermarket_static_product_primitives_20dyn/mujoco/composed/unitree-g1-groot-wbc_spawn_9p2_11p8_yaw_m1p57.mjb \ + --n-workers 10 \ + run unitree-g1-groot-wbc \ + -o mujocosimmodule.headless=false +``` + +That path skips scene-package runtime composition. The `.mjb` already contains +the G1 robot and the supermarket scene at the authored test spawn. 
+ ## Sidecar Schema `.cook.json` can contain static-collision policy and interactables: diff --git a/dimos/experimental/pimsim/scene/browser_collision.py b/dimos/experimental/pimsim/scene/browser_collision.py index 0b7e9f35dd..4c594752b6 100644 --- a/dimos/experimental/pimsim/scene/browser_collision.py +++ b/dimos/experimental/pimsim/scene/browser_collision.py @@ -23,7 +23,6 @@ import numpy as np import open3d as o3d # type: ignore[import-untyped] -import trimesh from dimos.experimental.pimsim.scene.inspect import inspect_scene_asset from dimos.simulation.mujoco.collision_spec import CollisionSpec @@ -116,6 +115,8 @@ def cook_browser_collision( def _write_glb(mesh: o3d.geometry.TriangleMesh, path: Path) -> None: + import trimesh + vertices = np.asarray(mesh.vertices, dtype=np.float64) faces = np.asarray(mesh.triangles, dtype=np.int64) if len(vertices) == 0 or len(faces) == 0: @@ -133,7 +134,25 @@ def _load_collision_prims( source_alignment = alignment or SceneMeshAlignment(y_up=False) prims = load_scene_prims(source, alignment=source_alignment) - if spec.split_disconnected_components: + has_forced_splits = any( + bool(override.get("split_components")) for override in spec.prim_overrides.values() + ) + if spec.split_disconnected_components or has_forced_splits: + + def _split_override(prim: ScenePrimMesh) -> dict[str, object]: + return spec.resolve(prim.prim_path or prim.name) + + def _can_split_prim(prim: ScenePrimMesh) -> bool: + override = _split_override(prim) + if override.get("split_components"): + return True + return ( + spec.split_disconnected_components and override.get("type", spec.default) == "auto" + ) + + def _force_split_prim(prim: ScenePrimMesh) -> bool: + return bool(_split_override(prim).get("split_components")) + prims, split_stats = split_disconnected_scene_prims( prims, min_components=spec.split_min_components, @@ -142,9 +161,8 @@ def _load_collision_prims( axis_ratio=spec.split_axis_ratio, min_component_extent=spec.split_component_min_extent_m, 
min_component_faces=spec.split_component_min_faces, - can_split=lambda prim: ( - spec.resolve(prim.prim_path or prim.name).get("type", spec.default) == "auto" - ), + can_split=_can_split_prim, + force_split=_force_split_prim, ) if split_stats["split_prims"]: logger.info( @@ -239,11 +257,3 @@ def _mesh_from_arrays(vertices: np.ndarray, faces: np.ndarray) -> o3d.geometry.T mesh.vertices = o3d.utility.Vector3dVector(vertices) mesh.triangles = o3d.utility.Vector3iVector(faces.astype(np.int32)) return mesh - - -__all__ = [ - "OBJECTS_SIDECAR_NAME", - "BrowserCollisionCookResult", - "cook_browser_collision", - "extract_scene_objects", -] diff --git a/dimos/experimental/pimsim/scene/command.py b/dimos/experimental/pimsim/scene/command.py new file mode 100644 index 0000000000..f84782a4f3 --- /dev/null +++ b/dimos/experimental/pimsim/scene/command.py @@ -0,0 +1,157 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
_DEFAULT_HEARTBEAT_SECONDS = 30.0
_DEFAULT_TAIL_LINES = 30


def run_logged_command(
    args: Sequence[str],
    label: str,
    *,
    heartbeat_seconds: float = _DEFAULT_HEARTBEAT_SECONDS,
    tail_lines: int = _DEFAULT_TAIL_LINES,
    line_log_filter: Callable[[str], bool] | None = None,
) -> str:
    """Run a command while streaming output and emitting heartbeat logs.

    Blender and mesh optimizers can run for minutes on production scenes. Using
    ``subprocess.run(stdout=PIPE)`` hides all progress until the command exits,
    which makes failures and stalls indistinguishable to operators.

    Output is pumped by a background thread into a queue instead of polling the
    pipe with ``selectors``: a buffered text stream may already hold several
    lines that ``select()`` on the raw fd cannot see, and ``select()`` on pipes
    is not available on every platform.

    Args:
        args: Command and arguments; every element is converted with ``str``.
        label: Human-readable name attached to every log record.
        heartbeat_seconds: Minimum interval between "still running" records.
        tail_lines: Number of recent output lines kept for heartbeats and for
            the failure message. Values ``<= 0`` keep no tail.
        line_log_filter: Optional predicate; when given, only lines for which
            it returns true are logged individually. All lines are still
            captured in the returned output.

    Returns:
        The combined stdout/stderr of the command, one right-stripped line per
        output line, joined with ``"\\n"``.

    Raises:
        RuntimeError: If the command exits with a non-zero status.
    """
    # Local imports keep this block self-contained; only this helper needs them.
    import queue
    import threading

    argv = [str(arg) for arg in args]
    command = " ".join(shlex.quote(arg) for arg in argv)
    logger.info("scene cook command started", label=label, command=command)
    started = time.monotonic()
    last_heartbeat = started
    output_lines: list[str] = []
    tail: deque[str] = deque(maxlen=max(tail_lines, 0))

    proc = subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    )
    stdout = proc.stdout
    assert stdout is not None  # guaranteed by stdout=PIPE; narrows the type

    # ``None`` is the end-of-stream sentinel pushed by the reader thread.
    pending: queue.Queue[str | None] = queue.Queue()
    pump_errors: list[BaseException] = []

    def _pump_stdout() -> None:
        try:
            for raw_line in stdout:
                pending.put(raw_line)
        except Exception as exc:  # surfaced in the caller's thread below
            pump_errors.append(exc)
        finally:
            pending.put(None)

    def _record(line: str) -> None:
        output_lines.append(line)
        tail.append(line)
        if line_log_filter is None or line_log_filter(line):
            logger.info("scene cook command output", label=label, line=line)

    reader = threading.Thread(
        target=_pump_stdout,
        name=f"scene-cook-output-{label}",
        daemon=True,
    )
    reader.start()

    stdout_open = True
    returncode: int | None = None
    try:
        while True:
            if stdout_open:
                try:
                    item = pending.get(timeout=1.0)
                except queue.Empty:
                    pass
                else:
                    if item is None:
                        stdout_open = False
                        if pump_errors:
                            # e.g. undecodable output; fail loudly instead of
                            # silently truncating the captured log.
                            raise pump_errors[0]
                    else:
                        _record(item.rstrip())
            else:
                # The child closed its output but has not exited yet.
                time.sleep(0.1)

            now = time.monotonic()
            if now - last_heartbeat >= heartbeat_seconds:
                logger.info(
                    "scene cook command still running",
                    label=label,
                    elapsed_s=round(now - started, 1),
                    recent_output=list(tail),
                )
                last_heartbeat = now

            # Only stop once every line has been consumed, so no output is lost.
            if not stdout_open:
                returncode = proc.poll()
                if returncode is not None:
                    break
    except BaseException:
        if proc.poll() is None:
            logger.warning("scene cook command interrupted; terminating", label=label)
            proc.terminate()
            with contextlib.suppress(subprocess.TimeoutExpired):
                proc.wait(timeout=5.0)
            if proc.poll() is None:
                logger.warning("scene cook command did not terminate; killing", label=label)
                proc.kill()
                proc.wait()
        raise
    finally:
        reader.join(timeout=5.0)
        # Closing a stream another thread is still blocked on can deadlock;
        # the daemon reader will release it when the pipe finally closes.
        if not reader.is_alive():
            stdout.close()

    elapsed_s = time.monotonic() - started
    output = "\n".join(output_lines)
    if returncode != 0:
        raise RuntimeError(
            f"{label} failed with exit code {returncode} after {elapsed_s:.1f}s\n"
            f"command: {command}\n"
            f"last output:\n{_tail(output, tail_lines)}"
        )

    logger.info(
        "scene cook command finished",
        label=label,
        elapsed_s=round(elapsed_s, 1),
        recent_output=list(tail),
    )
    return output


def _tail(output: str, tail_lines: int) -> str:
    """Return the last ``tail_lines`` lines of ``output`` joined by newlines.

    A non-positive ``tail_lines`` yields an empty string; a plain
    ``[-tail_lines:]`` slice would return the whole output for ``0``.
    """
    if tail_lines <= 0:
        return ""
    return "\n".join(output.splitlines()[-tail_lines:])


def blender_output_line_is_interesting(line: str) -> bool:
    """Return true for Blender output worth streaming during normal cooks."""
    # "Blender quit" is already matched by the "Blender " prefix.
    prefixes = ("DIMOS_", "Blender ", "Error:")
    fragments = ("Read blend:", "Finished glTF", "Traceback", "ERROR")
    return line.startswith(prefixes) or any(fragment in line for fragment in fragments)
a/dimos/experimental/pimsim/scene/cook.py +++ b/dimos/experimental/pimsim/scene/cook.py @@ -25,6 +25,7 @@ import hashlib import json from pathlib import Path +import time from typing import Any from dimos.experimental.pimsim.scene.browser_collision import cook_browser_collision @@ -33,8 +34,9 @@ cook_entity_collision_hulls, ) from dimos.experimental.pimsim.scene.inspect import inspect_scene_asset -from dimos.experimental.pimsim.scene.plan import build_scene_cook_plan +from dimos.experimental.pimsim.scene.plan import EntityPrototypePlan, build_scene_cook_plan from dimos.experimental.pimsim.scene.sidecar import SceneCookSidecar +from dimos.experimental.pimsim.scene.source_asset import prepare_scene_source from dimos.experimental.pimsim.scene.visual_blender import cook_plan_visual_assets from dimos.experimental.pimsim.scene.visual_glb import cook_browser_visual from dimos.simulation.mujoco.collision_spec import CollisionSpec @@ -102,32 +104,43 @@ def cook_scene_package( mujoco_dir = package_dir / "mujoco" package_dir.mkdir(parents=True, exist_ok=True) + prepared_source = prepare_scene_source(source, rebake=rebake) + cook_source = prepared_source.cook_path + stats: dict[str, Any] = { - "source": inspect_scene_asset(source).to_json_dict(), + "source": inspect_scene_asset(cook_source).to_json_dict(), "cook_spec": _cook_spec_json(cook_spec), "cook_version": _COOK_VERSION, } - if sidecar.path is not None or sidecar.interactables: + if prepared_source.normalized: + stats["source_normalization"] = prepared_source.to_json_dict() + if sidecar.path is not None or sidecar.interactables or sidecar.entity_groups: stats["authored_sidecar"] = sidecar.to_json_dict() plan = build_scene_cook_plan( - source, + cook_source, sidecar=sidecar, alignment=align, output_dir=package_dir, collision_spec=collision_spec, ) - stats["cook_plan"] = plan.to_json_dict() + stats["cook_plan"] = { + "source_path": str(plan.source_path), + "sidecar_path": str(plan.sidecar.path) if plan.sidecar.path else 
None, + "entities": len(plan.entities), + "prototypes": [prototype.to_json_dict() for prototype in plan.prototypes], + "stats": plan.stats, + } entities = plan.entities_metadata() if entities: stats["interactables"] = { "count": len(entities), - "ids": [entity["id"] for entity in entities], + "id_samples": [entity["id"] for entity in entities[:100]], "static_visual_filter": "plan/blender", } - visual_source = source + visual_source = cook_source # Only invoke Blender when at least one entity actually extracts from # the source mesh; pure-synthetic sidecars (manip rigs) don't need it. needs_blender = visual.enabled and any( @@ -135,16 +148,25 @@ def cook_scene_package( ) if needs_blender: visual_source = cook_plan_visual_assets( - source, + cook_source, package_dir, plan=plan, rebake=rebake, ) if mujoco.enabled: + prototype_hull_counts = _cook_entity_prototype_collision( + plan.prototypes, + entities, + rebake=rebake, + ) hull_counts = _cook_entity_collision(entities, rebake=rebake) - if hull_counts: - stats["entity_collision"] = {"hulls_per_entity": hull_counts} + if prototype_hull_counts or hull_counts: + stats["entity_collision"] = {} + if prototype_hull_counts: + stats["entity_collision"]["hulls_per_prototype"] = prototype_hull_counts + if hull_counts: + stats["entity_collision"]["hulls_per_entity"] = hull_counts visual_result = cook_browser_visual( visual_source, @@ -159,7 +181,7 @@ def cook_scene_package( } browser_collision_result = cook_browser_collision( - source, + cook_source, browser_dir, alignment=SceneMeshAlignment(y_up=False), spec=browser_collision, @@ -170,9 +192,10 @@ def cook_scene_package( stats["browser_collision"] = browser_collision_result.stats mujoco_scene_path: Path | None = None + mujoco_binary_path: Path | None = None if mujoco.enabled: mujoco_scene_path = load_or_bake( - scene_mesh_path=source, + scene_mesh_path=cook_source, alignment=align, cache_root=mujoco_dir, collision_spec=plan.collision_spec, @@ -180,6 +203,13 @@ def 
cook_scene_package( rebake=rebake, ) stats["mujoco"] = {"scene_path": str(mujoco_scene_path)} + if mujoco.compile_binary: + mujoco_binary_path, binary_stats = _compile_mujoco_binary( + mujoco_scene_path, + rebake=rebake, + ) + stats["mujoco"]["binary_path"] = str(mujoco_binary_path) + stats["mujoco"]["binary"] = binary_stats package = ScenePackage( package_dir=package_dir, @@ -189,6 +219,7 @@ def cook_scene_package( browser_collision_path=browser_collision_result.path if browser_collision_result else None, objects_path=browser_collision_result.objects_path if browser_collision_result else None, mujoco_scene_path=mujoco_scene_path, + mujoco_binary_path=mujoco_binary_path, metadata_path=package_dir / "scene.meta.json", entities=entities, stats=stats, @@ -214,6 +245,8 @@ def _cook_entity_collision( for entity in entities: if entity.get("descriptor", {}).get("shape_hint") != "mesh": continue + if entity.get("collision_paths"): + continue visual_path = entity.get("visual_path") if not visual_path or not Path(visual_path).exists(): logger.warning( @@ -233,6 +266,86 @@ def _cook_entity_collision( return hull_counts +def _cook_entity_prototype_collision( + prototypes: tuple[EntityPrototypePlan, ...], + entities: list[dict[str, Any]], + *, + rebake: bool, +) -> dict[str, int]: + """Cook shared mesh prototypes once and attach hull paths to instances.""" + if not prototypes: + return {} + + hulls_by_prototype: dict[str, list[Path]] = {} + counts: dict[str, int] = {} + for prototype in prototypes: + source_obj = prototype.collision_dir.parent / "source.obj" + if rebake or not source_obj.exists(): + _write_obj(source_obj, prototype.vertices, prototype.triangles) + hull_paths = cook_entity_collision_hulls( + source_obj, + prototype.collision_dir, + rebake=rebake, + ) + if hull_paths: + hulls_by_prototype[prototype.id] = hull_paths + counts[prototype.id] = len(hull_paths) + + for entity in entities: + prototype_id = entity.get("prototype_id") + if not isinstance(prototype_id, 
str): + continue + hull_paths = hulls_by_prototype.get(prototype_id) + if hull_paths: + entity["collision_paths"] = [str(path) for path in hull_paths] + return counts + + +def _write_obj(path: Path, vertices: Any, triangles: Any) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w") as f: + for vertex in vertices: + f.write(f"v {float(vertex[0])} {float(vertex[1])} {float(vertex[2])}\n") + for tri in triangles: + f.write(f"f {int(tri[0]) + 1} {int(tri[1]) + 1} {int(tri[2]) + 1}\n") + + +def _compile_mujoco_binary(scene_xml_path: Path, *, rebake: bool) -> tuple[Path, dict[str, Any]]: + """Compile a scene-only MuJoCo XML wrapper to ``.mjb``. + + This binary is fast to load but not editable as ``MjSpec``. It is a + cache/debug artifact for fixed scene models; robot attachment still + requires the XML wrapper unless a robot-specific composed binary is + produced separately. + """ + import mujoco + + binary_path = scene_xml_path.with_suffix(".mjb") + if binary_path.exists() and not rebake: + return binary_path, { + "cached": True, + "size_bytes": binary_path.stat().st_size, + } + + start = time.perf_counter() + model = mujoco.MjModel.from_xml_path(str(scene_xml_path)) + compile_s = time.perf_counter() - start + + save_start = time.perf_counter() + mujoco.mj_saveModel(model, str(binary_path)) + save_s = time.perf_counter() - save_start + + return binary_path, { + "cached": False, + "compile_seconds": compile_s, + "save_seconds": save_s, + "size_bytes": binary_path.stat().st_size, + "nbody": int(model.nbody), + "ngeom": int(model.ngeom), + "nmesh": int(model.nmesh), + } + + def _package_key( cook_spec: SceneCookSpec, sidecar: SceneCookSidecar, @@ -283,10 +396,43 @@ def cli_main() -> None: choices=("none", "webp", "ktx2"), default="none", ) + parser.add_argument( + "--no-visual-texture-normalization", + action="store_true", + help=( + "do not rewrite embedded visual textures to plain 8-bit PNGs after " + "gltfpack. 
The default preserves textures but avoids viewer-specific " + "compressed or high-bit-depth texture formats." + ), + ) + parser.add_argument( + "--visual-quantize", + action="store_true", + help=( + "allow gltfpack quantization. This makes smaller files but emits " + "KHR_mesh_quantization, which some viewers cannot load." + ), + ) + parser.add_argument( + "--visual-gpu-instancing", + action="store_true", + help=( + "allow gltfpack to emit EXT_mesh_gpu_instancing. This can make " + "dense repeated assets much smaller, but not every viewer supports it." + ), + ) parser.add_argument("--no-browser-collision", action="store_true") parser.add_argument("--browser-collision-target-faces", type=int, default=100_000) parser.add_argument("--no-mujoco", action="store_true") parser.add_argument("--include-mujoco-visual", action="store_true") + parser.add_argument( + "--compile-mujoco-binary", + action="store_true", + help=( + "also compile the scene-only MuJoCo wrapper.xml to wrapper.mjb. " + "Fast to load, but not usable for runtime robot attachment by itself." 
+ ), + ) parser.add_argument("--rebake", action="store_true") args = parser.parse_args() @@ -309,6 +455,9 @@ def cli_main() -> None: None if args.visual_texture_format == "none" else args.visual_texture_format ), max_texture_size=args.visual_max_texture_size, + normalize_textures=not args.no_visual_texture_normalization, + quantize=args.visual_quantize, + use_gpu_instancing=args.visual_gpu_instancing, ), browser_collision_spec=BrowserCollisionSpec( enabled=not args.no_browser_collision, @@ -317,6 +466,7 @@ def cli_main() -> None: mujoco_spec=MujocoSceneSpec( enabled=not args.no_mujoco, include_visual_mesh=args.include_mujoco_visual, + compile_binary=args.compile_mujoco_binary, ), rebake=args.rebake, ) @@ -325,6 +475,3 @@ def cli_main() -> None: if __name__ == "__main__": cli_main() - - -__all__ = ["SCENE_PACKAGE_DIR", "cook_scene_package"] diff --git a/dimos/experimental/pimsim/scene/entity_collision.py b/dimos/experimental/pimsim/scene/entity_collision.py index 1ecd6619b3..bd137a149e 100644 --- a/dimos/experimental/pimsim/scene/entity_collision.py +++ b/dimos/experimental/pimsim/scene/entity_collision.py @@ -136,6 +136,3 @@ def _run_coacd(mesh: object, mesh_path: Path) -> list[tuple[object, object]]: "entity hulls: CoACD failed for %s (%s); using single convex hull", mesh_path, exc ) return [] - - -__all__ = ["COLLISION_DIR_NAME", "cook_entity_collision_hulls"] diff --git a/dimos/experimental/pimsim/scene/inspect.py b/dimos/experimental/pimsim/scene/inspect.py index 41158b548d..889b242a58 100644 --- a/dimos/experimental/pimsim/scene/inspect.py +++ b/dimos/experimental/pimsim/scene/inspect.py @@ -180,6 +180,3 @@ def _inspect_open3d(path: Path) -> SceneAssetStats: vertex_count=len(mesh.vertices), triangle_count=len(mesh.triangles), ) - - -__all__ = ["SceneAssetStats", "inspect_scene_asset"] diff --git a/dimos/experimental/pimsim/scene/plan.py b/dimos/experimental/pimsim/scene/plan.py index 68f81784e2..06a08483bc 100644 --- a/dimos/experimental/pimsim/scene/plan.py 
+++ b/dimos/experimental/pimsim/scene/plan.py @@ -28,8 +28,13 @@ from typing import Any import numpy as np +from scipy.spatial.transform import Rotation as R -from dimos.experimental.pimsim.scene.sidecar import InteractableSpec, SceneCookSidecar +from dimos.experimental.pimsim.scene.sidecar import ( + EntityGroupSpec, + InteractableSpec, + SceneCookSidecar, +) from dimos.simulation.mujoco.collision_spec import CollisionSpec from dimos.simulation.scene_assets.mesh_scene import ( SceneMeshAlignment, @@ -54,9 +59,10 @@ class EntityCookPlan: initial_quat: tuple[float, float, float, float] descriptor: dict[str, Any] visual_path: Path | None + prototype_id: str | None = None def to_metadata(self) -> dict[str, Any]: - return { + metadata = { "id": self.spec.id, "tags": list(self.spec.tags), "source_prim_paths": list(self.spec.source_prim_paths), @@ -83,9 +89,12 @@ def to_metadata(self) -> dict[str, Any]: "physics": self.spec.physics, "visual": self.spec.visual, } + if self.prototype_id is not None: + metadata["prototype_id"] = self.prototype_id + return metadata def to_json_dict(self) -> dict[str, Any]: - return { + raw = { "id": self.spec.id, "safe_id": self.safe_id, "matched_prim_paths": list(self.matched_prim_paths), @@ -97,6 +106,31 @@ def to_json_dict(self) -> dict[str, Any]: "visual_path": str(self.visual_path) if self.visual_path else None, "remove_from_static": self.spec.remove_from_static, } + if self.prototype_id is not None: + raw["prototype_id"] = self.prototype_id + return raw + + +@dataclass(frozen=True) +class EntityPrototypePlan: + """Shared source mesh cooked once and instanced by many entities.""" + + id: str + safe_id: str + source_prim_path: str + vertices: np.ndarray + triangles: np.ndarray + collision_dir: Path + + def to_json_dict(self) -> dict[str, Any]: + return { + "id": self.id, + "safe_id": self.safe_id, + "source_prim_path": self.source_prim_path, + "vertex_count": len(self.vertices), + "triangle_count": len(self.triangles), + "collision_dir": 
str(self.collision_dir), + } @dataclass(frozen=True) @@ -108,6 +142,7 @@ class SceneCookPlan: sidecar: SceneCookSidecar collision_spec: CollisionSpec entities: tuple[EntityCookPlan, ...] = () + prototypes: tuple[EntityPrototypePlan, ...] = () stats: dict[str, Any] = field(default_factory=dict) @property @@ -128,6 +163,7 @@ def to_json_dict(self) -> dict[str, Any]: }, "sidecar_path": str(self.sidecar.path) if self.sidecar.path else None, "entities": [entity.to_json_dict() for entity in self.entities], + "prototypes": [prototype.to_json_dict() for prototype in self.prototypes], "stats": self.stats, } @@ -142,7 +178,7 @@ def build_scene_cook_plan( ) -> SceneCookPlan: source = Path(source_path).expanduser().resolve() base_collision = collision_spec or sidecar.collision - if not sidecar.interactables: + if not sidecar.interactables and not sidecar.entity_groups: return SceneCookPlan( source_path=source, alignment=alignment, @@ -152,9 +188,11 @@ def build_scene_cook_plan( ) entities_dir = Path(output_dir).expanduser().resolve() / "entities" - needs_prims = any(item.source_prim_paths for item in sidecar.interactables) + needs_prims = bool(sidecar.entity_groups) or any( + item.source_prim_paths for item in sidecar.interactables + ) prims = load_scene_prims(source, alignment=alignment) if needs_prims else [] - entities = tuple( + explicit_entities = tuple( ( _build_synthetic_entity_plan(item, entities_dir) if item.is_synthetic @@ -162,14 +200,29 @@ def build_scene_cook_plan( ) for item in sidecar.interactables ) - effective_collision = _collision_spec_with_entity_skips(base_collision, entities) + group_entities, prototypes, group_skip_patterns = _build_entity_group_plans( + sidecar.entity_groups, + prims, + entities_dir, + ) + entities = (*explicit_entities, *group_entities) + effective_collision = _collision_spec_with_entity_skips( + base_collision, + entities, + group_skip_patterns=group_skip_patterns, + ) return SceneCookPlan( source_path=source, alignment=alignment, 
sidecar=sidecar, collision_spec=effective_collision, entities=entities, - stats={"source_prims": len(prims), "entities": len(entities)}, + prototypes=prototypes, + stats={ + "source_prims": len(prims), + "entities": len(entities), + "entity_prototypes": len(prototypes), + }, ) @@ -261,6 +314,97 @@ def _build_synthetic_entity_plan( ) +def _build_entity_group_plans( + groups: tuple[EntityGroupSpec, ...], + prims: list[ScenePrimMesh], + entities_dir: Path, +) -> tuple[tuple[EntityCookPlan, ...], tuple[EntityPrototypePlan, ...], tuple[str, ...]]: + entities: list[EntityCookPlan] = [] + prototypes_by_id: dict[str, EntityPrototypePlan] = {} + group_skip_patterns: list[str] = [] + + for group in groups: + matched = sorted((prim for prim in prims if group.matches(prim)), key=_prim_sort_key) + if not matched: + patterns = ", ".join(group.source_prim_paths) + raise ValueError( + f"scene entity group {group.id_prefix!r} matched no source prims: {patterns}" + ) + if group.remove_from_static: + group_skip_patterns.extend(group.source_prim_paths) + + physics = {"shape": "mesh", **group.physics} + for index, prim in enumerate(matched): + entity, prototype = _build_group_entity_plan( + group, + prim, + index=index, + physics=physics, + entities_dir=entities_dir, + ) + entities.append(entity) + prototypes_by_id.setdefault(prototype.id, prototype) + + return tuple(entities), tuple(prototypes_by_id.values()), tuple(group_skip_patterns) + + +def _build_group_entity_plan( + group: EntityGroupSpec, + prim: ScenePrimMesh, + *, + index: int, + physics: dict[str, Any], + entities_dir: Path, +) -> tuple[EntityCookPlan, EntityPrototypePlan]: + prim_path = prim.prim_path or prim.name + prototype_key = _entity_group_prototype_key(group, prim) + prototype_safe_id = _safe_entity_id(f"{group.id_prefix}_{prototype_key}") + entity_id = f"{group.id_prefix}_{index:05d}_{prototype_safe_id}" + spec = InteractableSpec( + id=entity_id, + source_prim_paths=(prim_path,), + 
remove_from_static=group.remove_from_static, + spawn=group.spawn, + kind=group.kind, + mass=group.mass, + tags=group.tags, + physics=physics, + visual=group.visual, + ) + + vertices = np.asarray(prim.vertices, dtype=np.float64) + aabb_min_np = vertices.min(axis=0).astype(float) + aabb_max_np = vertices.max(axis=0).astype(float) + extents = np.maximum(aabb_max_np - aabb_min_np, 1e-4).astype(float) + local_vertices, center_np, quat = _localize_prim_mesh(vertices) + shape_hint, shape_extents = _resolve_shape(spec, extents) + descriptor = _make_descriptor(spec, shape_hint, shape_extents, visual_path=None) + descriptor["prototype_id"] = prototype_safe_id + + entity = EntityCookPlan( + spec=spec, + safe_id=_safe_entity_id(entity_id), + matched_prim_paths=(prim_path,), + visual_node_patterns=(), + aabb_min=(float(aabb_min_np[0]), float(aabb_min_np[1]), float(aabb_min_np[2])), + aabb_max=(float(aabb_max_np[0]), float(aabb_max_np[1]), float(aabb_max_np[2])), + center=(float(center_np[0]), float(center_np[1]), float(center_np[2])), + initial_quat=quat, + descriptor=descriptor, + visual_path=None, + prototype_id=prototype_safe_id, + ) + prototype = EntityPrototypePlan( + id=prototype_safe_id, + safe_id=prototype_safe_id, + source_prim_path=prim_path, + vertices=local_vertices.astype(np.float32), + triangles=np.asarray(prim.triangles, dtype=np.int32), + collision_dir=entities_dir / "_prototypes" / prototype_safe_id / "mujoco_collision", + ) + return entity, prototype + + def _resolve_shape( spec: InteractableSpec, extents_np: np.ndarray, @@ -305,6 +449,52 @@ def _make_descriptor( return descriptor +def _entity_group_prototype_key(group: EntityGroupSpec, prim: ScenePrimMesh) -> str: + prim_path = prim.prim_path or prim.visual_node_name or prim.name + if group.prototype_key == "prim_path": + return prim_path + if group.prototype_key != "mesh_name": + raise ValueError( + f"entity group {group.id_prefix!r}: unsupported prototype_key {group.prototype_key!r}" + ) + + basename = 
prim_path.lstrip("/").rsplit("/", 1)[-1] + if "__" in basename: + basename = basename.split("__", 1)[1] + basename = basename.rsplit("_Mesh", 1)[0] + repeated = re.match(r"^(.+?)\.\d+_\1$", basename) + if repeated: + return repeated.group(1) + return _HASH_SUFFIX_RE.sub("", basename) + + +def _localize_prim_mesh( + vertices: np.ndarray, +) -> tuple[np.ndarray, tuple[float, float, float], tuple[float, float, float, float]]: + aabb_min = vertices.min(axis=0) + aabb_max = vertices.max(axis=0) + center = (aabb_min + aabb_max) * 0.5 + centered = vertices - center + cov = centered.T @ centered + _, axes = np.linalg.eigh(cov) + axes = axes[:, ::-1] + if np.linalg.det(axes) < 0.0: + axes[:, 2] *= -1.0 + local_vertices = centered @ axes + quat_xyzw = R.from_matrix(axes).as_quat() + quat_wxyz = ( + float(quat_xyzw[3]), + float(quat_xyzw[0]), + float(quat_xyzw[1]), + float(quat_xyzw[2]), + ) + return ( + local_vertices, + (float(center[0]), float(center[1]), float(center[2])), + quat_wxyz, + ) + + def _visual_node_patterns(prims: list[ScenePrimMesh]) -> tuple[str, ...]: names: list[str] = [] for prim in prims: @@ -319,13 +509,27 @@ def _visual_node_patterns(prims: list[ScenePrimMesh]) -> tuple[str, ...]: def _collision_spec_with_entity_skips( collision_spec: CollisionSpec, entities: tuple[EntityCookPlan, ...], + *, + group_skip_patterns: tuple[str, ...] = (), ) -> CollisionSpec: - prim_overrides: dict[str, dict[str, Any]] = dict(collision_spec.prim_overrides) + entity_skip_overrides: dict[str, dict[str, Any]] = {} + for pattern in group_skip_patterns: + entity_skip_overrides[pattern] = {"type": "skip", "visual": False} for entity in entities: + if entity.prototype_id is not None: + continue if not entity.spec.remove_from_static: continue for prim_path in sorted(entity.matched_prim_paths): - prim_overrides.setdefault(prim_path, {"type": "skip"}) + entity_skip_overrides[prim_path] = {"type": "skip", "visual": False} + + # CollisionSpec.resolve() is first-match-wins. 
Entity extraction must + # take precedence over broad class overrides such as "Grocery_Scatter_*", + # otherwise extracted dynamic entities are duplicated in static collision. + prim_overrides: dict[str, dict[str, Any]] = { + **entity_skip_overrides, + **collision_spec.prim_overrides, + } return replace(collision_spec, prim_overrides=prim_overrides) @@ -336,6 +540,3 @@ def _prim_sort_key(prim: ScenePrimMesh) -> tuple[str, str]: def _safe_entity_id(entity_id: str) -> str: safe = "".join(c if c.isalnum() or c in {"-", "_", "."} else "_" for c in entity_id) return safe or "entity" - - -__all__ = ["EntityCookPlan", "SceneCookPlan", "build_scene_cook_plan"] diff --git a/dimos/experimental/pimsim/scene/sidecar.py b/dimos/experimental/pimsim/scene/sidecar.py index 492700199b..c0fa0be3cd 100644 --- a/dimos/experimental/pimsim/scene/sidecar.py +++ b/dimos/experimental/pimsim/scene/sidecar.py @@ -36,6 +36,7 @@ CookEntitySpawn = Literal["initial", "manual"] CookEntityKind = Literal["dynamic", "kinematic", "static"] +EntityGroupMode = Literal["per_prim"] _COOK_SIDECAR_SUFFIXES = (".cook.json", ".scene.json") @@ -112,6 +113,71 @@ def matches(self, prim: ScenePrimMesh) -> bool: ) +@dataclass(frozen=True) +class EntityGroupSpec: + """Pattern-authored runtime entities expanded from many source prims. + + ``mode="per_prim"`` creates one runtime entity for each matched source + prim. The cook plan can then share collision prototypes across repeated + source mesh names instead of decomposing every instance independently. + """ + + id_prefix: str + source_prim_paths: tuple[str, ...] + mode: EntityGroupMode = "per_prim" + remove_from_static: bool = True + spawn: CookEntitySpawn = "initial" + kind: CookEntityKind = "dynamic" + mass: float = 1.0 + tags: tuple[str, ...] 
= () + physics: dict[str, Any] = field(default_factory=dict) + visual: dict[str, Any] = field(default_factory=dict) + prototype_key: str = "mesh_name" + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> EntityGroupSpec: + prims = raw.get("source_prim_paths", raw.get("prim_paths", ())) + if isinstance(prims, str): + prims = (prims,) + if not prims: + raise ValueError(f"entity group {raw.get('id_prefix')!r}: source_prim_paths required") + tags = raw.get("tags", ()) + if isinstance(tags, str): + tags = (tags,) + mode = raw.get("mode", "per_prim") + if mode != "per_prim": + raise ValueError(f"entity group {raw.get('id_prefix')!r}: unsupported mode {mode!r}") + return cls( + id_prefix=str(raw["id_prefix"]), + source_prim_paths=tuple(str(pattern) for pattern in prims), + mode=mode, + remove_from_static=bool(raw.get("remove_from_static", True)), + spawn=raw.get("spawn", "initial"), + kind=raw.get("kind", "dynamic"), + mass=float(raw.get("mass", 1.0)), + tags=tuple(str(tag) for tag in tags), + physics=dict(raw.get("physics", {})), + visual=dict(raw.get("visual", {})), + prototype_key=str(raw.get("prototype_key", "mesh_name")), + ) + + def to_json_dict(self) -> dict[str, Any]: + raw = asdict(self) + raw["source_prim_paths"] = list(self.source_prim_paths) + raw["tags"] = list(self.tags) + return raw + + def matches(self, prim: ScenePrimMesh) -> bool: + prim_candidates = tuple( + candidate for candidate in (prim.visual_node_name, prim.prim_path) if candidate + ) + return any( + match_prim_pattern(candidate, pattern, include_sanitized=False) + for candidate in prim_candidates + for pattern in self.source_prim_paths + ) + + @dataclass(frozen=True) class SceneCookSidecar: """Authored policy loaded from ``.cook.json``. @@ -124,6 +190,7 @@ class SceneCookSidecar: path: Path | None = None collision: CollisionSpec = field(default_factory=CollisionSpec) interactables: tuple[InteractableSpec, ...] = () + entity_groups: tuple[EntityGroupSpec, ...] 
= () @classmethod def from_dict(cls, raw: dict[str, Any], *, path: Path | None = None) -> SceneCookSidecar: @@ -137,7 +204,15 @@ def from_dict(cls, raw: dict[str, Any], *, path: Path | None = None) -> SceneCoo interactables = tuple( InteractableSpec.from_dict(item) for item in raw.get("interactables", ()) ) - return cls(path=path, collision=collision, interactables=interactables) + entity_groups = tuple( + EntityGroupSpec.from_dict(item) for item in raw.get("entity_groups", ()) + ) + return cls( + path=path, + collision=collision, + interactables=interactables, + entity_groups=entity_groups, + ) @classmethod def from_json(cls, path: str | Path) -> SceneCookSidecar: @@ -164,6 +239,7 @@ def to_json_dict(self) -> dict[str, Any]: "path": str(self.path) if self.path else None, "collision": asdict(self.collision), "interactables": [item.to_json_dict() for item in self.interactables], + "entity_groups": [item.to_json_dict() for item in self.entity_groups], } @@ -180,10 +256,3 @@ def match_prim_pattern( if include_sanitized: candidates.append(sanitized) return any(fnmatch.fnmatchcase(candidate, pattern) for candidate in candidates) - - -__all__ = [ - "InteractableSpec", - "SceneCookSidecar", - "match_prim_pattern", -] diff --git a/dimos/experimental/pimsim/scene/source_asset.py b/dimos/experimental/pimsim/scene/source_asset.py new file mode 100644 index 0000000000..88324cc22f --- /dev/null +++ b/dimos/experimental/pimsim/scene/source_asset.py @@ -0,0 +1,328 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Normalize authored scene sources into concrete mesh assets for cooking.""" + +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass, field +import hashlib +from pathlib import Path +import shutil +import tempfile +from typing import Any + +from dimos.experimental.pimsim.scene.command import ( + blender_output_line_is_interesting, + run_logged_command, +) +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + +DIRECT_SCENE_SUFFIXES = { + ".glb", + ".gltf", + ".obj", + ".ply", + ".stl", + ".usd", + ".usda", + ".usdc", + ".usdz", +} + +SOURCE_CACHE_DIR = Path.home() / ".cache" / "dimos" / "scene_sources" +_BLENDER_NORMALIZER_VERSION = "blend-evaluated-depsgraph-v1" + + +@dataclass(frozen=True) +class PreparedSceneSource: + """A source asset in a format downstream cookers can consume.""" + + original_path: Path + cook_path: Path + normalized: bool = False + normalizer: str | None = None + stats: dict[str, Any] = field(default_factory=dict) + + def to_json_dict(self) -> dict[str, Any]: + return { + "original_path": str(self.original_path), + "cook_path": str(self.cook_path), + "normalized": self.normalized, + "normalizer": self.normalizer, + "stats": self.stats, + } + + +Normalizer = Callable[[Path, Path, bool], PreparedSceneSource] + + +def prepare_scene_source( + source_path: str | Path, + *, + cache_root: str | Path | None = None, + rebake: bool = False, +) -> PreparedSceneSource: + """Return a concrete mesh source for the scene cooking pipeline. + + Most supported source formats already are mesh assets, so they pass through + unchanged. Authored project formats such as ``.blend`` are normalized into + GLB first, using the authoring tool to evaluate procedural data and + instances into concrete mesh nodes. 
+ """ + source = Path(source_path).expanduser().resolve() + if not source.exists(): + raise FileNotFoundError(f"scene source not found: {source}") + if not source.is_file(): + raise ValueError(f"scene source must be a file: {source}") + + suffix = source.suffix.lower() + if suffix in DIRECT_SCENE_SUFFIXES: + return PreparedSceneSource(original_path=source, cook_path=source) + + normalizer = _NORMALIZERS.get(suffix) + if normalizer is None: + supported = ", ".join(sorted((*DIRECT_SCENE_SUFFIXES, *_NORMALIZERS))) + raise RuntimeError(f"unsupported scene source suffix {suffix!r}; supported: {supported}") + + cache_dir = Path(cache_root).expanduser().resolve() if cache_root else SOURCE_CACHE_DIR + cache_dir.mkdir(parents=True, exist_ok=True) + return normalizer(source, cache_dir, rebake) + + +def _normalize_blend_source( + source: Path, + cache_dir: Path, + rebake: bool, +) -> PreparedSceneSource: + cache_key = _source_cache_key(source, _BLENDER_NORMALIZER_VERSION) + target = cache_dir / f"{source.stem}-{cache_key}.glb" + if target.exists() and not rebake: + return PreparedSceneSource( + original_path=source, + cook_path=target, + normalized=True, + normalizer=_BLENDER_NORMALIZER_VERSION, + stats={"cache_hit": True}, + ) + + blender = shutil.which("blender") + if blender is None: + raise RuntimeError(".blend scene cooking requires Blender on PATH") + + with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as script: + script.write(_BLENDER_SOURCE_NORMALIZER_SCRIPT) + script_path = Path(script.name) + try: + output = _run_command( + [ + blender, + "--background", + str(source), + "--python", + str(script_path), + "--", + str(target), + ], + "blender source normalization", + ) + finally: + script_path.unlink(missing_ok=True) + + if not target.exists(): + raise RuntimeError(f"Blender source normalization did not write {target}") + logger.info("normalized Blender scene source: %s -> %s", source, target) + return PreparedSceneSource( + original_path=source, + 
cook_path=target, + normalized=True, + normalizer=_BLENDER_NORMALIZER_VERSION, + stats=_parse_normalizer_stats(output), + ) + + +def _source_cache_key(source: Path, version: str) -> str: + h = hashlib.sha256() + h.update(version.encode()) + with source.open("rb") as f: + for chunk in iter(lambda: f.read(1024 * 1024), b""): + h.update(chunk) + return h.hexdigest()[:16] + + +def _run_command(args: list[str], label: str) -> str: + return run_logged_command( + args, + label, + tail_lines=40, + line_log_filter=blender_output_line_is_interesting, + ) + + +def _parse_normalizer_stats(output: str) -> dict[str, Any]: + stats: dict[str, Any] = {"cache_hit": False} + for line in output.splitlines(): + if not line.startswith("DIMOS_BLEND_NORMALIZER "): + continue + for item in line.removeprefix("DIMOS_BLEND_NORMALIZER ").split(): + key, sep, value = item.partition("=") + if not sep: + continue + try: + stats[key] = int(value) + except ValueError: + stats[key] = value + return stats + + +_BLENDER_SOURCE_NORMALIZER_SCRIPT = r""" +import pathlib +import re +import sys + +import bpy + +target = pathlib.Path(sys.argv[-1]) +target.parent.mkdir(parents=True, exist_ok=True) + + +def log(message): + print(f"DIMOS_BLEND_SOURCE {message}", flush=True) + + +log(f"start target={target}") +depsgraph = bpy.context.evaluated_depsgraph_get() +log("depsgraph ready") +collection = bpy.data.collections.new("DIMOS_Normalized_Source") +bpy.context.scene.collection.children.link(collection) + +name_counts = {} +mesh_cache = {} +realized = [] +skipped_empty = 0 +skipped_non_mesh = 0 +instances = 0 +base_objects = 0 + + +def safe_name(raw): + cleaned = re.sub(r"[^0-9A-Za-z_.-]+", "_", raw).strip("_") + return cleaned or "mesh" + + +def unique_name(raw): + base = safe_name(raw) + count = name_counts.get(base, 0) + name_counts[base] = count + 1 + if count == 0: + return base + return f"{base}.{count:04d}" + + +def object_key(obj): + original = getattr(obj, "original", None) + if original is not None: + 
obj = original + return str(obj.as_pointer()) + + +def mesh_for_source(obj): + key = object_key(obj) + cached = mesh_cache.get(key) + if cached is not None: + return cached + + evaluated = obj.evaluated_get(depsgraph) + try: + temp = evaluated.to_mesh(preserve_all_data_layers=True, depsgraph=depsgraph) + except TypeError: + temp = evaluated.to_mesh() + + if temp is None or len(temp.vertices) == 0 or len(temp.polygons) == 0: + if temp is not None: + evaluated.to_mesh_clear() + mesh_cache[key] = None + return None + + mesh = temp.copy() + mesh.name = f"{safe_name(obj.name)}_Mesh" + evaluated.to_mesh_clear() + mesh_cache[key] = mesh + return mesh + + +for index, inst in enumerate(depsgraph.object_instances): + source_obj = inst.instance_object if inst.is_instance and inst.instance_object else inst.object + if source_obj is None or source_obj.type != "MESH": + skipped_non_mesh += 1 + continue + + mesh = mesh_for_source(source_obj) + if mesh is None: + skipped_empty += 1 + continue + + parent_name = inst.parent.name if inst.parent is not None else None + if inst.is_instance: + instances += 1 + raw_name = f"{parent_name or inst.object.name}__{source_obj.name}" + else: + base_objects += 1 + raw_name = source_obj.name + + obj = bpy.data.objects.new(unique_name(raw_name), mesh) + obj.matrix_world = inst.matrix_world.copy() + collection.objects.link(obj) + realized.append(obj) + if len(realized) % 100 == 0: + log(f"realized progress objects={len(realized)} depsgraph_index={index}") + +if not realized: + raise RuntimeError("Blender scene normalization produced no mesh objects") + +bpy.ops.object.select_all(action="DESELECT") +for obj in realized: + obj.select_set(True) +bpy.context.view_layer.objects.active = realized[0] + +log(f"export start realized_objects={len(realized)} unique_meshes={sum(1 for mesh in mesh_cache.values() if mesh is not None)}") +bpy.ops.export_scene.gltf( + filepath=str(target), + export_format="GLB", + export_yup=True, + use_selection=True, + 
export_cameras=False, + export_lights=False, + export_apply=False, +) +log("export done") + +print( + "DIMOS_BLEND_NORMALIZER " + f"base_objects={base_objects} " + f"instances={instances} " + f"realized_objects={len(realized)} " + f"unique_meshes={sum(1 for mesh in mesh_cache.values() if mesh is not None)} " + f"skipped_empty={skipped_empty} " + f"skipped_non_mesh={skipped_non_mesh}" +) +""" + + +_NORMALIZERS: dict[str, Normalizer] = { + ".blend": _normalize_blend_source, +} diff --git a/dimos/experimental/pimsim/scene/test_source_asset.py b/dimos/experimental/pimsim/scene/test_source_asset.py new file mode 100644 index 0000000000..6087e9d7d0 --- /dev/null +++ b/dimos/experimental/pimsim/scene/test_source_asset.py @@ -0,0 +1,109 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def test_prepare_scene_source_uses_cached_blend_normalization(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A normalized GLB already present in the cache is reused without running Blender."""
    source = tmp_path / "market.blend"
    source.write_bytes(b"blend")
    cache_root = tmp_path / "cache"
    cache_root.mkdir()
    normalized = cache_root / "market-abc123.glb"
    normalized.write_bytes(b"cached")

    # Pin executable discovery exactly like the normalization test does, so the
    # outcome cannot depend on whether Blender is installed on the host.
    monkeypatch.setattr(source_asset.shutil, "which", lambda command: f"/usr/bin/{command}")
    monkeypatch.setattr(source_asset, "_source_cache_key", lambda path, version: "abc123")
    monkeypatch.setattr(
        source_asset,
        "_run_command",
        lambda args, label: pytest.fail("cache hit should not invoke Blender"),
    )

    prepared = source_asset.prepare_scene_source(source, cache_root=cache_root)

    assert prepared.original_path == source
    assert prepared.cook_path == normalized
    # The cached artifact must be served as-is, not regenerated or rewritten.
    assert prepared.cook_path.read_bytes() == b"cached"
    assert prepared.stats == {"cache_hit": True}
browser_collision_path=tmp_path / "browser" / "collision.glb", objects_path=tmp_path / "browser" / "objects.json", mujoco_scene_path=tmp_path / "mujoco" / "abc123" / "wrapper.xml", + mujoco_binary_path=tmp_path / "mujoco" / "abc123" / "wrapper.mjb", entities=[ { "id": "chair_001", @@ -136,12 +139,14 @@ def test_scene_package_metadata_uses_package_relative_paths(tmp_path: Path) -> N assert raw["artifacts"]["browser_collision"] == "browser/collision.glb" assert raw["artifacts"]["objects"] == "browser/objects.json" assert raw["artifacts"]["mujoco_scene"] == "mujoco/abc123/wrapper.xml" + assert raw["artifacts"]["mujoco_binary"] == "mujoco/abc123/wrapper.mjb" assert raw["entities"][0]["visual_path"] == "entities/chair_001/visual.glb" loaded = load_scene_package(metadata_path) assert loaded.package_dir == tmp_path assert loaded.visual_path == tmp_path / "browser" / "visual.glb" assert loaded.mujoco_scene_path == tmp_path / "mujoco" / "abc123" / "wrapper.xml" + assert loaded.mujoco_binary_path == tmp_path / "mujoco" / "abc123" / "wrapper.mjb" assert loaded.entities[0]["visual_path"] == str( tmp_path / "entities" / "chair_001" / "visual.glb" ) @@ -319,6 +324,126 @@ def test_synthetic_entity_uses_pose_and_extents(tmp_path: Path) -> None: assert entity.descriptor["mesh_ref"] == "" +def test_entity_group_expands_per_prim_with_shared_prototypes( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + def fake_load_scene_prims( + path: str | Path, + alignment: SceneMeshAlignment | None = None, + ) -> list[ScenePrimMesh]: + del path, alignment + triangles = np.array([[0, 1, 2], [1, 2, 3]], dtype=np.int32) + return [ + ScenePrimMesh( + name="cereal_1", + prim_path="Grocery_Scatter_Left_Aligned__2_CerealBlue.0001_2_CerealBlue_Mesh", + vertices=np.array( + [ + [0.0, 0.0, 0.0], + [0.2, 0.0, 0.0], + [0.0, 0.1, 0.3], + [0.2, 0.1, 0.3], + ], + dtype=np.float32, + ), + triangles=triangles, + ), + ScenePrimMesh( + name="cereal_2", + 
prim_path="Grocery_Scatter_Right_Aligned__2_CerealBlue.0002_2_CerealBlue_Mesh", + vertices=np.array( + [ + [1.0, 0.0, 0.0], + [1.0, 0.2, 0.0], + [0.9, 0.0, 0.3], + [0.9, 0.2, 0.3], + ], + dtype=np.float32, + ), + triangles=triangles, + ), + ScenePrimMesh( + name="apple", + prim_path="Produce_Scatter__4_RedApple.0001_4_RedApple_Mesh", + vertices=np.array( + [ + [2.0, 0.0, 0.0], + [2.1, 0.0, 0.0], + [2.0, 0.1, 0.1], + [2.1, 0.1, 0.1], + ], + dtype=np.float32, + ), + triangles=triangles, + ), + ScenePrimMesh( + name="cereal_no_instance_suffix", + prim_path="Grocery_Scatter_Left_Aligned__2_CerealBlue_2_CerealBlue_Mesh", + vertices=np.array( + [ + [3.0, 0.0, 0.0], + [3.2, 0.0, 0.0], + [3.0, 0.1, 0.3], + [3.2, 0.1, 0.3], + ], + dtype=np.float32, + ), + triangles=triangles, + ), + ] + + monkeypatch.setattr(plan_module, "load_scene_prims", fake_load_scene_prims) + sidecar = SceneCookSidecar.from_dict( + { + "entity_groups": [ + { + "id_prefix": "product", + "source_prim_paths": ["Grocery_Scatter_*", "Produce_Scatter__*"], + "kind": "dynamic", + "mass": 0.05, + "physics": {"shape": "mesh", "friction": [0.6, 0.02, 0.001]}, + "tags": ["shelf_product"], + } + ] + } + ) + + plan = plan_module.build_scene_cook_plan( + tmp_path / "supermarket.glb", + sidecar=sidecar, + alignment=SceneMeshAlignment(), + output_dir=tmp_path, + ) + + assert len(plan.entities) == 4 + assert len(plan.prototypes) == 3 + assert {prototype.id for prototype in plan.prototypes} == { + "product_2_CerealBlue", + "product_4_RedApple", + "product_2_CerealBlue_2_CerealBlue", + } + entity_prototype_ids = [entity.prototype_id for entity in plan.entities] + assert entity_prototype_ids.count("product_2_CerealBlue") == 2 + assert entity_prototype_ids.count("product_4_RedApple") == 1 + assert entity_prototype_ids.count("product_2_CerealBlue_2_CerealBlue") == 1 + assert plan.entities[0].descriptor["shape_hint"] == "mesh" + assert plan.entities[0].descriptor["mesh_ref"] == "" + assert 
def test_entity_group_requires_source_prim_paths() -> None:
    """An entity group that omits ``source_prim_paths`` raises ``ValueError``."""
    group_without_paths = {"id_prefix": "product"}
    sidecar_data = {"entity_groups": [group_without_paths]}

    with pytest.raises(ValueError, match="source_prim_paths required"):
        SceneCookSidecar.from_dict(sidecar_data)
+ +from __future__ import annotations + +from io import BytesIO +import json +from pathlib import Path +import struct + +from PIL import Image + +from dimos.experimental.pimsim.scene.visual_glb import ( + _buffer_view_bytes, + _demote_required_extensions, + _normalize_embedded_textures, + _read_glb, +) + + +def test_normalize_embedded_textures_preserves_glb_and_rewrites_png8( + tmp_path: Path, +) -> None: + geometry_payload = b"meshbytes" + texture_payload = _png16_texture() + path = tmp_path / "textured.glb" + _write_test_glb(path, geometry_payload, texture_payload) + + count = _normalize_embedded_textures(path) + + assert count == 1 + gltf, bin_chunk = _read_glb(path) + assert gltf["images"][0]["mimeType"] == "image/png" + assert _buffer_view_bytes(bin_chunk, gltf["bufferViews"][0]) == geometry_payload + + normalized_texture = _buffer_view_bytes(bin_chunk, gltf["bufferViews"][1]) + with Image.open(BytesIO(normalized_texture)) as image: + assert image.format == "PNG" + assert image.mode == "RGB" + + +def test_demote_required_extensions_keeps_extension_optional(tmp_path: Path) -> None: + path = tmp_path / "extension.glb" + _write_test_glb( + path, + geometry_payload=b"meshbytes", + texture_payload=_png16_texture(), + required_extensions=["KHR_texture_transform", "EXT_texture_webp"], + used_extensions=["KHR_texture_transform", "EXT_texture_webp"], + ) + + demoted = _demote_required_extensions(path, {"KHR_texture_transform"}) + + assert demoted == {"KHR_texture_transform"} + gltf, _ = _read_glb(path) + assert gltf["extensionsRequired"] == ["EXT_texture_webp"] + assert "KHR_texture_transform" in gltf["extensionsUsed"] + + +def _png16_texture() -> bytes: + image = Image.new("I;16", (2, 2)) + image.putdata([0, 256, 32768, 65535]) + out = BytesIO() + image.save(out, format="PNG") + return out.getvalue() + + +def _write_test_glb( + path: Path, + geometry_payload: bytes, + texture_payload: bytes, + required_extensions: list[str] | None = None, + used_extensions: list[str] 
| None = None, +) -> None: + bin_chunk = bytearray() + geometry_offset = len(bin_chunk) + bin_chunk.extend(geometry_payload) + _pad(bin_chunk) + texture_offset = len(bin_chunk) + bin_chunk.extend(texture_payload) + _pad(bin_chunk) + + gltf = { + "asset": {"version": "2.0"}, + "buffers": [{"byteLength": len(bin_chunk)}], + "bufferViews": [ + { + "buffer": 0, + "byteOffset": geometry_offset, + "byteLength": len(geometry_payload), + }, + { + "buffer": 0, + "byteOffset": texture_offset, + "byteLength": len(texture_payload), + }, + ], + "images": [{"bufferView": 1, "mimeType": "image/png"}], + } + if required_extensions is not None: + gltf["extensionsRequired"] = required_extensions + if used_extensions is not None: + gltf["extensionsUsed"] = used_extensions + json_chunk = json.dumps(gltf, separators=(",", ":")).encode("utf-8") + json_chunk = _padded(json_chunk, b" ") + bin_bytes = bytes(bin_chunk) + total_length = 12 + 8 + len(json_chunk) + 8 + len(bin_bytes) + with path.open("wb") as file: + file.write(struct.pack("<4sII", b"glTF", 2, total_length)) + file.write(struct.pack(" None: + while len(data) % 4: + data.append(0) + + +def _padded(data: bytes, pad: bytes) -> bytes: + while len(data) % 4: + data += pad + return data diff --git a/dimos/experimental/pimsim/scene/visual_blender.py b/dimos/experimental/pimsim/scene/visual_blender.py index 0615d271e1..e76e9d4a81 100644 --- a/dimos/experimental/pimsim/scene/visual_blender.py +++ b/dimos/experimental/pimsim/scene/visual_blender.py @@ -19,12 +19,14 @@ import json from pathlib import Path import shutil -import subprocess import tempfile +from dimos.experimental.pimsim.scene.command import ( + blender_output_line_is_interesting, + run_logged_command, +) from dimos.experimental.pimsim.scene.plan import SceneCookPlan -_COMMAND_TAIL_LINES = 30 _VISUAL_PLAN_VERSION = 2 _PLAN_VISUAL_SCRIPT = r""" @@ -48,9 +50,14 @@ def fail(message): raise RuntimeError(message) +def log(message): + print(f"DIMOS_VISUAL_PLAN {message}", 
flush=True) + + def import_source(): bpy.ops.object.select_all(action="SELECT") bpy.ops.object.delete() + log(f"import start source={source} suffix={suffix}") if suffix in {".usd", ".usda", ".usdc", ".usdz"}: bpy.ops.wm.usd_import(filepath=str(source)) elif suffix in {".gltf", ".glb"}: @@ -63,6 +70,11 @@ def import_source(): bpy.ops.wm.ply_import(filepath=str(source)) else: fail(f"unsupported visual source suffix: {suffix}") + log( + "import done " + f"objects={len(bpy.context.scene.objects)} " + f"meshes={len(bpy.data.meshes)} images={len(bpy.data.images)}" + ) def alignment_matrix(): @@ -141,6 +153,10 @@ def duplicate_for_entity(obj, center, suffix): def export_entity(entity, objects): visual_path = pathlib.Path(entity["visual_path"]) + log( + f"entity export start id={entity['id']} " + f"objects={len(objects)} visual_path={visual_path}" + ) visual_path.parent.mkdir(parents=True, exist_ok=True) center = Vector(tuple(float(v) for v in entity["center"])) duplicates = [duplicate_for_entity(obj, center, entity["safe_id"]) for obj in objects] @@ -159,9 +175,11 @@ def export_entity(entity, objects): finally: for dup in duplicates: bpy.data.objects.remove(dup, do_unlink=True) + log(f"entity export done id={entity['id']}") def export_static_visual(objects_to_remove): + log(f"static visual remove start objects={len(objects_to_remove)}") for obj in sorted(objects_to_remove, key=lambda item: item.name): if obj.name in bpy.data.objects: bpy.data.objects.remove(obj, do_unlink=True) @@ -169,6 +187,7 @@ def export_static_visual(objects_to_remove): if not remaining: fail("static visual would contain no mesh objects after entity removal") static_visual_path.parent.mkdir(parents=True, exist_ok=True) + log(f"static visual export start remaining_meshes={len(remaining)} path={static_visual_path}") bpy.ops.object.select_all(action="DESELECT") bpy.ops.export_scene.gltf( filepath=str(static_visual_path), @@ -176,12 +195,16 @@ def export_static_visual(objects_to_remove): export_yup=True, 
export_apply=True, ) + log("static visual export done") import_source() remove_from_static = set() report = [] -for entity in plan["entities"]: +entities = plan["entities"] +log(f"entity plan start count={len(entities)}") +for index, entity in enumerate(entities, start=1): + log(f"entity plan progress index={index}/{len(entities)} id={entity['id']}") objects = resolve_objects(entity) export_entity(entity, objects) if entity["remove_from_static"]: @@ -234,7 +257,7 @@ def cook_plan_visual_assets( script_file.write(_PLAN_VISUAL_SCRIPT) script_path = Path(script_file.name) try: - _run_command( + run_logged_command( [ blender, "--background", @@ -246,6 +269,7 @@ def cook_plan_visual_assets( str(plan_path), ], "blender visual plan cook", + line_log_filter=blender_output_line_is_interesting, ) finally: plan_path.unlink(missing_ok=True) @@ -283,20 +307,6 @@ def _blender_plan_json(plan: SceneCookPlan, static_visual_source: Path) -> dict[ } -def _run_command(args: list[str], label: str) -> str: - result = subprocess.run( - args, - check=False, - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - ) - output = result.stdout or "" - if result.returncode != 0: - raise RuntimeError(f"{label} failed with exit code {result.returncode}:\n{_tail(output)}") - return output - - def _manifest_matches(path: Path, expected: dict[str, object]) -> bool: if not path.exists(): return False @@ -304,10 +314,3 @@ def _manifest_matches(path: Path, expected: dict[str, object]) -> bool: return bool(json.loads(path.read_text()) == expected) except json.JSONDecodeError: return False - - -def _tail(output: str) -> str: - return "\n".join(output.splitlines()[-_COMMAND_TAIL_LINES:]) - - -__all__ = ["cook_plan_visual_assets"] diff --git a/dimos/experimental/pimsim/scene/visual_glb.py b/dimos/experimental/pimsim/scene/visual_glb.py index 4a2027d10c..ef6dacab4a 100644 --- a/dimos/experimental/pimsim/scene/visual_glb.py +++ b/dimos/experimental/pimsim/scene/visual_glb.py @@ -17,13 +17,20 @@ 
from __future__ import annotations from dataclasses import dataclass +from io import BytesIO import json from pathlib import Path import shutil -import subprocess +import struct import tempfile from typing import Any +from PIL import Image + +from dimos.experimental.pimsim.scene.command import ( + blender_output_line_is_interesting, + run_logged_command, +) from dimos.experimental.pimsim.scene.inspect import inspect_scene_asset from dimos.simulation.scene_assets.spec import BrowserVisualSpec from dimos.utils.logging_config import setup_logger @@ -42,7 +49,16 @@ ".ply", } _GLTFPACK_INPUT_SUFFIXES = {".gltf", ".glb", ".obj"} -_COMMAND_TAIL_LINES = 30 +_GLB_MAGIC = b"glTF" +_GLB_VERSION = 2 +_GLB_HEADER_SIZE = 12 +_GLB_CHUNK_HEADER_SIZE = 8 +_GLB_JSON_CHUNK_TYPE = 0x4E4F534A +_GLB_BIN_CHUNK_TYPE = 0x004E4942 +_PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n" +_STANDARD_TEXTURE_MIME_TYPES = {"image/png", "image/jpeg"} +_STANDARD_TEXTURE_MODES = {"RGB", "RGBA"} +_DEMOTABLE_REQUIRED_EXTENSIONS = {"KHR_texture_transform"} _BLENDER_SCRIPT = r""" import pathlib @@ -56,9 +72,19 @@ max_texture_size = int(sys.argv[-1]) suffix = source.suffix.lower() + +def log(message): + print(f"DIMOS_VISUAL_COOK {message}", flush=True) + + +log( + f"start source={source} target={target} " + f"simplify_ratio={simplify_ratio} max_texture_size={max_texture_size}" +) bpy.ops.object.select_all(action="SELECT") bpy.ops.object.delete() +log(f"import start suffix={suffix}") if suffix in {".usd", ".usda", ".usdc", ".usdz"}: bpy.ops.wm.usd_import(filepath=str(source)) elif suffix in {".gltf", ".glb"}: @@ -71,12 +97,22 @@ bpy.ops.wm.ply_import(filepath=str(source)) else: raise RuntimeError(f"unsupported visual source suffix: {suffix}") +log( + "import done " + f"objects={len(bpy.context.scene.objects)} " + f"meshes={len(bpy.data.meshes)} images={len(bpy.data.images)}" +) +removed_non_mesh = 0 for obj in list(bpy.context.scene.objects): if obj.type != "MESH": bpy.data.objects.remove(obj, do_unlink=True) + 
removed_non_mesh += 1 +log(f"removed non_mesh_objects={removed_non_mesh}") if max_texture_size > 0: + resized = 0 + skipped = 0 for image in bpy.data.images: width, height = image.size largest = max(width, height) @@ -85,13 +121,19 @@ scale = max_texture_size / largest try: image.scale(max(1, int(width * scale)), max(1, int(height * scale))) + resized += 1 except RuntimeError: # Blender cannot scale some generated or missing images; keep those # untouched instead of aborting the entire scene cook. - pass + skipped += 1 + log(f"texture resize done resized={resized} skipped={skipped}") if 0.0 < simplify_ratio < 0.999: - for obj in list(bpy.context.scene.objects): + mesh_objects = [obj for obj in bpy.context.scene.objects if obj.type == "MESH"] + log(f"decimate start mesh_objects={len(mesh_objects)}") + decimated = 0 + skipped = 0 + for index, obj in enumerate(mesh_objects, start=1): if obj.type != "MESH": continue bpy.ops.object.select_all(action="DESELECT") @@ -101,23 +143,32 @@ modifier.ratio = simplify_ratio try: bpy.ops.object.modifier_apply(modifier=modifier.name) + decimated += 1 except RuntimeError: obj.modifiers.remove(modifier) + skipped += 1 + if index % 25 == 0: + log(f"decimate progress processed={index}/{len(mesh_objects)}") + log(f"decimate done decimated={decimated} skipped={skipped}") mesh_objects = [obj for obj in bpy.context.scene.objects if obj.type == "MESH"] if len(mesh_objects) > 1: + log(f"join start mesh_objects={len(mesh_objects)}") bpy.ops.object.select_all(action="DESELECT") for obj in mesh_objects: obj.select_set(True) bpy.context.view_layer.objects.active = mesh_objects[0] bpy.ops.object.join() + log(f"join done objects={len(bpy.context.scene.objects)}") +log("export start") bpy.ops.export_scene.gltf( filepath=str(target), export_format="GLB", export_yup=True, export_apply=True, ) +log("export done") """ @@ -162,6 +213,7 @@ def cook_browser_visual( temp_dir = Path(temp_dir_raw) temp_out = temp_dir / out_path.name tool, report = 
_cook_visual(source, temp_out, visual_spec) + _sanitize_browser_visual_output(temp_out, visual_spec) stats = inspect_scene_asset(temp_out).to_json_dict() _validate_output(source_stats, stats, visual_spec) if report is not None: @@ -218,7 +270,7 @@ def _export_with_blender( script.write(_BLENDER_SCRIPT) script_path = Path(script.name) try: - _run_command( + run_logged_command( [ blender, "--background", @@ -232,6 +284,7 @@ def _export_with_blender( str(max_texture_size or 0), ], "blender", + line_log_filter=blender_output_line_is_interesting, ) finally: script_path.unlink(missing_ok=True) @@ -254,6 +307,7 @@ def _export_with_gltfpack( report_path = target.with_suffix(".gltfpack.json") args = [ *command, + "-v", "-i", str(source_for_gltfpack), "-o", @@ -266,9 +320,15 @@ def _export_with_gltfpack( "-r", str(report_path), ] + if not spec.quantize: + args.append("-noq") + if spec.use_gpu_instancing: + args.append("-mi") if spec.texture_format == "webp": + _require_native_gltfpack_for_texture_compression(command, spec.texture_format) args.append("-tw") elif spec.texture_format == "ktx2": + _require_native_gltfpack_for_texture_compression(command, spec.texture_format) args.append("-tc") elif spec.texture_format is not None: raise ValueError(f"unknown browser texture format: {spec.texture_format}") @@ -277,13 +337,261 @@ def _export_with_gltfpack( raise ValueError("max_texture_size requires texture_format='webp' or 'ktx2'") args.extend(["-tl", str(spec.max_texture_size)]) - output = _run_command(args, "gltfpack") + try: + output = run_logged_command(args, "gltfpack") + except RuntimeError as exc: + if "unreachable" in str(exc): + raise RuntimeError( + "gltfpack crashed internally while optimizing the browser visual " + f"for {source_for_gltfpack}. This is a tool failure, not a scene " + "sidecar validation error. Try a native gltfpack build first; if " + "that still fails, partition the visual source or use " + "--visual-optimizer blender/copy for diagnosis." 
def _demote_required_extensions(path: Path, extensions: set[str]) -> set[str]:
    """Downgrade selected glTF extensions in a GLB from required to merely used.

    Every entry of the file's ``extensionsRequired`` list that also appears in
    *extensions* is removed from that list and guaranteed to be present in
    ``extensionsUsed``. The GLB at *path* is rewritten only when at least one
    extension was demoted.

    Args:
        path: GLB file to inspect and, if needed, rewrite in place.
        extensions: Extension names that are allowed to be demoted.

    Returns:
        The names that were actually demoted; an empty set when the file has
        no ``extensionsRequired`` list or none of its entries match.
    """
    gltf, bin_chunk = _read_glb(path)
    required = gltf.get("extensionsRequired")
    if not isinstance(required, list):
        return set()

    # Partition the required list in one pass, preserving the original order
    # of the entries that stay required.
    demoted: set[str] = set()
    still_required: list[Any] = []
    for name in required:
        if name in extensions:
            demoted.add(name)
        else:
            still_required.append(name)
    if not demoted:
        return set()

    if still_required:
        gltf["extensionsRequired"] = still_required
    else:
        # Nothing is required any more: drop the key rather than keep it empty.
        gltf.pop("extensionsRequired", None)

    newly_used = sorted(demoted)
    used = gltf.get("extensionsUsed")
    if isinstance(used, list):
        # dict.fromkeys de-duplicates while keeping first-seen order.
        gltf["extensionsUsed"] = list(dict.fromkeys([*used, *newly_used]))
    else:
        gltf["extensionsUsed"] = newly_used

    _write_glb(path, gltf, bin_chunk, {})
    return demoted
def _normalized_texture_bytes(
    texture_bytes: bytes,
    *,
    mime_type: Any,
) -> bytes | None:
    """Re-encode an embedded texture as an 8-bit RGB/RGBA PNG when needed.

    Args:
        texture_bytes: Encoded image payload read from a GLB bufferView.
        mime_type: The glTF image ``mimeType`` value; may be absent or not a
            string, in which case the texture is treated as non-standard.

    Returns:
        PNG bytes of the converted texture, or ``None`` when
        ``_is_standard_embedded_texture`` accepts the image unchanged.

    Raises:
        RuntimeError: If the payload cannot be decoded or converted; the
            underlying Pillow error is chained as the cause.
    """
    try:
        with Image.open(BytesIO(texture_bytes)) as image:
            image.load()
            if _is_standard_embedded_texture(image, texture_bytes, mime_type):
                return None
            # Detect straight alpha by band rather than by a fixed mode list so
            # that "PA" (and any other mode exposing an "A" band) keeps its
            # alpha channel instead of being flattened to RGB. Images without
            # an alpha band may still declare it via the "transparency" entry.
            has_alpha = "A" in image.getbands() or "transparency" in image.info
            converted = image.convert("RGBA" if has_alpha else "RGB")
            out = BytesIO()
            converted.save(out, format="PNG", compress_level=1)
            return out.getvalue()
    except Exception as exc:
        # Pillow reports corrupt or unsupported data through several unrelated
        # exception types; surface them uniformly and keep the cause attached.
        raise RuntimeError("failed to normalize embedded GLB texture to 8-bit PNG") from exc
if not texture_bytes.startswith(_PNG_SIGNATURE) or len(texture_bytes) < 25: + return False + return texture_bytes[24] > 8 + + +def _read_glb(path: Path) -> tuple[dict[str, Any], bytes]: + data = path.read_bytes() + if len(data) < _GLB_HEADER_SIZE: + raise RuntimeError(f"invalid GLB header: {path}") + magic, version, declared_length = struct.unpack_from("<4sII", data, 0) + if magic != _GLB_MAGIC or version != _GLB_VERSION: + raise RuntimeError(f"expected GLB v2 file: {path}") + if declared_length != len(data): + raise RuntimeError( + f"GLB length mismatch for {path}: header={declared_length} actual={len(data)}" + ) + + offset = _GLB_HEADER_SIZE + json_bytes: bytes | None = None + bin_chunk: bytes | None = None + while offset < len(data): + if offset + _GLB_CHUNK_HEADER_SIZE > len(data): + raise RuntimeError(f"truncated GLB chunk header: {path}") + chunk_length, chunk_type = struct.unpack_from(" len(data): + raise RuntimeError(f"truncated GLB chunk payload: {path}") + chunk = data[offset:chunk_end] + if chunk_type == _GLB_JSON_CHUNK_TYPE: + json_bytes = chunk + elif chunk_type == _GLB_BIN_CHUNK_TYPE: + bin_chunk = chunk + offset = chunk_end + + if json_bytes is None or bin_chunk is None: + raise RuntimeError(f"GLB must contain JSON and BIN chunks: {path}") + gltf = json.loads(json_bytes.rstrip(b" \t\r\n\0").decode("utf-8")) + if not isinstance(gltf, dict): + raise RuntimeError(f"GLB JSON chunk is not an object: {path}") + return gltf, bin_chunk + + +def _buffer_view_bytes(bin_chunk: bytes, view: dict[str, Any]) -> bytes: + if int(view.get("buffer", 0)) != 0: + raise RuntimeError("embedded texture normalization only supports buffer 0") + byte_offset = int(view.get("byteOffset", 0)) + byte_length = int(view["byteLength"]) + return bin_chunk[byte_offset : byte_offset + byte_length] + + +def _write_glb( + path: Path, + gltf: dict[str, Any], + bin_chunk: bytes, + buffer_view_replacements: dict[int, bytes], +) -> None: + buffer_views = gltf.get("bufferViews") + buffers = 
gltf.get("buffers") + if not isinstance(buffer_views, list) or not isinstance(buffers, list) or len(buffers) != 1: + raise RuntimeError(f"cannot rewrite GLB buffer views: {path}") + + new_bin = bytearray() + for index, view in enumerate(buffer_views): + if not isinstance(view, dict): + raise RuntimeError(f"invalid GLB bufferView at index {index}: {path}") + payload = buffer_view_replacements.get(index) + if payload is None: + payload = _buffer_view_bytes(bin_chunk, view) + _pad_bytearray(new_bin, alignment=4, pad=0) + view["byteOffset"] = len(new_bin) + view["byteLength"] = len(payload) + new_bin.extend(payload) + _pad_bytearray(new_bin, alignment=4, pad=0) + buffers[0]["byteLength"] = len(new_bin) + + json_chunk = json.dumps(gltf, separators=(",", ":"), sort_keys=True).encode("utf-8") + json_chunk = _padded_bytes(json_chunk, alignment=4, pad=b" ") + bin_bytes = bytes(new_bin) + total_length = ( + _GLB_HEADER_SIZE + + _GLB_CHUNK_HEADER_SIZE + + len(json_chunk) + + _GLB_CHUNK_HEADER_SIZE + + len(bin_bytes) + ) + with tempfile.NamedTemporaryFile("wb", suffix=".glb", delete=False) as temp: + temp_path = Path(temp.name) + temp.write(struct.pack("<4sII", _GLB_MAGIC, _GLB_VERSION, total_length)) + temp.write(struct.pack(" None: + while len(data) % alignment: + data.append(pad) + + +def _padded_bytes(data: bytes, *, alignment: int, pad: bytes) -> bytes: + while len(data) % alignment: + data += pad + return data + + def _gltfpack_command() -> list[str]: gltfpack = shutil.which("gltfpack") if gltfpack is not None: @@ -293,22 +601,24 @@ def _gltfpack_command() -> list[str]: return [npx, "-y", "gltfpack"] raise RuntimeError( "browser visual optimization requires gltfpack. Install it with " - "`npm install -g gltfpack` or use --visual-optimizer blender/copy." + "a native meshoptimizer gltfpack binary on PATH, or use " + "--visual-optimizer blender/copy." 
) -def _run_command(args: list[str], label: str) -> str: - result = subprocess.run( - args, - check=False, - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, +def _require_native_gltfpack_for_texture_compression( + command: list[str], + texture_format: str, +) -> None: + executable = Path(command[0]).name + if executable != "npx": + return + raise RuntimeError( + f"gltfpack texture compression requested ({texture_format}), but the " + "available gltfpack is the Node/npx build. That build does not support " + "WebP/KTX texture compression. Install a native gltfpack binary from " + "meshoptimizer releases on PATH, or set --visual-texture-format none." ) - output = result.stdout or "" - if result.returncode != 0: - raise RuntimeError(f"{label} failed with exit code {result.returncode}:\n{_tail(output)}") - return output def _read_json(path: Path) -> dict[str, Any] | None: @@ -339,8 +649,8 @@ def _validate_output( ) -def _tail(output: str) -> str: - return "\n".join(output.splitlines()[-_COMMAND_TAIL_LINES:]) +def _tail(output: str, tail_lines: int) -> str: + return "\n".join(output.splitlines()[-tail_lines:]) def _budget_warnings(stats: dict[str, Any], spec: BrowserVisualSpec) -> list[str]: @@ -358,6 +668,3 @@ def _budget_warnings(stats: dict[str, Any], spec: BrowserVisualSpec) -> list[str if vertex_count > spec.max_vertices: warnings.append(f"{vertex_count} vertices exceeds target {spec.max_vertices}") return warnings - - -__all__ = ["BrowserVisualCookResult", "cook_browser_visual"] diff --git a/dimos/simulation/mujoco/collision_spec.py b/dimos/simulation/mujoco/collision_spec.py index a11427352b..1ba69abaf5 100644 --- a/dimos/simulation/mujoco/collision_spec.py +++ b/dimos/simulation/mujoco/collision_spec.py @@ -87,6 +87,9 @@ class CollisionSpec: Optional override keys: + - ``"visual"``: ``false`` suppresses non-colliding visual passthrough + when MuJoCo visual meshes are enabled. 
This is intended for prims + extracted into runtime entities so they are not drawn twice. - ``"friction"``: list ``[slide, spin, roll]``. - ``"min_thickness"``: for ``"box"`` overrides, minimum full thickness in metres along world Z. Useful for authored walkable @@ -97,6 +100,10 @@ class CollisionSpec: - ``"max_hulls"``: per-pattern CoACD cap. - ``"target_faces"``: per-pattern triangle target for ``mesh`` / ``decimate`` outputs, or a post-process cap for hull outputs. + - ``"split_components"``: ``true`` forces this source prim to be split + into disconnected components before collision fitting. Use this for + scene-graph nodes that group unrelated architectural pieces under one + mesh and otherwise produce oversized convex hulls. """ #: Fallback policy when no pattern matches. ``"auto"`` runs the full @@ -958,11 +965,3 @@ def _coacd_decompose( if len(v) >= 4 and len(t) >= 1: out.append((v, t)) return out - - -__all__ = [ - "CollisionSpec", - "GeomEmission", - "PrimDecision", - "decide_for_prim", -] diff --git a/dimos/simulation/mujoco/scene_mesh_to_mjcf.py b/dimos/simulation/mujoco/scene_mesh_to_mjcf.py index 67af2a04c2..143fffed51 100644 --- a/dimos/simulation/mujoco/scene_mesh_to_mjcf.py +++ b/dimos/simulation/mujoco/scene_mesh_to_mjcf.py @@ -173,7 +173,7 @@ # affect MJCF emission (new geom kinds, rewritten visual policy, etc.). # This is only a local cache salt; it is not a persisted file format # contract and old cache directories can safely stay on disk. 
-_CACHE_SCHEMA_VERSION = "scene-only-v10" +_CACHE_SCHEMA_VERSION = "scene-only-v11" @dataclass @@ -260,7 +260,25 @@ def bake_scene_mjcf( logger.info(f"bake_scene_mjcf: loading + aligning {scene_mesh_path}") prims = load_scene_prims(scene_mesh_path, alignment=align) - if spec.split_disconnected_components: + has_forced_splits = any( + bool(override.get("split_components")) for override in spec.prim_overrides.values() + ) + if spec.split_disconnected_components or has_forced_splits: + + def _split_override(prim: ScenePrimMesh) -> dict[str, object]: + return spec.resolve(prim.prim_path or prim.name) + + def _can_split_prim(prim: ScenePrimMesh) -> bool: + override = _split_override(prim) + if override.get("split_components"): + return True + return ( + spec.split_disconnected_components and override.get("type", spec.default) == "auto" + ) + + def _force_split_prim(prim: ScenePrimMesh) -> bool: + return bool(_split_override(prim).get("split_components")) + prims, split_stats = split_disconnected_scene_prims( prims, min_components=spec.split_min_components, @@ -269,9 +287,8 @@ def bake_scene_mjcf( axis_ratio=spec.split_axis_ratio, min_component_extent=spec.split_component_min_extent_m, min_component_faces=spec.split_component_min_faces, - can_split=lambda prim: ( - spec.resolve(prim.prim_path or prim.name).get("type", spec.default) == "auto" - ), + can_split=_can_split_prim, + force_split=_force_split_prim, ) if split_stats["split_prims"]: logger.info( @@ -378,7 +395,9 @@ def _file_signature(path: Path) -> str: h.update(_CACHE_SCHEMA_VERSION.encode()) h.update(_file_signature(scene_mesh_path).encode()) h.update(repr(sorted(asdict(alignment).items())).encode()) - h.update(json.dumps(asdict(spec), sort_keys=True).encode()) + # CollisionSpec prim_overrides are first-match-wins, so key order is + # semantically relevant. Preserve insertion order in the cache key. 
+ h.update(json.dumps(asdict(spec), sort_keys=False).encode()) h.update(b"visual=" + (b"1" if include_visual_mesh else b"0")) return h.hexdigest()[:_CACHE_KEY_LEN] @@ -435,8 +454,10 @@ def _process_one_prim( counters = {"hulls": 0, "box_fallbacks": 0, "visuals": 0, "degenerate": 0} # Visual passthrough (always before the collision branch -- even - # ``skip`` prims can have a visual). - if include_visual_mesh: + # ``skip`` prims can have a visual). Sidecars can opt out for prims + # extracted into runtime entities so MuJoCo does not draw them twice. + visual_enabled = bool(spec.resolve(prim.prim_path or prim.name).get("visual", True)) + if include_visual_mesh and visual_enabled: vis_name = f"{prim.name}_visual" vis_path = cache_dir / f"{vis_name}.obj" try: @@ -520,7 +541,34 @@ def _bake_prims( n_degenerate = 0 reasons: dict[str, int] = {} - work_items = [(prim, cache_dir, spec, include_visual_mesh) for prim in prims] + work_items = [] + n_pre_skipped = 0 + pre_skip_reasons: dict[str, int] = {} + for prim in prims: + # When visual passthrough is disabled, explicit skip prims have no + # output at all. Do not pay process-pool overhead for dense clutter + # such as product scatter that the sidecar intentionally removes from + # the static scene. 
+ override = spec.resolve(prim.prim_path or prim.name) + if not include_visual_mesh and override.get("type", spec.default) == "skip": + n_pre_skipped += 1 + pre_skip_reasons["sidecar:skip"] = pre_skip_reasons.get("sidecar:skip", 0) + 1 + continue + work_items.append((prim, cache_dir, spec, include_visual_mesh)) + + if not work_items: + return _BakeArtifacts( + asset_lines=[], + geom_lines=[], + n_primitive=0, + n_hulls_total=0, + n_box_fallbacks=0, + n_skipped=n_pre_skipped, + n_visuals=0, + n_degenerate_dropped=0, + decision_reasons=pre_skip_reasons, + ) + n_workers = max(1, (os.cpu_count() or 4) - 1) if _native_thread_count() > 1: n_workers = min(n_workers, 8) @@ -530,14 +578,18 @@ def _bake_prims( else: start_method = "fork" logger.info( - f"_bake_prims: fanning {len(prims)} prims across {n_workers} workers ({start_method})" + f"_bake_prims: fanning {len(work_items)} prims across {n_workers} workers " + f"({start_method}); pre-skipped {n_pre_skipped}" ) t0 = time.time() mp_ctx = multiprocessing.get_context(start_method) executor = ProcessPoolExecutor(max_workers=n_workers, mp_context=mp_ctx) - progress_every = 25 if len(prims) <= 500 else 250 + n_skipped += n_pre_skipped + reasons.update(pre_skip_reasons) + total_work = len(work_items) + progress_every = 25 if total_work <= 500 else 250 with executor as ex: futures = [ex.submit(_process_one_prim, item) for item in work_items] done = 0 @@ -555,12 +607,12 @@ def _bake_prims( n_visuals += counters["visuals"] n_degenerate += counters["degenerate"] done += 1 - if done % progress_every == 0 or done == len(prims): + if done % progress_every == 0 or done == total_work: elapsed = time.time() - t0 - eta = elapsed * (len(prims) - done) / max(done, 1) + eta = elapsed * (total_work - done) / max(done, 1) logger.info( - f" prim {done}/{len(prims)} " - f"({100 * done / len(prims):.0f}%) " + f" prim {done}/{total_work} " + f"({100 * done / total_work:.0f}%) " f"elapsed={elapsed:.0f}s eta={eta:.0f}s " 
f"hulls_so_far={n_hulls_total}" ) @@ -998,6 +1050,3 @@ def cli_main() -> None: if __name__ == "__main__": cli_main() - - -__all__ = ["bake_scene_mjcf", "load_or_bake"] diff --git a/dimos/simulation/scene_assets/mesh_scene.py b/dimos/simulation/scene_assets/mesh_scene.py index ba16f7f396..20790a2ccb 100644 --- a/dimos/simulation/scene_assets/mesh_scene.py +++ b/dimos/simulation/scene_assets/mesh_scene.py @@ -413,6 +413,7 @@ def split_disconnected_scene_prims( min_component_extent: float, min_component_faces: int, can_split: Callable[[ScenePrimMesh], bool] | None = None, + force_split: Callable[[ScenePrimMesh], bool] | None = None, ) -> tuple[list[ScenePrimMesh], dict[str, int]]: """Split scene-graph nodes that are disconnected prop clusters. @@ -437,38 +438,42 @@ def split_disconnected_scene_prims( if can_split is not None and not can_split(prim): result.append(prim) continue + forced = force_split(prim) if force_split is not None else False if len(prim.triangles) < max(min_component_faces * 2, 1): result.append(prim) continue prim_extent = np.ptp(prim.vertices, axis=0) - if float(prim_extent.max()) < prim_min_extent: - result.append(prim) - continue - positive_extent = prim_extent[prim_extent > 1e-6] - if ( - len(positive_extent) < 3 - or float(positive_extent.max() / positive_extent.min()) < axis_ratio - ): - result.append(prim) - continue + if not forced: + if float(prim_extent.max()) < prim_min_extent: + result.append(prim) + continue + positive_extent = prim_extent[prim_extent > 1e-6] + if ( + len(positive_extent) < 3 + or float(positive_extent.max() / positive_extent.min()) < axis_ratio + ): + result.append(prim) + continue mesh = trimesh.Trimesh(vertices=prim.vertices, faces=prim.triangles, process=False) parts = mesh.split(only_watertight=False) - if len(parts) < min_components: + required_components = 2 if forced else min_components + if len(parts) < required_components: result.append(prim) continue - component_extents = np.array( - 
[np.ptp(np.asarray(part.vertices), axis=0).max() for part in parts], - dtype=np.float64, - ) - median_component_extent = float(np.median(component_extents)) - if median_component_extent <= 0.0: - result.append(prim) - continue - if float(prim_extent.max()) / median_component_extent < extent_ratio: - result.append(prim) - continue + if not forced: + component_extents = np.array( + [np.ptp(np.asarray(part.vertices), axis=0).max() for part in parts], + dtype=np.float64, + ) + median_component_extent = float(np.median(component_extents)) + if median_component_extent <= 0.0: + result.append(prim) + continue + if float(prim_extent.max()) / median_component_extent < extent_ratio: + result.append(prim) + continue emitted = 0 dropped = 0 @@ -476,7 +481,8 @@ def split_disconnected_scene_prims( vertices = np.asarray(part.vertices, dtype=np.float32) triangles = np.asarray(part.faces, dtype=np.int32) component_extent = float(np.ptp(vertices, axis=0).max()) if len(vertices) else 0.0 - if len(triangles) < min_component_faces or component_extent < min_component_extent: + too_few_faces = not forced and len(triangles) < min_component_faces + if too_few_faces or component_extent < min_component_extent: dropped += 1 continue result.append( @@ -697,14 +703,3 @@ def load_scene_prims( if not prims: raise RuntimeError(f"no Mesh prims with triangles found in {path}") return prims - - -__all__ = [ - "SceneMeshAlignment", - "ScenePrimMesh", - "floor_z_under_origin", - "load_scene_mesh", - "load_scene_prims", - "make_raycasting_scene", - "split_disconnected_scene_prims", -] diff --git a/dimos/simulation/scene_assets/spec.py b/dimos/simulation/scene_assets/spec.py index 35b666541d..3ddf599764 100644 --- a/dimos/simulation/scene_assets/spec.py +++ b/dimos/simulation/scene_assets/spec.py @@ -30,6 +30,7 @@ "browser_visual": "source", "browser_collision": "source", "mujoco": "dimos_world", + "mujoco_binary": "dimos_world", } @@ -58,6 +59,9 @@ class BrowserVisualSpec: simplify_error: float = 0.02 
texture_format: str | None = None max_texture_size: int | None = None + normalize_textures: bool = True + quantize: bool = False + use_gpu_instancing: bool = False max_meshes: int = 200 max_materials: int = 50 max_textures: int = 750 @@ -80,6 +84,7 @@ class MujocoSceneSpec: enabled: bool = True include_visual_mesh: bool = False + compile_binary: bool = False @dataclass(frozen=True) @@ -104,6 +109,7 @@ class ScenePackage: browser_collision_path: Path | None = None objects_path: Path | None = None mujoco_scene_path: Path | None = None + mujoco_binary_path: Path | None = None metadata_path: Path | None = None entities: list[dict[str, Any]] = field(default_factory=list) stats: dict[str, Any] = field(default_factory=dict) @@ -124,6 +130,10 @@ def to_json_dict(self) -> dict[str, Any]: ), "objects": _serialize_package_path(self.objects_path, package_dir), "mujoco_scene": _serialize_package_path(self.mujoco_scene_path, package_dir), + "mujoco_binary": _serialize_package_path( + self.mujoco_binary_path, + package_dir, + ), }, "entities": _serialize_entity_paths(self.entities, package_dir), "stats": self.stats, @@ -154,6 +164,7 @@ def load_scene_package(path: str | Path) -> ScenePackage: ), objects_path=_resolve_package_path(artifacts.get("objects"), package_dir), mujoco_scene_path=_resolve_package_path(artifacts.get("mujoco_scene"), package_dir), + mujoco_binary_path=_resolve_package_path(artifacts.get("mujoco_binary"), package_dir), metadata_path=metadata_path, entities=_resolve_entity_paths(raw.get("entities", []), package_dir), stats=raw.get("stats", {}), @@ -173,6 +184,7 @@ def _validate_artifact_frames(raw: dict[str, Any], metadata_path: Path) -> None: "browser_visual": "browser_visual", "browser_collision": "browser_collision", "mujoco_scene": "mujoco", + "mujoco_binary": "mujoco_binary", } for artifact_name, frame_name in required.items(): if artifacts.get(artifact_name) and frames.get(frame_name) != ARTIFACT_FRAMES[frame_name]: From 
fe0cc3cf4e8eccf665b252ddb0a6f3a75b361dc3 Mon Sep 17 00:00:00 2001 From: Nabla7 Date: Fri, 26 Jun 2026 05:02:16 +0200 Subject: [PATCH 3/6] Fix scene cooking type checks --- dimos/experimental/pimsim/scene/browser_collision.py | 3 +-- dimos/experimental/pimsim/scene/command.py | 4 +++- dimos/experimental/pimsim/scene/cook.py | 8 ++++---- dimos/experimental/pimsim/scene/plan.py | 2 +- dimos/simulation/mujoco/scene_mesh_to_mjcf.py | 2 +- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/dimos/experimental/pimsim/scene/browser_collision.py b/dimos/experimental/pimsim/scene/browser_collision.py index 4c594752b6..909e4d4116 100644 --- a/dimos/experimental/pimsim/scene/browser_collision.py +++ b/dimos/experimental/pimsim/scene/browser_collision.py @@ -27,12 +27,11 @@ from dimos.experimental.pimsim.scene.inspect import inspect_scene_asset from dimos.simulation.mujoco.collision_spec import CollisionSpec from dimos.simulation.scene_assets.mesh_scene import ( - SceneMeshAlignment, ScenePrimMesh, load_scene_prims, split_disconnected_scene_prims, ) -from dimos.simulation.scene_assets.spec import BrowserCollisionSpec +from dimos.simulation.scene_assets.spec import BrowserCollisionSpec, SceneMeshAlignment from dimos.utils.logging_config import setup_logger logger = setup_logger() diff --git a/dimos/experimental/pimsim/scene/command.py b/dimos/experimental/pimsim/scene/command.py index f84782a4f3..687d86841d 100644 --- a/dimos/experimental/pimsim/scene/command.py +++ b/dimos/experimental/pimsim/scene/command.py @@ -23,6 +23,7 @@ import shlex import subprocess import time +from typing import IO, cast from dimos.utils.logging_config import setup_logger @@ -62,9 +63,10 @@ def run_logged_command( bufsize=1, ) assert proc.stdout is not None + stdout = cast("IO[str]", proc.stdout) selector = selectors.DefaultSelector() - selector.register(proc.stdout, selectors.EVENT_READ) + selector.register(stdout, selectors.EVENT_READ) stdout_open = True try: diff --git 
a/dimos/experimental/pimsim/scene/cook.py b/dimos/experimental/pimsim/scene/cook.py index 7f656865fe..a70e2da549 100644 --- a/dimos/experimental/pimsim/scene/cook.py +++ b/dimos/experimental/pimsim/scene/cook.py @@ -41,12 +41,12 @@ from dimos.experimental.pimsim.scene.visual_glb import cook_browser_visual from dimos.simulation.mujoco.collision_spec import CollisionSpec from dimos.simulation.mujoco.scene_mesh_to_mjcf import load_or_bake -from dimos.simulation.scene_assets.mesh_scene import SceneMeshAlignment from dimos.simulation.scene_assets.spec import ( BrowserCollisionSpec, BrowserVisualSpec, MujocoSceneSpec, SceneCookSpec, + SceneMeshAlignment, ScenePackage, ) from dimos.utils.data import get_data_dir @@ -295,9 +295,9 @@ def _cook_entity_prototype_collision( prototype_id = entity.get("prototype_id") if not isinstance(prototype_id, str): continue - hull_paths = hulls_by_prototype.get(prototype_id) - if hull_paths: - entity["collision_paths"] = [str(path) for path in hull_paths] + prototype_hull_paths = hulls_by_prototype.get(prototype_id) + if prototype_hull_paths: + entity["collision_paths"] = [str(path) for path in prototype_hull_paths] return counts diff --git a/dimos/experimental/pimsim/scene/plan.py b/dimos/experimental/pimsim/scene/plan.py index 06a08483bc..fe4fd81047 100644 --- a/dimos/experimental/pimsim/scene/plan.py +++ b/dimos/experimental/pimsim/scene/plan.py @@ -37,10 +37,10 @@ ) from dimos.simulation.mujoco.collision_spec import CollisionSpec from dimos.simulation.scene_assets.mesh_scene import ( - SceneMeshAlignment, ScenePrimMesh, load_scene_prims, ) +from dimos.simulation.scene_assets.spec import SceneMeshAlignment _HASH_SUFFIX_RE = re.compile(r"_[0-9a-fA-F]{6,}$") diff --git a/dimos/simulation/mujoco/scene_mesh_to_mjcf.py b/dimos/simulation/mujoco/scene_mesh_to_mjcf.py index 143fffed51..35367a8b78 100644 --- a/dimos/simulation/mujoco/scene_mesh_to_mjcf.py +++ b/dimos/simulation/mujoco/scene_mesh_to_mjcf.py @@ -77,11 +77,11 @@ decide_for_prim, ) 
from dimos.simulation.scene_assets.mesh_scene import ( - SceneMeshAlignment, ScenePrimMesh, load_scene_prims, split_disconnected_scene_prims, ) +from dimos.simulation.scene_assets.spec import SceneMeshAlignment from dimos.utils.logging_config import setup_logger logger = setup_logger() From d2e339e9481ce78bbae8f75cfe572469a9b116c7 Mon Sep 17 00:00:00 2001 From: Nabla7 Date: Fri, 26 Jun 2026 05:07:48 +0200 Subject: [PATCH 4/6] Fix scene cooking command typing --- dimos/experimental/pimsim/scene/command.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dimos/experimental/pimsim/scene/command.py b/dimos/experimental/pimsim/scene/command.py index 687d86841d..636bff6c5b 100644 --- a/dimos/experimental/pimsim/scene/command.py +++ b/dimos/experimental/pimsim/scene/command.py @@ -23,7 +23,6 @@ import shlex import subprocess import time -from typing import IO, cast from dimos.utils.logging_config import setup_logger @@ -63,7 +62,7 @@ def run_logged_command( bufsize=1, ) assert proc.stdout is not None - stdout = cast("IO[str]", proc.stdout) + stdout = proc.stdout selector = selectors.DefaultSelector() selector.register(stdout, selectors.EVENT_READ) @@ -72,10 +71,10 @@ def run_logged_command( try: while True: if stdout_open: - for key, _ in selector.select(timeout=1.0): - line = key.fileobj.readline() + for _, _ in selector.select(timeout=1.0): + line = stdout.readline() if line == "": - selector.unregister(key.fileobj) + selector.unregister(stdout) stdout_open = False break clean = line.rstrip() From a7704952fb092968f7296a325c6759cb55bd311b Mon Sep 17 00:00:00 2001 From: Nabla7 Date: Fri, 26 Jun 2026 18:23:21 +0200 Subject: [PATCH 5/6] Clarify scene cooking workflow --- dimos/experimental/pimsim/scene/README.md | 113 +++++++++-- .../scene/demo_scene_cook_requirements.py | 186 ++++++++++++++++++ 2 files changed, 281 insertions(+), 18 deletions(-) create mode 100644 dimos/experimental/pimsim/scene/demo_scene_cook_requirements.py diff --git 
a/dimos/experimental/pimsim/scene/README.md b/dimos/experimental/pimsim/scene/README.md index 74aa8a31f3..dd71d263ac 100644 --- a/dimos/experimental/pimsim/scene/README.md +++ b/dimos/experimental/pimsim/scene/README.md @@ -1,18 +1,75 @@ -# Scene Packages +# Scene Cooking -A scene package is the cooked, robot-agnostic form of a 3D environment. It -contains the visual mesh, collision geometry, per-object metadata, a scene-only -MuJoCo wrapper, and optional runtime entities. +Scene cooking is the offline half of the scene-package workflow. It turns a raw +environment asset plus an authored sidecar into a robot-agnostic package that +runtime modules can load. -The runtime rule is simple: +```text +source asset + .cook.json -> cooked scene package -> runtime attaches robot +``` + +This PR is part 2 of the split. Part 1 adds runtime scene loading and the +consumer contract in `dimos/simulation/scenes/README.md`. Keep that split clean: +runtime code loads `scene.meta.json`; cooking code is the only place that should +need Blender, CoACD, gltfpack, Open3D, USD tooling, or source-asset heuristics. + +The robot is not part of the scene package. The cooker prepares the world once, +offline. `MujocoSimModule` can then load the cooked world at runtime and attach +the robot MJCF into the same `MjSpec`. + +## Minimum Interface + +The cooker needs four things: ```text -source asset + sidecar -> cooked scene package -> simulator attaches any robot +1. source asset .glb, .blend, .usd, .obj, .ply, .stl, ... +2. sidecar .cook.json, optional but recommended +3. alignment scale, rotation, translation, up-axis +4. output directory data/scene_packages/ +``` + +Install the Python cooking dependencies with: + +```bash +uv sync --extra scene +``` + +For full DimOS development environments, `uv sync --extra all` also includes the +scene extra. 
External tools are still separate system binaries: + +```text +blender required for .blend normalization and authored visual extraction +gltfpack recommended for browser visual optimization +``` + +Before a long cook, run the preflight module. It normalizes authored formats +when needed, loads the sidecar, counts source prims, builds the entity plan, and +prints the exact cook command: + +```bash +python -m dimos.experimental.pimsim.scene.demo_scene_cook_requirements \ + data/dimos_office_mesh/dimos_office_mesh.glb \ + --cook-spec data/dimos_office_mesh/dimos_office_mesh.cook.json \ + --output-dir data/scene_packages/dimos_office \ + --scale 2.0 \ + --no-y-up ``` -The robot is not part of the package. The cooker prepares the world once, -offline. `MujocoSimModule` loads the cooked world at runtime and attaches the -robot MJCF into the same `MjSpec`. +Then cook: + +```bash +python -m dimos.experimental.pimsim.scene.cook \ + data/dimos_office_mesh/dimos_office_mesh.glb \ + --cook-spec data/dimos_office_mesh/dimos_office_mesh.cook.json \ + --output-dir data/scene_packages/dimos_office \ + --scale 2.0 \ + --no-y-up \ + --rebake +``` + +The output is a package with one manifest and backend-specific artifacts. The +runtime consumer should use `load_scene_package()` or `resolve_scene_package()`, +not the raw source asset. ## Package Layout @@ -202,7 +259,7 @@ python -m dimos.experimental.pimsim.scene.cook \ That writes `mujoco//wrapper.mjb`. It is still scene-only; it does not include a robot. -## Tool Diagnostics +## Tool Diagnostics And Rerun GLBs Scene cooking shells out to production asset tools. Their output is streamed through the DimOS logger with a command label, elapsed-time heartbeats, and a @@ -210,6 +267,17 @@ tail of recent output on failure. Long Blender imports should therefore show which stage is running: source normalization, authored visual extraction, browser visual import, decimation, join, or export. 
+Rerun can load normal textured GLBs, but it is strict about required glTF +extensions. The default visual cook is intentionally conservative so the output +is useful in Rerun and in generic browser viewers: + +- `gltfpack -noq` avoids `KHR_mesh_quantization` as a required extension. +- `KHR_texture_transform` is demoted from required to used. +- Embedded textures are normalized to ordinary 8-bit PNG payloads unless + explicitly disabled. +- WebP/KTX2 texture compression is opt-in because it requires viewer support + and a native meshoptimizer `gltfpack` build. + `gltfpack` is the preferred browser visual optimizer for GLB inputs, but install a native meshoptimizer `gltfpack` binary when using texture compression. The Node/npx package can optimize geometry, but it is built without WebP/KTX texture @@ -358,17 +426,19 @@ top_z = pos_z + size_z bottom_z = pos_z - size_z ``` -## Loading In MuJoCo +## Loading Cooked Output -The G1 GR00T WBC blueprint already uses the scene package path when `--scene` -is provided: +Runtime details live in `dimos/simulation/scenes/README.md`. For a quick G1 +GR00T WBC check with a cooked named scene: ```bash python -m dimos.robot.cli.dimos \ --simulation mujoco \ --scene office \ --viewer rerun \ - run unitree-g1-groot-wbc + --n-workers 12 \ + run unitree-g1-groot-wbc \ + -o mujocosimmodule.headless=true ``` `office` resolves through `dimos/simulation/scenes/catalog.py` to: @@ -390,20 +460,27 @@ At runtime, `MujocoSimModule`: The robot MJCF must stay robot-only: no office floor, no scene walls, no furniture, no manipulation rig. Scene geometry belongs in the cooked package. +Prefer `mujocosimmodule.headless=true` and inspect the robot, scene GLB, lidar, +costmaps, and planned path in the Rerun native viewer. `headless=false` opens +MuJoCo's native window and can run much slower, so use it only for direct MuJoCo +render/contact debugging. 
+ For a large scene where XML compile is too slow, load a composed binary model instead of the scene package metadata: ```bash python -m dimos.robot.cli.dimos \ --simulation mujoco \ - --scene /home/pim/Desktop/dimos-scene-cooking-part2/data/scene_packages/supermarket_static_product_primitives_20dyn/mujoco/composed/unitree-g1-groot-wbc_spawn_9p2_11p8_yaw_m1p57.mjb \ - --n-workers 10 \ + --scene /path/to/package/mujoco/composed/unitree-g1-groot-wbc_spawn.mjb \ + --viewer rerun \ + --n-workers 12 \ run unitree-g1-groot-wbc \ - -o mujocosimmodule.headless=false + -o mujocosimmodule.headless=true ``` That path skips scene-package runtime composition. The `.mjb` already contains -the G1 robot and the supermarket scene at the authored test spawn. +the robot, cooked scene, spawn pose, and selected runtime entity set for that +build. ## Sidecar Schema diff --git a/dimos/experimental/pimsim/scene/demo_scene_cook_requirements.py b/dimos/experimental/pimsim/scene/demo_scene_cook_requirements.py new file mode 100644 index 0000000000..0acb836e59 --- /dev/null +++ b/dimos/experimental/pimsim/scene/demo_scene_cook_requirements.py @@ -0,0 +1,186 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Small reference preflight for scene cooking inputs. 
+ +Run: + + python -m dimos.experimental.pimsim.scene.demo_scene_cook_requirements \ + data/dimos_office_mesh/dimos_office_mesh.glb \ + --cook-spec data/dimos_office_mesh/dimos_office_mesh.cook.json \ + --output-dir data/scene_packages/dimos_office \ + --scale 2.0 \ + --no-y-up + +This module shows what a cooker needs before starting a potentially long bake: +one source asset, one optional sidecar, alignment, and an output directory. It +does not emit scene package artifacts. +""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path +import shlex +from typing import Any + +import numpy as np + +from dimos.experimental.pimsim.scene.inspect import inspect_scene_asset +from dimos.experimental.pimsim.scene.plan import build_scene_cook_plan +from dimos.experimental.pimsim.scene.sidecar import SceneCookSidecar +from dimos.experimental.pimsim.scene.source_asset import prepare_scene_source +from dimos.simulation.scene_assets.mesh_scene import load_scene_prims +from dimos.simulation.scene_assets.spec import SceneMeshAlignment + + +def inspect_cook_requirements( + source: str | Path, + *, + output_dir: str | Path, + cook_spec: str | Path | None = None, + alignment: SceneMeshAlignment | None = None, + sample_prims: int = 8, + rebake: bool = False, +) -> dict[str, Any]: + """Return a JSON-serializable summary of the inputs needed to cook a scene.""" + align = alignment or SceneMeshAlignment() + source_path = Path(source).expanduser().resolve() + prepared = prepare_scene_source(source_path, rebake=rebake) + sidecar = ( + SceneCookSidecar.from_json(cook_spec) + if cook_spec is not None + else SceneCookSidecar.auto_discover(source_path) + ) + stats = inspect_scene_asset(prepared.cook_path) + prims = list(load_scene_prims(prepared.cook_path, alignment=align)) + plan = build_scene_cook_plan( + prepared.cook_path, + sidecar=sidecar, + alignment=align, + output_dir=output_dir, + ) + + return { + "source": str(source_path), + 
"prepared_source": prepared.to_json_dict(), + "output_dir": str(Path(output_dir).expanduser().resolve()), + "alignment": { + "scale": align.scale, + "translation": list(align.translation), + "rotation_zyx_deg": list(align.rotation_zyx_deg), + "y_up": align.y_up, + }, + "source_stats": stats.to_json_dict(), + "source_prim_count": len(prims), + "sample_prims": [_prim_summary(prim) for prim in prims[:sample_prims]], + "sidecar": { + "path": str(sidecar.path) if sidecar.path else None, + "collision_default": sidecar.collision.default, + "collision_override_count": len(sidecar.collision.prim_overrides), + "interactable_count": len(sidecar.interactables), + "entity_group_count": len(sidecar.entity_groups), + }, + "cook_plan": { + "entities": len(plan.entities), + "entity_prototypes": len(plan.prototypes), + "stats": plan.stats, + }, + "cook_command": _cook_command( + source_path, + output_dir=output_dir, + cook_spec=cook_spec, + alignment=align, + rebake=rebake, + ), + } + + +def _prim_summary(prim: Any) -> dict[str, Any]: + vertices = np.asarray(prim.vertices) + lo = np.min(vertices, axis=0) + hi = np.max(vertices, axis=0) + return { + "name": prim.visual_node_name or prim.prim_path or prim.name, + "min": lo.round(4).tolist(), + "max": hi.round(4).tolist(), + "extent": (hi - lo).round(4).tolist(), + } + + +def _cook_command( + source: Path, + *, + output_dir: str | Path, + cook_spec: str | Path | None, + alignment: SceneMeshAlignment, + rebake: bool, +) -> str: + command = [ + "python", + "-m", + "dimos.experimental.pimsim.scene.cook", + str(source), + "--output-dir", + str(output_dir), + "--scale", + str(alignment.scale), + ] + if cook_spec is not None: + command.extend(["--cook-spec", str(cook_spec)]) + if alignment.translation != (0.0, 0.0, 0.0): + command.extend(["--translation", *[str(value) for value in alignment.translation]]) + if alignment.rotation_zyx_deg != (0.0, 0.0, 0.0): + command.extend( + ["--rotation-zyx-deg", *[str(value) for value in 
alignment.rotation_zyx_deg]] + ) + if not alignment.y_up: + command.append("--no-y-up") + if rebake: + command.append("--rebake") + return " ".join(shlex.quote(part) for part in command) + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("source") + parser.add_argument("--cook-spec") + parser.add_argument("--output-dir", required=True) + parser.add_argument("--scale", type=float, default=1.0) + parser.add_argument("--translation", type=float, nargs=3, default=(0.0, 0.0, 0.0)) + parser.add_argument("--rotation-zyx-deg", type=float, nargs=3, default=(0.0, 0.0, 0.0)) + parser.add_argument("--no-y-up", action="store_true") + parser.add_argument("--sample-prims", type=int, default=8) + parser.add_argument("--rebake", action="store_true") + args = parser.parse_args() + + result = inspect_cook_requirements( + args.source, + output_dir=args.output_dir, + cook_spec=args.cook_spec, + alignment=SceneMeshAlignment( + scale=args.scale, + translation=tuple(args.translation), + rotation_zyx_deg=tuple(args.rotation_zyx_deg), + y_up=not args.no_y_up, + ), + sample_prims=args.sample_prims, + rebake=args.rebake, + ) + print(json.dumps(result, indent=2, sort_keys=True)) + + +if __name__ == "__main__": + main() From db88a300e03e3def53f75f578dc682ba544f2c8a Mon Sep 17 00:00:00 2001 From: Nabla7 Date: Fri, 26 Jun 2026 18:45:53 +0200 Subject: [PATCH 6/6] Refine scene cooking visual targets --- dimos/experimental/pimsim/scene/README.md | 604 +++++------------- dimos/experimental/pimsim/scene/cook.py | 106 ++- .../scene/demo_scene_cook_requirements.py | 186 ------ dimos/experimental/pimsim/scene/test_spec.py | 38 +- dimos/experimental/pimsim/scene/visual_glb.py | 245 +------ dimos/simulation/scene_assets/glb.py | 255 ++++++++ dimos/simulation/scene_assets/spec.py | 82 ++- .../scene_assets/test_glb.py} | 22 +- 8 files changed, 622 insertions(+), 916 deletions(-) delete mode 100644 
dimos/experimental/pimsim/scene/demo_scene_cook_requirements.py create mode 100644 dimos/simulation/scene_assets/glb.py rename dimos/{experimental/pimsim/scene/test_visual_glb.py => simulation/scene_assets/test_glb.py} (88%) diff --git a/dimos/experimental/pimsim/scene/README.md b/dimos/experimental/pimsim/scene/README.md index dd71d263ac..71f39a118d 100644 --- a/dimos/experimental/pimsim/scene/README.md +++ b/dimos/experimental/pimsim/scene/README.md @@ -1,213 +1,46 @@ # Scene Cooking -Scene cooking is the offline half of the scene-package workflow. It turns a raw -environment asset plus an authored sidecar into a robot-agnostic package that -runtime modules can load. +Scene cooking turns an authored 3D environment into a DimOS scene package. A +package is the runtime contract for simulators and viewers: it contains a +manifest, backend-specific browser assets, MuJoCo collision assets, optional +runtime entities, and metadata about frames and alignment. + +The robot is not baked into a normal scene package. Runtime code loads the +package and attaches the robot it wants to simulate. ```text -source asset + .cook.json -> cooked scene package -> runtime attaches robot +source asset + cook sidecar + alignment -> scene package -> runtime attaches robot ``` -This PR is part 2 of the split. Part 1 adds runtime scene loading and the -consumer contract in `dimos/simulation/scenes/README.md`. Keep that split clean: -runtime code loads `scene.meta.json`; cooking code is the only place that should -need Blender, CoACD, gltfpack, Open3D, USD tooling, or source-asset heuristics. - -The robot is not part of the scene package. The cooker prepares the world once, -offline. `MujocoSimModule` can then load the cooked world at runtime and attach -the robot MJCF into the same `MjSpec`. - -## Minimum Interface +## Inputs -The cooker needs four things: +The cooker needs: ```text -1. source asset .glb, .blend, .usd, .obj, .ply, .stl, ... -2. 
sidecar .cook.json, optional but recommended -3. alignment scale, rotation, translation, up-axis -4. output directory data/scene_packages/ +source asset .glb, .gltf, .blend, .usd, .obj, .ply, .stl, ... +cook sidecar .cook.json, optional but strongly recommended +alignment scale, rotation, translation, and up-axis +output directory data/scene_packages/ +visual target rerun, babylon, or generic ``` -Install the Python cooking dependencies with: +Install the Python dependencies with: ```bash uv sync --extra scene ``` -For full DimOS development environments, `uv sync --extra all` also includes the -scene extra. External tools are still separate system binaries: +External tools are installed separately: ```text -blender required for .blend normalization and authored visual extraction +blender required for .blend files and entity visual extraction gltfpack recommended for browser visual optimization ``` -Before a long cook, run the preflight module. It normalizes authored formats -when needed, loads the sidecar, counts source prims, builds the entity plan, and -prints the exact cook command: - -```bash -python -m dimos.experimental.pimsim.scene.demo_scene_cook_requirements \ - data/dimos_office_mesh/dimos_office_mesh.glb \ - --cook-spec data/dimos_office_mesh/dimos_office_mesh.cook.json \ - --output-dir data/scene_packages/dimos_office \ - --scale 2.0 \ - --no-y-up -``` - -Then cook: - -```bash -python -m dimos.experimental.pimsim.scene.cook \ - data/dimos_office_mesh/dimos_office_mesh.glb \ - --cook-spec data/dimos_office_mesh/dimos_office_mesh.cook.json \ - --output-dir data/scene_packages/dimos_office \ - --scale 2.0 \ - --no-y-up \ - --rebake -``` - -The output is a package with one manifest and backend-specific artifacts. The -runtime consumer should use `load_scene_package()` or `resolve_scene_package()`, -not the raw source asset. 
- -## Package Layout - -```text -data/scene_packages// -├── scene.meta.json manifest: alignment, artifact paths, entities, stats -├── mujoco// -│ ├── wrapper.xml scene-only MJCF, no robot -│ ├── wrapper.mjb optional scene-only compiled MuJoCo model -│ └── *.obj static collision assets -├── mujoco/composed/ -│ └── _.mjb optional robot+scene compiled MuJoCo model -├── entities// -│ ├── visual.glb per-entity visual, in entity-local frame -│ └── mujoco_collision/ cook-time convex hulls -└── browser/ - ├── visual.glb - ├── collision.glb - └── objects.json semantic object table for browser/raycast users -``` - -Packages are content-hash keyed on the source mesh, alignment, sidecar, and cook -schema version. Changing one of those inputs creates a new cooked output. - -## MuJoCo Artifact Split - -There are two MuJoCo loading modes. Keep the distinction explicit when reviewing -or testing scene packages. - -### Scene Package XML - -This is the default path: - -```text -scene.meta.json -> mujoco//wrapper.xml + entities -> runtime attaches robot -> compile MjModel -``` - -Use it for normal scenes and for robot-agnostic packages. The package contains -the world only. At runtime `MujocoSimModule` loads `wrapper.xml`, attaches the -requested robot MJCF, adds dynamic entities from `scene.meta.json`, and compiles -one in-memory model. This keeps one cooked package usable by many robots and -spawn points. - -Tradeoff: MuJoCo XML compile cost is paid at startup. That is fine for office- -scale scenes. It is not fine for product-dense scenes with tens of thousands of -individual geoms. - -### Composed Binary MJB - -This is the fast-load path for huge scenes: - -```text -wrapper.xml + robot MJCF + selected spawn/entities -> composed/_.mjb -``` - -Use it when XML compile time dominates startup, such as the supermarket scene -with thousands of shelf products. The `.mjb` already contains the robot, scene, -spawn pose, static collision, and any runtime entities chosen for that build. 
-MuJoCo loads that binary model directly. - -Tradeoff: a composed `.mjb` is not robot-agnostic. Build one per robot model, -spawn/entity configuration, and meaningful scene revision. You also cannot edit -it with `MjSpec` after loading it; if the robot, spawn, or dynamic-entity set -changes, rebuild the binary from the XML package. - -The scene-only `wrapper.mjb` produced by `--compile-mujoco-binary` is useful for -profiling and cache validation, but it does not replace runtime robot attach. -For G1 WBC testing, use a composed robot+scene `.mjb`. - -## Spec And Backends - -`dimos/simulation/scene_assets/spec.py` is the shared scene-package contract. It -defines: - -- cook input specs such as `SceneCookSpec`, `BrowserVisualSpec`, - `BrowserCollisionSpec`, and `MujocoSceneSpec`; -- the runtime `ScenePackage` object; -- the on-disk `scene.meta.json` shape; -- `load_scene_package()`, which resolves package-relative artifact paths and - validates artifact frame metadata. - -This spec is intentionally general. A module or simulator that wants to consume -a scene package should not parse `scene.meta.json` by hand. It should load the -package once: - -```python -from dimos.simulation.scene_assets.spec import load_scene_package - -package = load_scene_package("data/scene_packages/dimos_office/scene.meta.json") -``` - -and then consume the artifact it understands: - -- browser/viewer systems use `package.visual_path`, `package.browser_collision_path`, - and `package.objects_path`; -- MuJoCo uses `package.mujoco_scene_path` plus `package.entities`; -- future simulators can add their own artifact fields/specs without changing the - source mesh loader or the scene-name catalog. - -`dimos/simulation/scene_assets/mesh_scene.py` is a different layer. It loads raw -source geometry and applies `SceneMeshAlignment`; it is used by cook-time tools, -not by normal runtime modules. Runtime modules should prefer the cooked package -contract from `spec.py`. 
- -`dimos/simulation/scenes/catalog.py` is only name/path resolution. It maps user -inputs like `--scene office` to a loaded `ScenePackage`. It does not define what -a package is, and it should not know how MuJoCo, browser viewers, or other -systems load their artifacts. - -## Workflow - -1. Inspect the source asset and identify the prims that need authored collision. -2. Write `.cook.json` next to the source mesh. -3. Cook the package. -4. Verify the generated `mujoco//wrapper.xml`. -5. Load it through a blueprint with `--scene`. - -The DimOS office scene is the reference example below. - -## Authored Blender Sources - -The cooker accepts `.blend` files as authored scene sources. A Blender file is -not a concrete mesh asset: it can contain view layers, disabled source -collections, Geometry Nodes, procedural instances, cameras, lights, text, and -other authoring data. Before the normal cook starts, DimOS runs Blender -headlessly and normalizes the evaluated dependency graph into a GLB: - -```text -scene.blend -> evaluated depsgraph GLB -> normal scene cook -``` - -The normalizer walks `depsgraph.object_instances`, so Geometry Nodes and -instanced collection content are realized as concrete mesh nodes instead of -being dropped by a plain Blender glTF export. Direct mesh formats (`.glb`, -`.gltf`, `.obj`, `.ply`, `.stl`, `.usd`, `.usda`, `.usdc`, `.usdz`) skip this -step. +## Inspect First -To inspect a `.blend` scene with the same geometry the cooker will see: +Do not write collision policy blind. 
Inspect the source in the same frame the +cooker will use: ```bash python - <<'PY' @@ -218,157 +51,35 @@ import numpy as np from dimos.experimental.pimsim.scene.source_asset import prepare_scene_source from dimos.simulation.scene_assets.mesh_scene import SceneMeshAlignment, load_scene_prims -prepared = prepare_scene_source(Path("data/my_scene/source.blend")) -print(prepared.to_json_dict()) - -for prim in load_scene_prims(prepared.cook_path, alignment=SceneMeshAlignment()): - name = prim.visual_node_name or prim.prim_path or prim.name - if "Floor" not in name: - continue - lo = np.min(prim.vertices, axis=0) - hi = np.max(prim.vertices, axis=0) - print(name, lo.round(4).tolist(), hi.round(4).tolist()) -PY -``` - -Then cook the `.blend` directly: - -```bash -python -m dimos.experimental.pimsim.scene.cook \ - data/my_scene/source.blend \ - --cook-spec data/my_scene/source.cook.json \ - --output-dir data/scene_packages/my_scene \ - --rebake -``` - -The sidecar still targets normalized mesh prim names. Inspect first, then write -the sidecar against the names printed from `load_scene_prims(prepared.cook_path)`. - -To also emit a scene-only MuJoCo binary for profiling or cache validation, add -`--compile-mujoco-binary`: - -```bash -python -m dimos.experimental.pimsim.scene.cook \ - data/my_scene/source.blend \ - --cook-spec data/my_scene/source.cook.json \ - --output-dir data/scene_packages/my_scene \ - --compile-mujoco-binary \ - --rebake -``` - -That writes `mujoco//wrapper.mjb`. It is still scene-only; it does not -include a robot. - -## Tool Diagnostics And Rerun GLBs - -Scene cooking shells out to production asset tools. Their output is streamed -through the DimOS logger with a command label, elapsed-time heartbeats, and a -tail of recent output on failure. Long Blender imports should therefore show -which stage is running: source normalization, authored visual extraction, -browser visual import, decimation, join, or export. 
- -Rerun can load normal textured GLBs, but it is strict about required glTF -extensions. The default visual cook is intentionally conservative so the output -is useful in Rerun and in generic browser viewers: - -- `gltfpack -noq` avoids `KHR_mesh_quantization` as a required extension. -- `KHR_texture_transform` is demoted from required to used. -- Embedded textures are normalized to ordinary 8-bit PNG payloads unless - explicitly disabled. -- WebP/KTX2 texture compression is opt-in because it requires viewer support - and a native meshoptimizer `gltfpack` build. - -`gltfpack` is the preferred browser visual optimizer for GLB inputs, but install -a native meshoptimizer `gltfpack` binary when using texture compression. The -Node/npx package can optimize geometry, but it is built without WebP/KTX texture -compression support. If a cook fails immediately after requesting -`--visual-texture-format webp` or `ktx2`, put a native `gltfpack` on `PATH` or -use `--visual-texture-format none`. - -The default visual cook is conservative: DimOS passes `-noq` to gltfpack so the -output GLB has no required quantization extension. This produces larger files, -but keeps the artifact loadable by generic GLB consumers such as Rerun. Use -`--visual-quantize` only when the target viewer supports `KHR_mesh_quantization`. -With uncompressed textures, DimOS also rewrites embedded images to ordinary -8-bit PNG payloads after gltfpack. This keeps the source materials textured -while avoiding renderer-specific failures from high-bit-depth PNGs or required -texture-compression extensions. Use `--no-visual-texture-normalization` only -when a downstream viewer needs the original embedded texture encoding. -The final GLB sanitize pass also demotes `KHR_texture_transform` from -`extensionsRequired` to `extensionsUsed`, since Rerun rejects it as required but -can still load the asset when the transform is treated as optional. 
- -If `gltfpack` exits with only `unreachable`, that is an internal optimizer -crash. Re-run with the native binary first. If it still fails, diagnose with -`--visual-optimizer blender` or `copy`, or split the visual asset into smaller -source chunks before optimizing. - -For product-dense authored scenes, glTF GPU instancing can keep browser visuals -small. The supermarket source has tens of thousands of repeated product -instances; without instancing, an optimizer can turn compact source meshes into -millions of duplicated output triangles. Keep the default off when the package -must render in generic Rerun viewers, and enable it explicitly with -`--visual-gpu-instancing` only for viewers that support -`EXT_mesh_gpu_instancing`. -If preserving the authored node instancing matters more than reducing draw -calls, use `--visual-optimizer copy` after the filtered static visual source has -been generated. That keeps repeated assets as normal glTF nodes instead of -flattening them into a giant mesh. - -## Office Example - -The office source mesh lives in data: - -```text -data/dimos_office_mesh/dimos_office_mesh.glb -``` - -That file is a visual asset, not a physics contract. 
Before writing the sidecar, -inspect the scene graph and bounds of the source prims after applying the same -alignment used by the cooker: - -```bash -python - <<'PY' -from pathlib import Path - -import numpy as np - -from dimos.simulation.scene_assets.mesh_scene import ( - SceneMeshAlignment, - load_scene_prims, -) - source = Path("data/dimos_office_mesh/dimos_office_mesh.glb") +prepared = prepare_scene_source(source) alignment = SceneMeshAlignment(scale=2.0, y_up=False) -for prim in load_scene_prims(source, alignment=alignment): +for prim in load_scene_prims(prepared.cook_path, alignment=alignment): name = prim.visual_node_name or prim.prim_path or prim.name - if "Floor" not in name: - continue lo = np.min(prim.vertices, axis=0) hi = np.max(prim.vertices, axis=0) extent = hi - lo - print(f"{name}: min={lo.round(4).tolist()} max={hi.round(4).tolist()} extent={extent.round(4).tolist()}") + if "Floor" in name or "Wall" in name: + print( + name, + "min=", lo.round(4).tolist(), + "max=", hi.round(4).tolist(), + "extent=", extent.round(4).tolist(), + ) PY ``` -For the current office GLB, the relevant support prim is `Floor_Plane.002`. Its -Z extent is effectively zero: it is a visual sheet. If that sheet is cooked into -a very thin MuJoCo box, humanoid foot contacts can clip through it during walking -even though the robot can stand still on it. - -Do not solve that by adding an infinite MuJoCo plane. That ignores the authored -scene geometry and breaks as soon as floors have different heights, holes, -stairs, ramps, platforms, or multiple stories. The sidecar should instead state -the authored physics intent for the named support surface. +For `.blend` files, `prepare_scene_source()` runs Blender headlessly and exports +the evaluated dependency graph to a cached GLB. Geometry Nodes and collection +instances are realized before the normal cook starts. 
-Create or edit: +## Author Collision Policy -```text -data/dimos_office_mesh/dimos_office_mesh.cook.json -``` +The cook sidecar is where scene-specific physics intent lives. Use it for +support surfaces, walls, fixtures, interactables, and repeated entity groups. -with the floor override: +Example floor policy: ```json { @@ -385,16 +96,46 @@ with the floor override: } ``` -Reasoning: +That means: match source prims named `Floor*`, cook them as boxes, make them at +least 4 cm thick, and keep the authored top surface height unchanged. This is +better than adding an infinite MuJoCo plane because it preserves multi-floor +buildings, ramps, platforms, and holes in the environment. -- `Floor*` matches `Floor_Plane.002` without depending on the sanitized MJCF - name. -- `type: "box"` says the floor is a support slab, not a raw visual mesh. -- `min_thickness: 0.04` gives the support 4 cm of total thickness. -- `preserve: "top"` keeps the walkable surface at the authored visual height and - expands the slab downward. +Static collision types: -Cook the office package: +```text +auto | box | sphere | cylinder | capsule | plane | hull | mesh | decompose | skip +``` + +Interactables and entity groups become runtime entities in `scene.meta.json`. +Mesh entities are extracted once, decomposed once, and reused by instances when +the sidecar gives a shared prototype key. + +## Choose A Browser Visual Target + +Browser-facing GLBs are backend-specific. Different viewers support different +glTF extensions and texture encodings, so the cooker exposes explicit visual +targets instead of hardcoding one heuristic. + +```text +rerun conservative GLB for Rerun; no mesh quantization, normalized textures +babylon web-oriented GLB for Babylon/PimSim; quantization and instancing allowed +generic conservative generic GLB without Rerun-specific cleanup +``` + +Cooked packages store these under named artifacts. 
Runtime consumers should ask +for the asset they support: + +```python +rerun_glb = package.browser_visual_path("rerun") +babylon_glb = package.browser_visual_path("babylon") +``` + +If a target returns `None`, the package was not cooked for that viewer. + +## Cook + +Cook the office package for Rerun: ```bash python -m dimos.experimental.pimsim.scene.cook \ @@ -403,165 +144,124 @@ python -m dimos.experimental.pimsim.scene.cook \ --output-dir data/scene_packages/dimos_office \ --scale 2.0 \ --no-y-up \ + --visual-target rerun \ --rebake ``` -Verify the generated MuJoCo wrapper: +Cook a Babylon/PimSim visual instead: ```bash -rg 'Floor.*_col' data/scene_packages/dimos_office/mujoco -g wrapper.xml +python -m dimos.experimental.pimsim.scene.cook \ + data/my_scene/source.blend \ + --cook-spec data/my_scene/source.cook.json \ + --output-dir data/scene_packages/my_scene \ + --visual-target babylon \ + --rebake ``` -For a floor whose top is at `z=0`, the cooked geom should have half-thickness -`0.02` and center `z=-0.02`, for example: +The target profile supplies defaults. Override only when needed: -```xml - +```bash +python -m dimos.experimental.pimsim.scene.cook \ + data/my_scene/source.glb \ + --cook-spec data/my_scene/source.cook.json \ + --output-dir data/scene_packages/my_scene \ + --visual-target babylon \ + --visual-texture-format webp \ + --visual-max-texture-size 2048 \ + --rebake ``` -The important invariant is top height unchanged, bottom expanded downward: +Native `gltfpack` is required for WebP/KTX2 texture compression. The Node/npx +package can optimize geometry but does not support those texture modes. 
+ +## Output Layout ```text -top_z = pos_z + size_z -bottom_z = pos_z - size_z +data/scene_packages// +├── scene.meta.json +├── browser/ +│ ├── visual.rerun.glb backend-specific visual, if cooked +│ ├── visual.babylon.glb backend-specific visual, if cooked +│ ├── collision.glb simplified raycast/picking collision +│ └── objects.json per-prim object table for browser users +├── mujoco// +│ ├── wrapper.xml scene-only MuJoCo XML +│ ├── wrapper.mjb optional scene-only MuJoCo binary +│ └── *.obj static collision meshes +├── mujoco/composed/ +│ └── _.mjb optional robot+scene binary +└── entities// + ├── visual.glb + └── mujoco_collision/ ``` -## Loading Cooked Output +`scene.meta.json` stores package-relative paths, frame metadata, entities, and +cook statistics. Runtime code should read it through `load_scene_package()` or +`resolve_scene_package()`. -Runtime details live in `dimos/simulation/scenes/README.md`. For a quick G1 -GR00T WBC check with a cooked named scene: - -```bash -python -m dimos.robot.cli.dimos \ - --simulation mujoco \ - --scene office \ - --viewer rerun \ - --n-workers 12 \ - run unitree-g1-groot-wbc \ - -o mujocosimmodule.headless=true -``` +## MuJoCo Artifacts -`office` resolves through `dimos/simulation/scenes/catalog.py` to: +The normal MuJoCo artifact is scene-only XML: ```text -data/scene_packages/dimos_office/scene.meta.json +scene.meta.json -> wrapper.xml + entities -> runtime attaches robot MJCF -> MjModel ``` -At runtime, `MujocoSimModule`: +This keeps the package robot-agnostic. It is the right path for office-scale +scenes and for packages that should work with multiple robots. + +Huge scenes can additionally use composed `.mjb` files: -1. receives a `ScenePackage` resolved by `catalog.py`; -2. reads `package.mujoco_scene_path`, which points at - `mujoco//wrapper.xml`; -3. loads the robot-only G1 MJCF; -4. attaches the robot into the scene `MjSpec`; -5. adds `package.entities`; -6. compiles one MuJoCo model. 
+```text +wrapper.xml + robot MJCF + spawn/entity selection -> composed/.mjb +``` -The robot MJCF must stay robot-only: no office floor, no scene walls, no -furniture, no manipulation rig. Scene geometry belongs in the cooked package. +A composed `.mjb` loads quickly, but it is specific to one robot, spawn pose, +runtime entity set, and scene revision. Rebuild it when any of those change. -Prefer `mujocosimmodule.headless=true` and inspect the robot, scene GLB, lidar, -costmaps, and planned path in the Rerun native viewer. `headless=false` opens -MuJoCo's native window and can run much slower, so use it only for direct MuJoCo -render/contact debugging. +## Load The Cooked Package -For a large scene where XML compile is too slow, load a composed binary model -instead of the scene package metadata: +Run the G1 GR00T WBC blueprint with a cooked package: ```bash python -m dimos.robot.cli.dimos \ --simulation mujoco \ - --scene /path/to/package/mujoco/composed/unitree-g1-groot-wbc_spawn.mjb \ + --scene office \ --viewer rerun \ --n-workers 12 \ run unitree-g1-groot-wbc \ -o mujocosimmodule.headless=true ``` -That path skips scene-package runtime composition. The `.mjb` already contains -the robot, cooked scene, spawn pose, and selected runtime entity set for that -build. - -## Sidecar Schema - -`.cook.json` can contain static-collision policy and interactables: +Use `headless=true` for normal testing and inspect Rerun. Use +`headless=false` only when the native MuJoCo window is needed for contact or +render debugging; it can run much slower. 
-```json -{ - "collision": { - "default": "auto", - "prim_overrides": { - "Floor*": {"type": "box", "min_thickness": 0.04, "preserve": "top"}, - "Wall_*": {"type": "box"}, - "Stairs_*": {"type": "decompose", "max_hulls": 16} - } - }, - "interactables": [ - { - "id": "chair_000", - "source_prim_paths": ["Chair_000_*"], - "mass": 8.0, - "physics": {"shape": "mesh"}, - "tags": ["chair"] - } - ] -} -``` +## Data Publishing -Static collision types are: +Raw scene sources and cooked packages live under `data/` and should not be +added with ordinary `git add`. Publish large data through the LFS bin workflow +documented in: ```text -auto | box | sphere | cylinder | capsule | plane | hull | mesh | decompose | skip +docs/development/large_file_management.md ``` -For box overrides, `min_thickness` is full world-Z thickness in meters. -`preserve` can be `top`, `bottom`, or `center`. - -Interactables become MuJoCo bodies named `entity:`. They can be: +Code and docs changes go through normal git. Data archives should be updated +with `./bin/lfs_push` when the package is ready to ship. -- extracted from the source mesh with `source_prim_paths`; -- synthetic primitives with an authored `pose` and `physics.extents`; -- static, kinematic, or dynamic depending on `kind` and `mass`. - -## Data Publishing - -Scene source meshes and cooked packages live under `data/`, which is ignored by -normal git. Do not add these artifacts with ordinary `git add`. 
Publish or update -them through the repository LFS bin workflow described in: +## Reference Files ```text -docs/development/large_file_management.md +dimos/experimental/pimsim/scene/cook.py cook CLI +dimos/experimental/pimsim/scene/sidecar.py cook sidecar schema +dimos/experimental/pimsim/scene/plan.py sidecar to entity/collision plan +dimos/experimental/pimsim/scene/source_asset.py .blend normalization +dimos/experimental/pimsim/scene/visual_glb.py browser visual cooking +dimos/simulation/scene_assets/spec.py package contract +dimos/simulation/scene_assets/mesh_scene.py source mesh inspection +dimos/simulation/mujoco/scene_mesh_to_mjcf.py MuJoCo XML bake +dimos/simulation/mujoco/collision_spec.py static collision policy +dimos/simulation/scenes/catalog.py runtime name/path resolution ``` - -Code and docs changes can go through normal git. Data changes such as -`data/dimos_office_mesh/dimos_office_mesh.cook.json` and -`data/scene_packages/dimos_office` should be handled through the LFS script when -we are ready to ship them. - -## Why Bake - -MuJoCo treats a mesh geom as convex for collision. A raw concave building mesh is -therefore the wrong collision representation: it becomes a coarse blob or a bad -support surface. The cooker turns source prims into MuJoCo-friendly collision: -primitives where the shape is obvious, hulls where they fit, and CoACD -decompositions for concave objects. - -The sidecar is the place for source-specific knowledge. It keeps the cook -deterministic and reviewable instead of relying on broad heuristics that may fail -on multi-story buildings or unusual floor geometry. 
- -## Reference - -| File | Role | -|---|---| -| `dimos/experimental/pimsim/scene/cook.py` | cook entry point and CLI | -| `dimos/experimental/pimsim/scene/sidecar.py` | `.cook.json` schema | -| `dimos/experimental/pimsim/scene/plan.py` | sidecar to cook plan | -| `dimos/experimental/pimsim/scene/inspect.py` | source asset statistics | -| `dimos/simulation/scene_assets/spec.py` | shared scene-package metadata contract | -| `dimos/simulation/scene_assets/mesh_scene.py` | source mesh loading and prim inspection | -| `dimos/simulation/mujoco/scene_mesh_to_mjcf.py` | MuJoCo scene bake | -| `dimos/simulation/mujoco/collision_spec.py` | static collision policy | -| `dimos/simulation/mujoco/entity_scene.py` | runtime entity composition | -| `dimos/simulation/scenes/catalog.py` | scene name/path resolution | -| `dimos/simulation/engines/mujoco_sim_module.py` | runtime scene + robot composition | diff --git a/dimos/experimental/pimsim/scene/cook.py b/dimos/experimental/pimsim/scene/cook.py index a70e2da549..1971f22bac 100644 --- a/dimos/experimental/pimsim/scene/cook.py +++ b/dimos/experimental/pimsim/scene/cook.py @@ -42,12 +42,14 @@ from dimos.simulation.mujoco.collision_spec import CollisionSpec from dimos.simulation.mujoco.scene_mesh_to_mjcf import load_or_bake from dimos.simulation.scene_assets.spec import ( + BROWSER_VISUAL_TARGETS, BrowserCollisionSpec, BrowserVisualSpec, MujocoSceneSpec, SceneCookSpec, SceneMeshAlignment, ScenePackage, + browser_visual_spec_for_target, ) from dimos.utils.data import get_data_dir from dimos.utils.logging_config import setup_logger @@ -175,10 +177,17 @@ def cook_scene_package( rebake=rebake, ) if visual_result is not None: - stats["browser_visual"] = { + visual_stats = { + "target": visual.target_key, "tool": visual_result.tool, **visual_result.stats, } + stats["browser_visual"] = { + **visual_stats, + } + stats["browser_visuals"] = { + visual.target_key: visual_stats, + } browser_collision_result = cook_browser_collision( 
cook_source, @@ -216,6 +225,7 @@ def cook_scene_package( source_path=source, alignment=align, visual_path=visual_result.path if visual_result else None, + browser_visuals={visual.target_key: visual_result.path} if visual_result else {}, browser_collision_path=browser_collision_result.path if browser_collision_result else None, objects_path=browser_collision_result.objects_path if browser_collision_result else None, mujoco_scene_path=mujoco_scene_path, @@ -386,40 +396,65 @@ def cli_main() -> None: parser.add_argument( "--visual-optimizer", choices=("gltfpack", "blender", "copy"), - default="gltfpack", ) - parser.add_argument("--visual-simplify-ratio", type=float, default=0.3) - parser.add_argument("--visual-simplify-error", type=float, default=0.02) + parser.add_argument( + "--visual-target", + choices=BROWSER_VISUAL_TARGETS, + default="rerun", + help=( + "browser visual target to cook. Rerun uses conservative GLBs; " + "Babylon can use web-oriented glTF extensions." + ), + ) + parser.add_argument("--visual-output-name") + parser.add_argument("--visual-simplify-ratio", type=float) + parser.add_argument("--visual-simplify-error", type=float) parser.add_argument("--visual-max-texture-size", type=int) parser.add_argument( "--visual-texture-format", choices=("none", "webp", "ktx2"), - default="none", ) parser.add_argument( - "--no-visual-texture-normalization", + "--visual-texture-normalization", + dest="visual_texture_normalization", action="store_true", - help=( - "do not rewrite embedded visual textures to plain 8-bit PNGs after " - "gltfpack. The default preserves textures but avoids viewer-specific " - "compressed or high-bit-depth texture formats." 
- ), + default=None, + help="rewrite embedded visual textures to plain 8-bit PNGs", + ) + parser.add_argument( + "--no-visual-texture-normalization", + dest="visual_texture_normalization", + action="store_false", + default=None, + help="keep the optimizer's embedded texture encoding", ) parser.add_argument( "--visual-quantize", + dest="visual_quantize", action="store_true", - help=( - "allow gltfpack quantization. This makes smaller files but emits " - "KHR_mesh_quantization, which some viewers cannot load." - ), + default=None, + help="allow mesh quantization when the target viewer supports it", + ) + parser.add_argument( + "--no-visual-quantize", + dest="visual_quantize", + action="store_false", + default=None, + help="disable mesh quantization", ) parser.add_argument( "--visual-gpu-instancing", + dest="visual_gpu_instancing", action="store_true", - help=( - "allow gltfpack to emit EXT_mesh_gpu_instancing. This can make " - "dense repeated assets much smaller, but not every viewer supports it." 
- ), + default=None, + help="allow EXT_mesh_gpu_instancing when the target viewer supports it", + ) + parser.add_argument( + "--no-visual-gpu-instancing", + dest="visual_gpu_instancing", + action="store_false", + default=None, + help="disable EXT_mesh_gpu_instancing", ) parser.add_argument("--no-browser-collision", action="store_true") parser.add_argument("--browser-collision-target-faces", type=int, default=100_000) @@ -436,6 +471,24 @@ def cli_main() -> None: parser.add_argument("--rebake", action="store_true") args = parser.parse_args() + visual_overrides: dict[str, Any] = {"enabled": not args.no_visual} + for key, value in ( + ("output_name", args.visual_output_name), + ("optimizer", args.visual_optimizer), + ("simplify_ratio", args.visual_simplify_ratio), + ("simplify_error", args.visual_simplify_error), + ("max_texture_size", args.visual_max_texture_size), + ("normalize_textures", args.visual_texture_normalization), + ("quantize", args.visual_quantize), + ("use_gpu_instancing", args.visual_gpu_instancing), + ): + if value is not None: + visual_overrides[key] = value + if args.visual_texture_format is not None: + visual_overrides["texture_format"] = ( + None if args.visual_texture_format == "none" else args.visual_texture_format + ) + package = cook_scene_package( args.source, output_dir=args.output_dir, @@ -446,18 +499,9 @@ def cli_main() -> None: y_up=not args.no_y_up, ), cook_sidecar=SceneCookSidecar.from_json(args.cook_spec) if args.cook_spec else None, - visual_spec=BrowserVisualSpec( - enabled=not args.no_visual, - optimizer=args.visual_optimizer, - simplify_ratio=args.visual_simplify_ratio, - simplify_error=args.visual_simplify_error, - texture_format=( - None if args.visual_texture_format == "none" else args.visual_texture_format - ), - max_texture_size=args.visual_max_texture_size, - normalize_textures=not args.no_visual_texture_normalization, - quantize=args.visual_quantize, - use_gpu_instancing=args.visual_gpu_instancing, + 
visual_spec=browser_visual_spec_for_target( + args.visual_target, + **visual_overrides, ), browser_collision_spec=BrowserCollisionSpec( enabled=not args.no_browser_collision, diff --git a/dimos/experimental/pimsim/scene/demo_scene_cook_requirements.py b/dimos/experimental/pimsim/scene/demo_scene_cook_requirements.py deleted file mode 100644 index 0acb836e59..0000000000 --- a/dimos/experimental/pimsim/scene/demo_scene_cook_requirements.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright 2025-2026 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Small reference preflight for scene cooking inputs. - -Run: - - python -m dimos.experimental.pimsim.scene.demo_scene_cook_requirements \ - data/dimos_office_mesh/dimos_office_mesh.glb \ - --cook-spec data/dimos_office_mesh/dimos_office_mesh.cook.json \ - --output-dir data/scene_packages/dimos_office \ - --scale 2.0 \ - --no-y-up - -This module shows what a cooker needs before starting a potentially long bake: -one source asset, one optional sidecar, alignment, and an output directory. It -does not emit scene package artifacts. 
-""" - -from __future__ import annotations - -import argparse -import json -from pathlib import Path -import shlex -from typing import Any - -import numpy as np - -from dimos.experimental.pimsim.scene.inspect import inspect_scene_asset -from dimos.experimental.pimsim.scene.plan import build_scene_cook_plan -from dimos.experimental.pimsim.scene.sidecar import SceneCookSidecar -from dimos.experimental.pimsim.scene.source_asset import prepare_scene_source -from dimos.simulation.scene_assets.mesh_scene import load_scene_prims -from dimos.simulation.scene_assets.spec import SceneMeshAlignment - - -def inspect_cook_requirements( - source: str | Path, - *, - output_dir: str | Path, - cook_spec: str | Path | None = None, - alignment: SceneMeshAlignment | None = None, - sample_prims: int = 8, - rebake: bool = False, -) -> dict[str, Any]: - """Return a JSON-serializable summary of the inputs needed to cook a scene.""" - align = alignment or SceneMeshAlignment() - source_path = Path(source).expanduser().resolve() - prepared = prepare_scene_source(source_path, rebake=rebake) - sidecar = ( - SceneCookSidecar.from_json(cook_spec) - if cook_spec is not None - else SceneCookSidecar.auto_discover(source_path) - ) - stats = inspect_scene_asset(prepared.cook_path) - prims = list(load_scene_prims(prepared.cook_path, alignment=align)) - plan = build_scene_cook_plan( - prepared.cook_path, - sidecar=sidecar, - alignment=align, - output_dir=output_dir, - ) - - return { - "source": str(source_path), - "prepared_source": prepared.to_json_dict(), - "output_dir": str(Path(output_dir).expanduser().resolve()), - "alignment": { - "scale": align.scale, - "translation": list(align.translation), - "rotation_zyx_deg": list(align.rotation_zyx_deg), - "y_up": align.y_up, - }, - "source_stats": stats.to_json_dict(), - "source_prim_count": len(prims), - "sample_prims": [_prim_summary(prim) for prim in prims[:sample_prims]], - "sidecar": { - "path": str(sidecar.path) if sidecar.path else None, - 
"collision_default": sidecar.collision.default, - "collision_override_count": len(sidecar.collision.prim_overrides), - "interactable_count": len(sidecar.interactables), - "entity_group_count": len(sidecar.entity_groups), - }, - "cook_plan": { - "entities": len(plan.entities), - "entity_prototypes": len(plan.prototypes), - "stats": plan.stats, - }, - "cook_command": _cook_command( - source_path, - output_dir=output_dir, - cook_spec=cook_spec, - alignment=align, - rebake=rebake, - ), - } - - -def _prim_summary(prim: Any) -> dict[str, Any]: - vertices = np.asarray(prim.vertices) - lo = np.min(vertices, axis=0) - hi = np.max(vertices, axis=0) - return { - "name": prim.visual_node_name or prim.prim_path or prim.name, - "min": lo.round(4).tolist(), - "max": hi.round(4).tolist(), - "extent": (hi - lo).round(4).tolist(), - } - - -def _cook_command( - source: Path, - *, - output_dir: str | Path, - cook_spec: str | Path | None, - alignment: SceneMeshAlignment, - rebake: bool, -) -> str: - command = [ - "python", - "-m", - "dimos.experimental.pimsim.scene.cook", - str(source), - "--output-dir", - str(output_dir), - "--scale", - str(alignment.scale), - ] - if cook_spec is not None: - command.extend(["--cook-spec", str(cook_spec)]) - if alignment.translation != (0.0, 0.0, 0.0): - command.extend(["--translation", *[str(value) for value in alignment.translation]]) - if alignment.rotation_zyx_deg != (0.0, 0.0, 0.0): - command.extend( - ["--rotation-zyx-deg", *[str(value) for value in alignment.rotation_zyx_deg]] - ) - if not alignment.y_up: - command.append("--no-y-up") - if rebake: - command.append("--rebake") - return " ".join(shlex.quote(part) for part in command) - - -def main() -> None: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("source") - parser.add_argument("--cook-spec") - parser.add_argument("--output-dir", required=True) - parser.add_argument("--scale", type=float, default=1.0) - parser.add_argument("--translation", type=float, 
nargs=3, default=(0.0, 0.0, 0.0)) - parser.add_argument("--rotation-zyx-deg", type=float, nargs=3, default=(0.0, 0.0, 0.0)) - parser.add_argument("--no-y-up", action="store_true") - parser.add_argument("--sample-prims", type=int, default=8) - parser.add_argument("--rebake", action="store_true") - args = parser.parse_args() - - result = inspect_cook_requirements( - args.source, - output_dir=args.output_dir, - cook_spec=args.cook_spec, - alignment=SceneMeshAlignment( - scale=args.scale, - translation=tuple(args.translation), - rotation_zyx_deg=tuple(args.rotation_zyx_deg), - y_up=not args.no_y_up, - ), - sample_prims=args.sample_prims, - rebake=args.rebake, - ) - print(json.dumps(result, indent=2, sort_keys=True)) - - -if __name__ == "__main__": - main() diff --git a/dimos/experimental/pimsim/scene/test_spec.py b/dimos/experimental/pimsim/scene/test_spec.py index 560715b778..7073dfe42e 100644 --- a/dimos/experimental/pimsim/scene/test_spec.py +++ b/dimos/experimental/pimsim/scene/test_spec.py @@ -23,7 +23,12 @@ from dimos.experimental.pimsim.scene import plan as plan_module from dimos.experimental.pimsim.scene.sidecar import SceneCookSidecar from dimos.simulation.scene_assets.mesh_scene import SceneMeshAlignment, ScenePrimMesh -from dimos.simulation.scene_assets.spec import ARTIFACT_FRAMES, ScenePackage, load_scene_package +from dimos.simulation.scene_assets.spec import ( + ARTIFACT_FRAMES, + ScenePackage, + browser_visual_spec_for_target, + load_scene_package, +) def _metadata(tmp_path: Path) -> dict[str, object]: @@ -39,6 +44,10 @@ def _metadata(tmp_path: Path) -> dict[str, object]: "artifact_frames": ARTIFACT_FRAMES, "artifacts": { "browser_visual": str(tmp_path / "visual.glb"), + "browser_visuals": { + "rerun": str(tmp_path / "visual.rerun.glb"), + "babylon": str(tmp_path / "visual.babylon.glb"), + }, "browser_collision": str(tmp_path / "collision.glb"), "objects": str(tmp_path / "objects.json"), "mujoco_scene": str(tmp_path / "wrapper.xml"), @@ -107,6 +116,8 @@ 
def test_load_scene_package_accepts_expected_artifact_frames(tmp_path: Path) -> package = load_scene_package(metadata_path) assert package.visual_path == tmp_path / "visual.glb" + assert package.browser_visual_path("rerun") == tmp_path / "visual.rerun.glb" + assert package.browser_visual_path("babylon") == tmp_path / "visual.babylon.glb" assert package.browser_collision_path == tmp_path / "collision.glb" assert package.objects_path == tmp_path / "objects.json" assert package.mujoco_scene_path == tmp_path / "wrapper.xml" @@ -119,6 +130,10 @@ def test_scene_package_metadata_uses_package_relative_paths(tmp_path: Path) -> N source_path=tmp_path / "source.glb", alignment=SceneMeshAlignment(), visual_path=tmp_path / "browser" / "visual.glb", + browser_visuals={ + "rerun": tmp_path / "browser" / "visual.rerun.glb", + "babylon": tmp_path / "browser" / "visual.babylon.glb", + }, browser_collision_path=tmp_path / "browser" / "collision.glb", objects_path=tmp_path / "browser" / "objects.json", mujoco_scene_path=tmp_path / "mujoco" / "abc123" / "wrapper.xml", @@ -136,6 +151,10 @@ def test_scene_package_metadata_uses_package_relative_paths(tmp_path: Path) -> N assert raw["package_dir"] == "." 
assert raw["artifacts"]["browser_visual"] == "browser/visual.glb" + assert raw["artifacts"]["browser_visuals"] == { + "babylon": "browser/visual.babylon.glb", + "rerun": "browser/visual.rerun.glb", + } assert raw["artifacts"]["browser_collision"] == "browser/collision.glb" assert raw["artifacts"]["objects"] == "browser/objects.json" assert raw["artifacts"]["mujoco_scene"] == "mujoco/abc123/wrapper.xml" @@ -145,6 +164,8 @@ def test_scene_package_metadata_uses_package_relative_paths(tmp_path: Path) -> N loaded = load_scene_package(metadata_path) assert loaded.package_dir == tmp_path assert loaded.visual_path == tmp_path / "browser" / "visual.glb" + assert loaded.browser_visual_path("rerun") == tmp_path / "browser" / "visual.rerun.glb" + assert loaded.browser_visual_path("babylon") == tmp_path / "browser" / "visual.babylon.glb" assert loaded.mujoco_scene_path == tmp_path / "mujoco" / "abc123" / "wrapper.xml" assert loaded.mujoco_binary_path == tmp_path / "mujoco" / "abc123" / "wrapper.mjb" assert loaded.entities[0]["visual_path"] == str( @@ -164,6 +185,21 @@ def test_load_scene_package_tolerates_missing_objects_sidecar(tmp_path: Path) -> assert package.objects_path is None +def test_browser_visual_profiles_are_backend_specific() -> None: + rerun = browser_visual_spec_for_target("rerun") + babylon = browser_visual_spec_for_target("babylon") + + assert rerun.artifact_name == "visual.rerun.glb" + assert rerun.quantize is False + assert rerun.normalize_textures is True + assert rerun.demote_required_extensions == ("KHR_texture_transform",) + + assert babylon.artifact_name == "visual.babylon.glb" + assert babylon.quantize is True + assert babylon.normalize_textures is False + assert babylon.demote_required_extensions == () + + def test_extract_scene_objects_emits_per_prim_aabb() -> None: from dimos.experimental.pimsim.scene.browser_collision import extract_scene_objects diff --git a/dimos/experimental/pimsim/scene/visual_glb.py 
b/dimos/experimental/pimsim/scene/visual_glb.py index ef6dacab4a..ca6d47bf0b 100644 --- a/dimos/experimental/pimsim/scene/visual_glb.py +++ b/dimos/experimental/pimsim/scene/visual_glb.py @@ -17,21 +17,21 @@ from __future__ import annotations from dataclasses import dataclass -from io import BytesIO import json from pathlib import Path import shutil -import struct import tempfile from typing import Any -from PIL import Image - from dimos.experimental.pimsim.scene.command import ( blender_output_line_is_interesting, run_logged_command, ) from dimos.experimental.pimsim.scene.inspect import inspect_scene_asset +from dimos.simulation.scene_assets.glb import ( + demote_required_extensions, + normalize_embedded_textures, +) from dimos.simulation.scene_assets.spec import BrowserVisualSpec from dimos.utils.logging_config import setup_logger @@ -49,16 +49,6 @@ ".ply", } _GLTFPACK_INPUT_SUFFIXES = {".gltf", ".glb", ".obj"} -_GLB_MAGIC = b"glTF" -_GLB_VERSION = 2 -_GLB_HEADER_SIZE = 12 -_GLB_CHUNK_HEADER_SIZE = 8 -_GLB_JSON_CHUNK_TYPE = 0x4E4F534A -_GLB_BIN_CHUNK_TYPE = 0x004E4942 -_PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n" -_STANDARD_TEXTURE_MIME_TYPES = {"image/png", "image/jpeg"} -_STANDARD_TEXTURE_MODES = {"RGB", "RGBA"} -_DEMOTABLE_REQUIRED_EXTENSIONS = {"KHR_texture_transform"} _BLENDER_SCRIPT = r""" import pathlib @@ -200,7 +190,7 @@ def cook_browser_visual( source = Path(source_path).expanduser().resolve() out_dir = Path(output_dir).expanduser().resolve() out_dir.mkdir(parents=True, exist_ok=True) - out_path = out_dir / visual_spec.output_name + out_path = out_dir / visual_spec.artifact_name if out_path.exists() and not rebake: return BrowserVisualCookResult( path=out_path, @@ -359,239 +349,26 @@ def _sanitize_browser_visual_output(path: Path, spec: BrowserVisualSpec) -> None if path.suffix.lower() != ".glb": return - demoted_extensions = _demote_required_extensions( - path, - _DEMOTABLE_REQUIRED_EXTENSIONS, - ) + demoted_extensions = demote_required_extensions(path, 
set(spec.demote_required_extensions)) if demoted_extensions: logger.info( - "demoted browser visual GLB extensions path=%s extensions=%s", + "demoted browser visual GLB extensions target=%s path=%s extensions=%s", + spec.target_key, path, sorted(demoted_extensions), ) if spec.normalize_textures and spec.texture_format is None: - normalized_textures = _normalize_embedded_textures(path) + normalized_textures = normalize_embedded_textures(path) if normalized_textures: logger.info( - "normalized embedded browser visual textures path=%s count=%d", + "normalized embedded browser visual textures target=%s path=%s count=%d", + spec.target_key, path, normalized_textures, ) -def _demote_required_extensions(path: Path, extensions: set[str]) -> set[str]: - gltf, bin_chunk = _read_glb(path) - required = gltf.get("extensionsRequired") - if not isinstance(required, list): - return set() - - demoted = {extension for extension in required if extension in extensions} - if not demoted: - return set() - - next_required = [extension for extension in required if extension not in demoted] - if next_required: - gltf["extensionsRequired"] = next_required - else: - gltf.pop("extensionsRequired", None) - used = gltf.get("extensionsUsed") - if isinstance(used, list): - merged = list(dict.fromkeys([*used, *sorted(demoted)])) - gltf["extensionsUsed"] = merged - else: - gltf["extensionsUsed"] = sorted(demoted) - _write_glb(path, gltf, bin_chunk, {}) - return demoted - - -def _normalize_embedded_textures(path: Path) -> int: - gltf, bin_chunk = _read_glb(path) - images = gltf.get("images") - buffer_views = gltf.get("bufferViews") - buffers = gltf.get("buffers") - if not isinstance(images, list) or not images: - return 0 - if not isinstance(buffer_views, list) or not isinstance(buffers, list): - return 0 - if len(buffers) != 1: - raise RuntimeError(f"cannot normalize textures in multi-buffer GLB: {path}") - - replacements: dict[int, bytes] = {} - for image_index, image in enumerate(images): - if 
not isinstance(image, dict): - continue - buffer_view_index = image.get("bufferView") - if not isinstance(buffer_view_index, int): - continue - if buffer_view_index < 0 or buffer_view_index >= len(buffer_views): - raise RuntimeError( - f"image {image_index} references missing bufferView {buffer_view_index}: {path}" - ) - view = buffer_views[buffer_view_index] - if not isinstance(view, dict): - continue - texture_bytes = _buffer_view_bytes(bin_chunk, view) - normalized = _normalized_texture_bytes( - texture_bytes, - mime_type=image.get("mimeType"), - ) - if normalized is None: - continue - replacements[buffer_view_index] = normalized - image["mimeType"] = "image/png" - image.pop("uri", None) - - if not replacements: - return 0 - - _write_glb(path, gltf, bin_chunk, replacements) - return len(replacements) - - -def _normalized_texture_bytes( - texture_bytes: bytes, - *, - mime_type: Any, -) -> bytes | None: - try: - with Image.open(BytesIO(texture_bytes)) as image: - image.load() - if _is_standard_embedded_texture(image, texture_bytes, mime_type): - return None - has_alpha = image.mode in {"RGBA", "LA"} or "transparency" in image.info - mode = "RGBA" if has_alpha else "RGB" - converted = image.convert(mode) - out = BytesIO() - converted.save(out, format="PNG", compress_level=1) - return out.getvalue() - except Exception as exc: - raise RuntimeError("failed to normalize embedded GLB texture to 8-bit PNG") from exc - - -def _is_standard_embedded_texture( - image: Image.Image, - texture_bytes: bytes, - mime_type: Any, -) -> bool: - if mime_type not in _STANDARD_TEXTURE_MIME_TYPES: - return False - if image.mode not in _STANDARD_TEXTURE_MODES: - return False - return not _is_high_bit_depth_png(texture_bytes) - - -def _is_high_bit_depth_png(texture_bytes: bytes) -> bool: - if not texture_bytes.startswith(_PNG_SIGNATURE) or len(texture_bytes) < 25: - return False - return texture_bytes[24] > 8 - - -def _read_glb(path: Path) -> tuple[dict[str, Any], bytes]: - data = 
path.read_bytes() - if len(data) < _GLB_HEADER_SIZE: - raise RuntimeError(f"invalid GLB header: {path}") - magic, version, declared_length = struct.unpack_from("<4sII", data, 0) - if magic != _GLB_MAGIC or version != _GLB_VERSION: - raise RuntimeError(f"expected GLB v2 file: {path}") - if declared_length != len(data): - raise RuntimeError( - f"GLB length mismatch for {path}: header={declared_length} actual={len(data)}" - ) - - offset = _GLB_HEADER_SIZE - json_bytes: bytes | None = None - bin_chunk: bytes | None = None - while offset < len(data): - if offset + _GLB_CHUNK_HEADER_SIZE > len(data): - raise RuntimeError(f"truncated GLB chunk header: {path}") - chunk_length, chunk_type = struct.unpack_from(" len(data): - raise RuntimeError(f"truncated GLB chunk payload: {path}") - chunk = data[offset:chunk_end] - if chunk_type == _GLB_JSON_CHUNK_TYPE: - json_bytes = chunk - elif chunk_type == _GLB_BIN_CHUNK_TYPE: - bin_chunk = chunk - offset = chunk_end - - if json_bytes is None or bin_chunk is None: - raise RuntimeError(f"GLB must contain JSON and BIN chunks: {path}") - gltf = json.loads(json_bytes.rstrip(b" \t\r\n\0").decode("utf-8")) - if not isinstance(gltf, dict): - raise RuntimeError(f"GLB JSON chunk is not an object: {path}") - return gltf, bin_chunk - - -def _buffer_view_bytes(bin_chunk: bytes, view: dict[str, Any]) -> bytes: - if int(view.get("buffer", 0)) != 0: - raise RuntimeError("embedded texture normalization only supports buffer 0") - byte_offset = int(view.get("byteOffset", 0)) - byte_length = int(view["byteLength"]) - return bin_chunk[byte_offset : byte_offset + byte_length] - - -def _write_glb( - path: Path, - gltf: dict[str, Any], - bin_chunk: bytes, - buffer_view_replacements: dict[int, bytes], -) -> None: - buffer_views = gltf.get("bufferViews") - buffers = gltf.get("buffers") - if not isinstance(buffer_views, list) or not isinstance(buffers, list) or len(buffers) != 1: - raise RuntimeError(f"cannot rewrite GLB buffer views: {path}") - - new_bin = 
bytearray() - for index, view in enumerate(buffer_views): - if not isinstance(view, dict): - raise RuntimeError(f"invalid GLB bufferView at index {index}: {path}") - payload = buffer_view_replacements.get(index) - if payload is None: - payload = _buffer_view_bytes(bin_chunk, view) - _pad_bytearray(new_bin, alignment=4, pad=0) - view["byteOffset"] = len(new_bin) - view["byteLength"] = len(payload) - new_bin.extend(payload) - _pad_bytearray(new_bin, alignment=4, pad=0) - buffers[0]["byteLength"] = len(new_bin) - - json_chunk = json.dumps(gltf, separators=(",", ":"), sort_keys=True).encode("utf-8") - json_chunk = _padded_bytes(json_chunk, alignment=4, pad=b" ") - bin_bytes = bytes(new_bin) - total_length = ( - _GLB_HEADER_SIZE - + _GLB_CHUNK_HEADER_SIZE - + len(json_chunk) - + _GLB_CHUNK_HEADER_SIZE - + len(bin_bytes) - ) - with tempfile.NamedTemporaryFile("wb", suffix=".glb", delete=False) as temp: - temp_path = Path(temp.name) - temp.write(struct.pack("<4sII", _GLB_MAGIC, _GLB_VERSION, total_length)) - temp.write(struct.pack(" None: - while len(data) % alignment: - data.append(pad) - - -def _padded_bytes(data: bytes, *, alignment: int, pad: bytes) -> bytes: - while len(data) % alignment: - data += pad - return data - - def _gltfpack_command() -> list[str]: gltfpack = shutil.which("gltfpack") if gltfpack is not None: diff --git a/dimos/simulation/scene_assets/glb.py b/dimos/simulation/scene_assets/glb.py new file mode 100644 index 0000000000..e84733970c --- /dev/null +++ b/dimos/simulation/scene_assets/glb.py @@ -0,0 +1,255 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Small GLB utilities shared by scene-package tools.""" + +from __future__ import annotations + +from io import BytesIO +import json +from pathlib import Path +import shutil +import struct +import tempfile +from typing import Any + +from PIL import Image + +GLB_MAGIC = b"glTF" +GLB_VERSION = 2 +GLB_HEADER_SIZE = 12 +GLB_CHUNK_HEADER_SIZE = 8 +GLB_JSON_CHUNK_TYPE = 0x4E4F534A +GLB_BIN_CHUNK_TYPE = 0x004E4942 +PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n" +STANDARD_TEXTURE_MIME_TYPES = {"image/png", "image/jpeg"} +STANDARD_TEXTURE_MODES = {"RGB", "RGBA"} + + +def read_glb(path: Path) -> tuple[dict[str, Any], bytes]: + """Read a GLB v2 file as a JSON chunk plus one BIN chunk.""" + data = path.read_bytes() + if len(data) < GLB_HEADER_SIZE: + raise RuntimeError(f"invalid GLB header: {path}") + magic, version, declared_length = struct.unpack_from("<4sII", data, 0) + if magic != GLB_MAGIC or version != GLB_VERSION: + raise RuntimeError(f"expected GLB v2 file: {path}") + if declared_length != len(data): + raise RuntimeError( + f"GLB length mismatch for {path}: header={declared_length} actual={len(data)}" + ) + + offset = GLB_HEADER_SIZE + json_bytes: bytes | None = None + bin_chunk: bytes | None = None + while offset < len(data): + if offset + GLB_CHUNK_HEADER_SIZE > len(data): + raise RuntimeError(f"truncated GLB chunk header: {path}") + chunk_length, chunk_type = struct.unpack_from(" len(data): + raise RuntimeError(f"truncated GLB chunk payload: {path}") + chunk = data[offset:chunk_end] + if chunk_type == GLB_JSON_CHUNK_TYPE: + json_bytes = chunk + elif chunk_type == 
GLB_BIN_CHUNK_TYPE: + bin_chunk = chunk + offset = chunk_end + + if json_bytes is None or bin_chunk is None: + raise RuntimeError(f"GLB must contain JSON and BIN chunks: {path}") + gltf = json.loads(json_bytes.rstrip(b" \t\r\n\0").decode("utf-8")) + if not isinstance(gltf, dict): + raise RuntimeError(f"GLB JSON chunk is not an object: {path}") + return gltf, bin_chunk + + +def write_glb( + path: Path, + gltf: dict[str, Any], + bin_chunk: bytes, + buffer_view_replacements: dict[int, bytes], +) -> None: + """Rewrite a GLB while preserving or replacing bufferView payloads.""" + buffer_views = gltf.get("bufferViews") + buffers = gltf.get("buffers") + if not isinstance(buffer_views, list) or not isinstance(buffers, list) or len(buffers) != 1: + raise RuntimeError(f"cannot rewrite GLB buffer views: {path}") + + new_bin = bytearray() + for index, view in enumerate(buffer_views): + if not isinstance(view, dict): + raise RuntimeError(f"invalid GLB bufferView at index {index}: {path}") + payload = buffer_view_replacements.get(index) + if payload is None: + payload = buffer_view_bytes(bin_chunk, view) + _pad_bytearray(new_bin, alignment=4, pad=0) + view["byteOffset"] = len(new_bin) + view["byteLength"] = len(payload) + new_bin.extend(payload) + _pad_bytearray(new_bin, alignment=4, pad=0) + buffers[0]["byteLength"] = len(new_bin) + + json_chunk = json.dumps(gltf, separators=(",", ":"), sort_keys=True).encode("utf-8") + json_chunk = _padded_bytes(json_chunk, alignment=4, pad=b" ") + bin_bytes = bytes(new_bin) + total_length = ( + GLB_HEADER_SIZE + + GLB_CHUNK_HEADER_SIZE + + len(json_chunk) + + GLB_CHUNK_HEADER_SIZE + + len(bin_bytes) + ) + with tempfile.NamedTemporaryFile("wb", suffix=".glb", delete=False) as temp: + temp_path = Path(temp.name) + temp.write(struct.pack("<4sII", GLB_MAGIC, GLB_VERSION, total_length)) + temp.write(struct.pack(" bytes: + """Return the payload bytes for one bufferView in a single-BIN GLB.""" + if int(view.get("buffer", 0)) != 0: + raise 
RuntimeError("embedded texture normalization only supports buffer 0") + byte_offset = int(view.get("byteOffset", 0)) + byte_length = int(view["byteLength"]) + return bin_chunk[byte_offset : byte_offset + byte_length] + + +def demote_required_extensions(path: Path, extensions: set[str]) -> set[str]: + """Move selected GLB extensions from required to used.""" + gltf, bin_chunk = read_glb(path) + required = gltf.get("extensionsRequired") + if not isinstance(required, list): + return set() + + demoted = {extension for extension in required if extension in extensions} + if not demoted: + return set() + + next_required = [extension for extension in required if extension not in demoted] + if next_required: + gltf["extensionsRequired"] = next_required + else: + gltf.pop("extensionsRequired", None) + used = gltf.get("extensionsUsed") + if isinstance(used, list): + merged = list(dict.fromkeys([*used, *sorted(demoted)])) + gltf["extensionsUsed"] = merged + else: + gltf["extensionsUsed"] = sorted(demoted) + write_glb(path, gltf, bin_chunk, {}) + return demoted + + +def normalize_embedded_textures(path: Path) -> int: + """Rewrite non-standard embedded textures to ordinary 8-bit PNGs.""" + gltf, bin_chunk = read_glb(path) + images = gltf.get("images") + buffer_views = gltf.get("bufferViews") + buffers = gltf.get("buffers") + if not isinstance(images, list) or not images: + return 0 + if not isinstance(buffer_views, list) or not isinstance(buffers, list): + return 0 + if len(buffers) != 1: + raise RuntimeError(f"cannot normalize textures in multi-buffer GLB: {path}") + + replacements: dict[int, bytes] = {} + for image_index, image in enumerate(images): + if not isinstance(image, dict): + continue + buffer_view_index = image.get("bufferView") + if not isinstance(buffer_view_index, int): + continue + if buffer_view_index < 0 or buffer_view_index >= len(buffer_views): + raise RuntimeError( + f"image {image_index} references missing bufferView {buffer_view_index}: {path}" + ) + view 
= buffer_views[buffer_view_index] + if not isinstance(view, dict): + continue + texture_bytes = buffer_view_bytes(bin_chunk, view) + normalized = normalized_texture_bytes( + texture_bytes, + mime_type=image.get("mimeType"), + ) + if normalized is None: + continue + replacements[buffer_view_index] = normalized + image["mimeType"] = "image/png" + image.pop("uri", None) + + if not replacements: + return 0 + + write_glb(path, gltf, bin_chunk, replacements) + return len(replacements) + + +def normalized_texture_bytes( + texture_bytes: bytes, + *, + mime_type: Any, +) -> bytes | None: + """Return an 8-bit PNG replacement, or None when the texture is standard.""" + try: + with Image.open(BytesIO(texture_bytes)) as image: + image.load() + if _is_standard_embedded_texture(image, texture_bytes, mime_type): + return None + has_alpha = image.mode in {"RGBA", "LA"} or "transparency" in image.info + mode = "RGBA" if has_alpha else "RGB" + converted = image.convert(mode) + out = BytesIO() + converted.save(out, format="PNG", compress_level=1) + return out.getvalue() + except Exception as exc: + raise RuntimeError("failed to normalize embedded GLB texture to 8-bit PNG") from exc + + +def _is_standard_embedded_texture( + image: Image.Image, + texture_bytes: bytes, + mime_type: Any, +) -> bool: + if mime_type not in STANDARD_TEXTURE_MIME_TYPES: + return False + if image.mode not in STANDARD_TEXTURE_MODES: + return False + return not _is_high_bit_depth_png(texture_bytes) + + +def _is_high_bit_depth_png(texture_bytes: bytes) -> bool: + if not texture_bytes.startswith(PNG_SIGNATURE) or len(texture_bytes) < 25: + return False + return texture_bytes[24] > 8 + + +def _pad_bytearray(data: bytearray, *, alignment: int, pad: int) -> None: + while len(data) % alignment: + data.append(pad) + + +def _padded_bytes(data: bytes, *, alignment: int, pad: bytes) -> bytes: + while len(data) % alignment: + data += pad + return data diff --git a/dimos/simulation/scene_assets/spec.py 
b/dimos/simulation/scene_assets/spec.py index 0350da693b..eb0e054954 100644 --- a/dimos/simulation/scene_assets/spec.py +++ b/dimos/simulation/scene_assets/spec.py @@ -53,7 +53,8 @@ class BrowserVisualSpec: """Browser-rendered asset policy.""" enabled: bool = True - output_name: str = "visual.glb" + target: str = "rerun" + output_name: str | None = None optimizer: str = "gltfpack" simplify_ratio: float = 0.3 simplify_error: float = 0.02 @@ -62,12 +63,91 @@ class BrowserVisualSpec: normalize_textures: bool = True quantize: bool = False use_gpu_instancing: bool = False + demote_required_extensions: tuple[str, ...] = ("KHR_texture_transform",) max_meshes: int = 200 max_materials: int = 50 max_textures: int = 750 max_vertices: int = 750_000 max_vertex_growth_ratio: float = 1.25 + @property + def target_key(self) -> str: + return self.target.strip().lower() + + @property + def artifact_name(self) -> str: + return self.output_name or f"visual.{self.target_key}.glb" + + +_BROWSER_VISUAL_PROFILES: dict[str, dict[str, Any]] = { + "rerun": { + "optimizer": "gltfpack", + "simplify_ratio": 0.3, + "simplify_error": 0.02, + "texture_format": None, + "max_texture_size": None, + "normalize_textures": True, + "quantize": False, + "use_gpu_instancing": False, + "demote_required_extensions": ("KHR_texture_transform",), + "max_meshes": 200, + "max_materials": 50, + "max_textures": 750, + "max_vertices": 750_000, + "max_vertex_growth_ratio": 1.25, + }, + "babylon": { + "optimizer": "gltfpack", + "simplify_ratio": 0.3, + "simplify_error": 0.02, + "texture_format": None, + "max_texture_size": None, + "normalize_textures": False, + "quantize": True, + "use_gpu_instancing": True, + "demote_required_extensions": (), + "max_meshes": 2_000, + "max_materials": 400, + "max_textures": 2_000, + "max_vertices": 2_000_000, + "max_vertex_growth_ratio": 2.0, + }, + "generic": { + "optimizer": "gltfpack", + "simplify_ratio": 0.3, + "simplify_error": 0.02, + "texture_format": None, + 
"max_texture_size": None, + "normalize_textures": False, + "quantize": False, + "use_gpu_instancing": False, + "demote_required_extensions": (), + "max_meshes": 500, + "max_materials": 100, + "max_textures": 1_000, + "max_vertices": 1_000_000, + "max_vertex_growth_ratio": 1.5, + }, +} +BROWSER_VISUAL_TARGETS = tuple(sorted(_BROWSER_VISUAL_PROFILES)) + + +def browser_visual_spec_for_target( + target: str, + **overrides: Any, +) -> BrowserVisualSpec: + """Build a visual cook spec for a named browser/viewer target.""" + target_key = target.strip().lower() + try: + values = dict(_BROWSER_VISUAL_PROFILES[target_key]) + except KeyError as exc: + known = ", ".join(BROWSER_VISUAL_TARGETS) + raise ValueError( + f"unknown browser visual target {target!r}; expected one of: {known}" + ) from exc + values.update(overrides) + return BrowserVisualSpec(target=target_key, **values) + @dataclass(frozen=True) class BrowserCollisionSpec: diff --git a/dimos/experimental/pimsim/scene/test_visual_glb.py b/dimos/simulation/scene_assets/test_glb.py similarity index 88% rename from dimos/experimental/pimsim/scene/test_visual_glb.py rename to dimos/simulation/scene_assets/test_glb.py index aa786d4116..8b2ddeb632 100644 --- a/dimos/experimental/pimsim/scene/test_visual_glb.py +++ b/dimos/simulation/scene_assets/test_glb.py @@ -21,11 +21,11 @@ from PIL import Image -from dimos.experimental.pimsim.scene.visual_glb import ( - _buffer_view_bytes, - _demote_required_extensions, - _normalize_embedded_textures, - _read_glb, +from dimos.simulation.scene_assets.glb import ( + buffer_view_bytes, + demote_required_extensions, + normalize_embedded_textures, + read_glb, ) @@ -37,14 +37,14 @@ def test_normalize_embedded_textures_preserves_glb_and_rewrites_png8( path = tmp_path / "textured.glb" _write_test_glb(path, geometry_payload, texture_payload) - count = _normalize_embedded_textures(path) + count = normalize_embedded_textures(path) assert count == 1 - gltf, bin_chunk = _read_glb(path) + gltf, 
bin_chunk = read_glb(path) assert gltf["images"][0]["mimeType"] == "image/png" - assert _buffer_view_bytes(bin_chunk, gltf["bufferViews"][0]) == geometry_payload + assert buffer_view_bytes(bin_chunk, gltf["bufferViews"][0]) == geometry_payload - normalized_texture = _buffer_view_bytes(bin_chunk, gltf["bufferViews"][1]) + normalized_texture = buffer_view_bytes(bin_chunk, gltf["bufferViews"][1]) with Image.open(BytesIO(normalized_texture)) as image: assert image.format == "PNG" assert image.mode == "RGB" @@ -60,10 +60,10 @@ def test_demote_required_extensions_keeps_extension_optional(tmp_path: Path) -> used_extensions=["KHR_texture_transform", "EXT_texture_webp"], ) - demoted = _demote_required_extensions(path, {"KHR_texture_transform"}) + demoted = demote_required_extensions(path, {"KHR_texture_transform"}) assert demoted == {"KHR_texture_transform"} - gltf, _ = _read_glb(path) + gltf, _ = read_glb(path) assert gltf["extensionsRequired"] == ["EXT_texture_webp"] assert "KHR_texture_transform" in gltf["extensionsUsed"]