From abdbfdf67cc3bb6d2aeab29c215a9cdfad610115 Mon Sep 17 00:00:00 2001 From: Andreas Loeffler <73336148+andreas-loeffler@users.noreply.github.com> Date: Sat, 11 Apr 2026 01:57:11 +0200 Subject: [PATCH 1/7] fix: update output directory logic in TrackerViewer --- utils/viewer_3d.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/viewer_3d.py b/utils/viewer_3d.py index 9b219cf..c3db9d8 100644 --- a/utils/viewer_3d.py +++ b/utils/viewer_3d.py @@ -6009,7 +6009,8 @@ def _stop_recording(self) -> None: n = self._record_frame_idx print(f"[REC] Recording stopped — {n} frames captured") if n > 0: - output_dir = os.path.expanduser("~/Desktop") + desktop = os.path.expanduser("~/Desktop") + output_dir = desktop if os.path.isdir(desktop) else os.path.expanduser("~") base_name = f"viz_tracker_{self._seq_name}" output_name = f"{base_name}.mp4" out_path = os.path.join(output_dir, output_name) From c6fc17f8b899c4751f74f29c447aa056e0c4ecb2 Mon Sep 17 00:00:00 2001 From: Andreas Loeffler <73336148+andreas-loeffler@users.noreply.github.com> Date: Sat, 11 Apr 2026 02:08:18 +0200 Subject: [PATCH 2/7] feat: add Docker support --- .dockerignore | 35 ++++++++++++++++++++++ docker/Dockerfile | 63 +++++++++++++++++++++++++++++++++++++++ docker/docker-compose.yml | 38 +++++++++++++++++++++++ docker/entrypoint.sh | 25 ++++++++++++++++ scripts/boxer_docker.sh | 33 ++++++++++++++++++++ 5 files changed, 194 insertions(+) create mode 100644 .dockerignore create mode 100644 docker/Dockerfile create mode 100644 docker/docker-compose.yml create mode 100644 docker/entrypoint.sh create mode 100644 scripts/boxer_docker.sh diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..5f6bd6e --- /dev/null +++ b/.dockerignore @@ -0,0 +1,35 @@ +__pycache__/ +*.pyc +*.pyo +*.pyd +.pytest_cache/ + +# Environment +.env +*.egg +boxer/ + +# Version control +.git/ +.gitignore + +# Coverage +.coverage +tests/htmlcov/ +htmlcov/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo 
+.DS_Store + +# Checkpoints +ckpts/ + +# Sample data +sample_data/ + +# Visualization output +output/ diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..5e03383 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,63 @@ +# syntax=docker/dockerfile:1 + +FROM nvidia/cuda:12.8.1-runtime-ubuntu24.04 +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3.12 \ + python3.12-venv \ + python3.12-dev \ + gcc \ + curl \ + wget \ + ffmpeg \ + libglib2.0-0 \ + libsm6 \ + libxext6 \ + libxrender1 \ + libgl1 \ + libgl-dev \ + libegl1 \ + libegl-dev \ + libglx0 \ + libx11-6 \ + && rm -rf /var/lib/apt/lists/* + +# Install uv (https://docs.astral.sh/uv/) +RUN curl -LsSf https://astral.sh/uv/install.sh | sh +ENV PATH="/root/.local/bin:${PATH}" + +# Create virtual environment with uv +WORKDIR /boxer +RUN uv venv /boxer/venv --python python3.12 +ENV VIRTUAL_ENV=/boxer/venv +ENV PATH="/boxer/venv/bin:${PATH}" + +RUN uv pip install \ + 'torch>=2.0' \ + numpy \ + opencv-python \ + tqdm \ + dill \ + Pillow + +# To support Project Aria loading +RUN uv pip install projectaria-tools || true + +# 3D interactive viewer for view_*.py scripts +RUN uv pip install \ + moderngl \ + moderngl-window \ + imgui-bundle + + +COPY . /boxer +RUN chmod +x /boxer/docker/entrypoint.sh + +RUN echo 'source /boxer/venv/bin/activate' >> /root/.bashrc + +ENV PYTHONUNBUFFERED=1 +ENV DISPLAY=:0 + +ENTRYPOINT ["/boxer/docker/entrypoint.sh"] +CMD ["python", "run_boxer.py", "--help"] diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 0000000..1a59649 --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,38 @@ +services: + boxer: + build: + context: .. 
+ dockerfile: docker/Dockerfile + image: boxer + + # Pass through GPU access + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + + environment: + - DISPLAY=${DISPLAY:-:0} + - NVIDIA_VISIBLE_DEVICES=all + - NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics + + volumes: + # Checkpoints + - ../ckpts:/boxer/ckpts + + # Sample data + - ../sample_data:/boxer/sample_data + + # Visualization output + - ../output:/boxer/output + + # x11 unix socket + - /tmp/.X11-unix:/tmp/.X11-unix + + network_mode: host + + stdin_open: true + tty: true diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh new file mode 100644 index 0000000..f8ad09c --- /dev/null +++ b/docker/entrypoint.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -e + +CKPT_DIR="/boxer/ckpts" +SENTINEL="${CKPT_DIR}/.downloaded" + +if [ ! -f "$SENTINEL" ]; then + echo "WARNING: Checkpoints not found, proceeding to download..." + bash /boxer/scripts/download_ckpts.sh + touch "$SENTINEL" + echo "INFO: Checkpoints downloaded and saved to $CKPT_DIR/" +fi + +DATA_DIR="/boxer/sample_data" +DATA_SENTINEL="${DATA_DIR}/.downloaded" + +if [ ! -f "$DATA_SENTINEL" ]; then + echo "INFO: Sample Aria data not found, downloading..." 
+ bash /boxer/scripts/download_aria_data.sh + touch "$DATA_SENTINEL" + echo "INFO: Sample data downloaded to $DATA_DIR/" +fi + +exec "$@" diff --git a/scripts/boxer_docker.sh b/scripts/boxer_docker.sh new file mode 100644 index 0000000..d27549a --- /dev/null +++ b/scripts/boxer_docker.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then + cat < /dev/null 2>&1; then + XHOST_GRANTED=true + fi +fi + +docker compose -f "$SCRIPT_DIR/../docker/docker-compose.yml" run --rm --remove-orphans boxer "${@:-bash}" + +if [ "$XHOST_GRANTED" = true ]; then + xhost -local:docker > /dev/null 2>&1 || true +fi From 2f6e4b8ccfa5c0a679d45db77ac57fddddd7aa2d Mon Sep 17 00:00:00 2001 From: Andreas Loeffler <73336148+andreas-loeffler@users.noreply.github.com> Date: Sat, 11 Apr 2026 02:11:55 +0200 Subject: [PATCH 3/7] fix: add user config to docker --- docker/docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 1a59649..38b9615 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -19,6 +19,8 @@ services: - NVIDIA_VISIBLE_DEVICES=all - NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics + user: "${UID:-1000}:${GID:-1000}" + volumes: # Checkpoints - ../ckpts:/boxer/ckpts From e6174890a3e54e4be7f79aa0e8bdfed7722d980a Mon Sep 17 00:00:00 2001 From: Andreas Loeffler <73336148+andreas-loeffler@users.noreply.github.com> Date: Sat, 11 Apr 2026 02:55:16 +0200 Subject: [PATCH 4/7] feat: enhance docker support with UID/GID handling and update output dir logic --- docker/Dockerfile | 4 +++- docker/docker-compose.yml | 4 ++-- docker/entrypoint.sh | 4 ++++ scripts/boxer_docker.sh | 6 ++++++ utils/viewer_3d.py | 8 ++++++-- 5 files changed, 21 insertions(+), 5 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 5e03383..1c45969 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -29,7 +29,7 @@ ENV PATH="/root/.local/bin:${PATH}" # Create virtual 
environment with uv WORKDIR /boxer -RUN uv venv /boxer/venv --python python3.12 +RUN uv venv /boxer/venv --python python3.12 --seed ENV VIRTUAL_ENV=/boxer/venv ENV PATH="/boxer/venv/bin:${PATH}" @@ -50,6 +50,8 @@ RUN uv pip install \ moderngl-window \ imgui-bundle +# test deps +RUN uv pip install pytest pytest-cov COPY . /boxer RUN chmod +x /boxer/docker/entrypoint.sh diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 38b9615..d05d964 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -18,8 +18,8 @@ services: - DISPLAY=${DISPLAY:-:0} - NVIDIA_VISIBLE_DEVICES=all - NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics - - user: "${UID:-1000}:${GID:-1000}" + - HOST_UID=${BOXER_UID:-1000} + - HOST_GID=${BOXER_GID:-1000} volumes: # Checkpoints diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index f8ad09c..f4b178c 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -22,4 +22,8 @@ if [ ! -f "$DATA_SENTINEL" ]; then echo "INFO: Sample data downloaded to $DATA_DIR/" fi +if [ -n "${HOST_UID:-}" ] && [ -n "${HOST_GID:-}" ]; then + chown -R "$HOST_UID:$HOST_GID" /boxer/ckpts /boxer/sample_data /boxer/output 2>/dev/null || true +fi + exec "$@" diff --git a/scripts/boxer_docker.sh b/scripts/boxer_docker.sh index d27549a..4a462ce 100644 --- a/scripts/boxer_docker.sh +++ b/scripts/boxer_docker.sh @@ -18,6 +18,12 @@ EOF fi SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +ROOT_DIR="$SCRIPT_DIR/.." 
+ +export BOXER_UID=$(id -u) +export BOXER_GID=$(id -g) + +mkdir -p "$ROOT_DIR/ckpts" "$ROOT_DIR/sample_data" "$ROOT_DIR/output" XHOST_GRANTED=false if [ -n "$DISPLAY" ]; then diff --git a/utils/viewer_3d.py b/utils/viewer_3d.py index c3db9d8..e327290 100644 --- a/utils/viewer_3d.py +++ b/utils/viewer_3d.py @@ -6009,8 +6009,12 @@ def _stop_recording(self) -> None: n = self._record_frame_idx print(f"[REC] Recording stopped — {n} frames captured") if n > 0: - desktop = os.path.expanduser("~/Desktop") - output_dir = desktop if os.path.isdir(desktop) else os.path.expanduser("~") + output_dir = os.path.join(os.path.expanduser(EVAL_PATH), self._seq_name) + os.makedirs(output_dir, exist_ok=True) + # fallback to ~/Desktop + if not os.path.isdir(output_dir): + desktop = os.path.expanduser("~/Desktop") + output_dir = desktop if os.path.isdir(desktop) else os.path.expanduser("~") base_name = f"viz_tracker_{self._seq_name}" output_name = f"{base_name}.mp4" out_path = os.path.join(output_dir, output_name) From 93a9106f2c893df06f7e6123470d4f0b264640f3 Mon Sep 17 00:00:00 2001 From: Andreas Loeffler <73336148+andreas-loeffler@users.noreply.github.com> Date: Sat, 11 Apr 2026 03:17:27 +0200 Subject: [PATCH 5/7] doc: update readme with detailed docker usage instructions and examples --- README.md | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/README.md b/README.md index 51a5c85..3f4f288 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,33 @@ Boxer lifts 2D object detections into static, global, fused 3D oriented bounding ## Installation +### Docker + +Tested on a Ubuntu 24.04 Host Machine running a NVIDIA GPU with driver version *580.126.09* + +#### Requirements + +- [Docker](https://docs.docker.com/get-docker/) +- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) + +#### Usage + +```bash +# build the image (only needed once) +docker build -f 
docker/Dockerfile -t boxer . + +# use the helper script to enter the container +bash scripts/boxer_docker.sh +bash scripts/boxer_docker.sh --help + +# OR: run a command directly without entering the container +bash scripts/boxer_docker.sh python run_boxer.py --input nym10_gen1 --skip_viz +``` + +> Note: Model checkpoints and sample Aria data are **downloaded automatically on first run** and cached in `ckpts/` and `sample_data/` on your host. + +### Local install + We tested on MacOS (with mps acceleration) and Fedora (with CUDA acceleration). ```bash @@ -38,6 +65,14 @@ We host model checkpoints for BoxerNet, DinoV3 and OWLv2 on [HuggingFace](https: ```bash bash scripts/download_ckpts.sh ``` +
<details><summary>Docker</summary> + +Not strictly necessary: the [scripts/boxer_docker.sh](scripts/boxer_docker.sh) helper script mounts the *ckpts/* directory and the container downloads the checkpoints automatically on first run. To download them explicitly, run: + +```bash +bash scripts/boxer_docker.sh bash scripts/download_ckpts.sh +``` +</details>
## Download Sample Project Aria Data @@ -53,6 +88,15 @@ Let's first start with Aria data. We host three sample [Project Aria](https://ww bash scripts/download_aria_data.sh ``` +
<details><summary>Docker</summary> + +Not strictly necessary: the [scripts/boxer_docker.sh](scripts/boxer_docker.sh) helper script mounts the *sample_data/* directory and the container downloads the sample data automatically on first run. To download it explicitly, run: + +```bash +bash scripts/boxer_docker.sh bash scripts/download_aria_data.sh +``` +</details>
+ ## Demo #1: Hello World / Run BoxerNet in headless mode For this first demo, you do not need to have a display, so it will work if you are SSH'ed into a server. This will run BoxerNet on the first 90 images of a sequence from the test set of the [NymeriaPlus](https://arxiv.org/abs/2603.18496v1) dataset. This will confirm we can load up the data and run a forward passes with the model alongside the online tracker. @@ -62,6 +106,13 @@ Expected to take ~2 mins on mac MPS, <15 secs on CUDA. python run_boxer.py --input nym10_gen1 --max_n=90 --track ``` +
<details><summary>Docker</summary> + +```bash +bash scripts/boxer_docker.sh python run_boxer.py --input nym10_gen1 --max_n=90 --track +``` +</details>
+ This will dump out static images and a video to `outputs/nym10_gen1/`, e.g. something like this in `outputs/nym10_gen1/boxer_viz_current.png` ![Run Boxer Demo](docs/images/boxer_viz_current_hohen_gen1.jpg) @@ -72,6 +123,13 @@ For this demo, you need to have a valid display to have the GUI work. This demo python view_prompt.py --input nym10_gen1 ``` +
<details><summary>Docker</summary> + +```bash +bash scripts/boxer_docker.sh python view_prompt.py --input nym10_gen1 +``` +</details>
+ You should see a window that looks like this: ![View Prompt Demo](docs/images/view_prompt_demo.jpg) @@ -92,6 +150,13 @@ Then, run the fusion script, which will by default search the above paths, to lo python view_fusion.py --input nym10_gen1 ``` +
<details><summary>Docker</summary> + +```bash +bash scripts/boxer_docker.sh python view_fusion.py --input nym10_gen1 +``` +</details>
+ You should see a window like this: ![View Fusion Demo](docs/images/view_fusion_demo.jpg) @@ -104,6 +169,13 @@ Make sure to run Demo #1 above first to generate the 2DBB and 3DBB CSVs. Run the python view_tracker.py --input nym10_gen1 --autoplay ``` +
<details><summary>Docker</summary> + +```bash +bash scripts/boxer_docker.sh python view_tracker.py --input nym10_gen1 --autoplay +``` +</details>
+ ## Demo #5: Running on CA-1M data Extract a sample validation sequence (ca1m-val-42898570) to sample_data/ @@ -116,6 +188,14 @@ Run the view_prompt.py script on it: python view_prompt.py --input ca1m-val-42898570 ``` +
<details><summary>Docker</summary> + +```bash +bash scripts/boxer_docker.sh python scripts/download_ca1m_sample.py +bash scripts/boxer_docker.sh python view_prompt.py --input ca1m-val-42898570 +``` +</details>
+ You should see a window like this: ![CA-1M Prompt](docs/images/ca1m_screenshot.jpg) @@ -132,6 +212,14 @@ Run the view_prompt.py script on it: python view_prompt.py --input SUNRGBD ``` +
<details><summary>Docker</summary> + +```bash +bash scripts/boxer_docker.sh python scripts/download_omni3d_sample.py +bash scripts/boxer_docker.sh python view_prompt.py --input SUNRGBD +``` +</details>
+ You should see a window like this: ![SUNRGBD Prompt](docs/images/sunrgbd_screenshot.jpg) @@ -146,6 +234,14 @@ Run just like the above examples: python view_prompt.py --input scene0707_00 ``` +
<details><summary>Docker</summary> + +```bash +# Place the scene directory in sample_data/ on the host first (e.g. sample_data/scene0707_00) +bash scripts/boxer_docker.sh python view_prompt.py --input scene0707_00 +``` +</details>
+ ![ScanNet Prompt](docs/images/scannet_screenshot.jpg) ## run_boxer.py Usage Details From 2c03d2ee4f60a2b0bbf1acc9d3a104aeab1feb27 Mon Sep 17 00:00:00 2001 From: Andreas Loeffler <73336148+andreas-loeffler@users.noreply.github.com> Date: Sat, 11 Apr 2026 03:22:18 +0200 Subject: [PATCH 6/7] doc: add Docker-related files and helper script to repo overview --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 3f4f288..61001c2 100644 --- a/README.md +++ b/README.md @@ -317,6 +317,10 @@ boxer/ ├── run_boxer.py # Main entry point (headless detection + lifting) ├── view_prompt.py # Interactive demo (2D prompts + OWL text detection) ├── view_fusion.py # View pre-computed 3D bounding boxes +├── docker/ +│ ├── Dockerfile # Docker image definition +│ ├── docker-compose.yml # Compose config +│ └── entrypoint.sh # Container entrypoint ├── boxernet/ │ ├── boxernet.py # BoxerNet model (encode → cross-attend → predict) │ └── dinov3_wrapper.py # DINOv3 backbone wrapper @@ -330,6 +334,7 @@ boxer/ │ ├── omni_loader.py # Omni3D dataset loader │ └── scannet_loader.py # ScanNet dataset loader ├── scripts/ +│ ├── boxer_docker.sh # Docker helper script for running commands in the container │ ├── download_ckpts.sh # Download model checkpoints │ ├── download_aria_data.sh # Download sample Aria sequences │ ├── download_ca1m_sample.py # Extract CA-1M sample data From 6a4716664e3f1e8f21cd52b8029d7107007b8652 Mon Sep 17 00:00:00 2001 From: Andreas Loeffler <73336148+andreas-loeffler@users.noreply.github.com> Date: Sat, 11 Apr 2026 03:24:51 +0200 Subject: [PATCH 7/7] doc: update docker section to include tested nvidia container toolkit version --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 61001c2..03535c1 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Boxer lifts 2D object detections into static, global, fused 3D oriented bounding ### Docker -Tested on a Ubuntu 24.04 Host Machine 
running a NVIDIA GPU with driver version *580.126.09* +Tested on an Ubuntu 24.04 host machine with an NVIDIA GPU (driver version *580.126.09*) and NVIDIA Container Toolkit version *1.19.0*. #### Requirements