diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..5f6bd6e
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,35 @@
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.pytest_cache/
+
+# Environment
+.env
+*.egg
+boxer/
+
+# Version control
+.git/
+.gitignore
+
+# Coverage
+.coverage
+tests/htmlcov/
+htmlcov/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+.DS_Store
+
+# Checkpoints
+ckpts/
+
+# Sample data
+sample_data/
+
+# Visualization output
+output/
diff --git a/README.md b/README.md
index 51a5c85..03535c1 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,33 @@ Boxer lifts 2D object detections into static, global, fused 3D oriented bounding
## Installation
+### Docker
+
+Tested on an Ubuntu 24.04 host machine with an NVIDIA GPU (driver version *580.126.09*) and NVIDIA Container Toolkit version *1.19.0*.
+
+#### Requirements
+
+- [Docker](https://docs.docker.com/get-docker/)
+- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
+
+#### Usage
+
+```bash
+# build the image (only needed once)
+docker build -f docker/Dockerfile -t boxer .
+
+# use the helper script to enter the container
+bash scripts/boxer_docker.sh
+bash scripts/boxer_docker.sh --help
+
+# OR: run a command directly without entering the container
+bash scripts/boxer_docker.sh python run_boxer.py --input nym10_gen1 --skip_viz
+```
+
+> Note: Model checkpoints and sample Aria data are **downloaded automatically on first run** and cached in `ckpts/` and `sample_data/` on your host.
+
+### Local install
+
We tested on MacOS (with mps acceleration) and Fedora (with CUDA acceleration).
```bash
@@ -38,6 +65,14 @@ We host model checkpoints for BoxerNet, DinoV3 and OWLv2 on [HuggingFace](https:
```bash
bash scripts/download_ckpts.sh
```
+Docker
+
+This step is optional: the [scripts/boxer_docker.sh](scripts/boxer_docker.sh) helper script mounts the *ckpts/* directory and the container entrypoint downloads the checkpoints on first run. To download the checkpoints explicitly, run:
+
+```bash
+bash scripts/boxer_docker.sh bash scripts/download_ckpts.sh
+```
+
## Download Sample Project Aria Data
@@ -53,6 +88,15 @@ Let's first start with Aria data. We host three sample [Project Aria](https://ww
bash scripts/download_aria_data.sh
```
+Docker
+
+This step is optional: the [scripts/boxer_docker.sh](scripts/boxer_docker.sh) helper script mounts the *sample_data/* directory and the container entrypoint downloads the sample Aria data on first run. To download the data explicitly, run:
+
+```bash
+bash scripts/boxer_docker.sh bash scripts/download_aria_data.sh
+```
+
+
## Demo #1: Hello World / Run BoxerNet in headless mode
For this first demo, you do not need to have a display, so it will work if you are SSH'ed into a server. This will run BoxerNet on the first 90 images of a sequence from the test set of the [NymeriaPlus](https://arxiv.org/abs/2603.18496v1) dataset. This will confirm we can load up the data and run a forward passes with the model alongside the online tracker.
@@ -62,6 +106,13 @@ Expected to take ~2 mins on mac MPS, <15 secs on CUDA.
python run_boxer.py --input nym10_gen1 --max_n=90 --track
```
+Docker
+
+```bash
+bash scripts/boxer_docker.sh python run_boxer.py --input nym10_gen1 --max_n=90 --track
+```
+
+
This will dump out static images and a video to `outputs/nym10_gen1/`, e.g. something like this in `outputs/nym10_gen1/boxer_viz_current.png`

@@ -72,6 +123,13 @@ For this demo, you need to have a valid display to have the GUI work. This demo
python view_prompt.py --input nym10_gen1
```
+Docker
+
+```bash
+bash scripts/boxer_docker.sh python view_prompt.py --input nym10_gen1
+```
+
+
You should see a window that looks like this:

@@ -92,6 +150,13 @@ Then, run the fusion script, which will by default search the above paths, to lo
python view_fusion.py --input nym10_gen1
```
+Docker
+
+```bash
+bash scripts/boxer_docker.sh python view_fusion.py --input nym10_gen1
+```
+
+
You should see a window like this:

@@ -104,6 +169,13 @@ Make sure to run Demo #1 above first to generate the 2DBB and 3DBB CSVs. Run the
python view_tracker.py --input nym10_gen1 --autoplay
```
+Docker
+
+```bash
+bash scripts/boxer_docker.sh python view_tracker.py --input nym10_gen1 --autoplay
+```
+
+
## Demo #5: Running on CA-1M data
Extract a sample validation sequence (ca1m-val-42898570) to sample_data/
@@ -116,6 +188,14 @@ Run the view_prompt.py script on it:
python view_prompt.py --input ca1m-val-42898570
```
+Docker
+
+```bash
+bash scripts/boxer_docker.sh python scripts/download_ca1m_sample.py
+bash scripts/boxer_docker.sh python view_prompt.py --input ca1m-val-42898570
+```
+
+
You should see a window like this:

@@ -132,6 +212,14 @@ Run the view_prompt.py script on it:
python view_prompt.py --input SUNRGBD
```
+Docker
+
+```bash
+bash scripts/boxer_docker.sh python scripts/download_omni3d_sample.py
+bash scripts/boxer_docker.sh python view_prompt.py --input SUNRGBD
+```
+
+
You should see a window like this:

@@ -146,6 +234,14 @@ Run just like the above examples:
python view_prompt.py --input scene0707_00
```
+Docker
+
+```bash
+# Place the scene directory in sample_data/ on the host first (e.g. sample_data/scene0707_00)
+bash scripts/boxer_docker.sh python view_prompt.py --input scene0707_00
+```
+
+

## run_boxer.py Usage Details
@@ -221,6 +317,10 @@ boxer/
├── run_boxer.py # Main entry point (headless detection + lifting)
├── view_prompt.py # Interactive demo (2D prompts + OWL text detection)
├── view_fusion.py # View pre-computed 3D bounding boxes
+├── docker/
+│ ├── Dockerfile # Docker image definition
+│ ├── docker-compose.yml # Compose config
+│ └── entrypoint.sh # Container entrypoint
├── boxernet/
│ ├── boxernet.py # BoxerNet model (encode → cross-attend → predict)
│ └── dinov3_wrapper.py # DINOv3 backbone wrapper
@@ -234,6 +334,7 @@ boxer/
│ ├── omni_loader.py # Omni3D dataset loader
│ └── scannet_loader.py # ScanNet dataset loader
├── scripts/
+│ ├── boxer_docker.sh # Docker helper script for running commands in the container
│ ├── download_ckpts.sh # Download model checkpoints
│ ├── download_aria_data.sh # Download sample Aria sequences
│ ├── download_ca1m_sample.py # Extract CA-1M sample data
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..1c45969
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,65 @@
+# syntax=docker/dockerfile:1
+
+FROM nvidia/cuda:12.8.1-runtime-ubuntu24.04
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ python3.12 \
+ python3.12-venv \
+ python3.12-dev \
+ gcc \
+ curl \
+ wget \
+ ffmpeg \
+ libglib2.0-0 \
+ libsm6 \
+ libxext6 \
+ libxrender1 \
+ libgl1 \
+ libgl-dev \
+ libegl1 \
+ libegl-dev \
+ libglx0 \
+ libx11-6 \
+ && rm -rf /var/lib/apt/lists/*
+
+# Install uv (https://docs.astral.sh/uv/)
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+ENV PATH="/root/.local/bin:${PATH}"
+
+# Create virtual environment with uv
+WORKDIR /boxer
+RUN uv venv /boxer/venv --python python3.12 --seed
+ENV VIRTUAL_ENV=/boxer/venv
+ENV PATH="/boxer/venv/bin:${PATH}"
+
+RUN uv pip install \
+ 'torch>=2.0' \
+ numpy \
+ opencv-python \
+ tqdm \
+ dill \
+ Pillow
+
+# Optional: Project Aria data loading (best-effort; a failed install does not fail the build)
+RUN uv pip install projectaria-tools || true
+
+# 3D interactive viewer for view_*.py scripts
+RUN uv pip install \
+ moderngl \
+ moderngl-window \
+ imgui-bundle
+
+# test deps
+RUN uv pip install pytest pytest-cov
+
+COPY . /boxer
+RUN chmod +x /boxer/docker/entrypoint.sh
+
+RUN echo 'source /boxer/venv/bin/activate' >> /root/.bashrc
+
+ENV PYTHONUNBUFFERED=1
+ENV DISPLAY=:0
+
+ENTRYPOINT ["/boxer/docker/entrypoint.sh"]
+CMD ["python", "run_boxer.py", "--help"]
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
new file mode 100644
index 0000000..d05d964
--- /dev/null
+++ b/docker/docker-compose.yml
@@ -0,0 +1,40 @@
+services:
+ boxer:
+ build:
+ context: ..
+ dockerfile: docker/Dockerfile
+ image: boxer
+
+ # Pass through GPU access
+ deploy:
+ resources:
+ reservations:
+ devices:
+ - driver: nvidia
+ count: all
+ capabilities: [gpu]
+
+ environment:
+ - DISPLAY=${DISPLAY:-:0}
+ - NVIDIA_VISIBLE_DEVICES=all
+ - NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics
+ - HOST_UID=${BOXER_UID:-1000}
+ - HOST_GID=${BOXER_GID:-1000}
+
+ volumes:
+ # Checkpoints
+ - ../ckpts:/boxer/ckpts
+
+ # Sample data
+ - ../sample_data:/boxer/sample_data
+
+ # Visualization output
+ - ../output:/boxer/output
+
+ # X11 Unix socket of the host display
+ - /tmp/.X11-unix:/tmp/.X11-unix
+
+ network_mode: host
+
+ stdin_open: true
+ tty: true
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
new file mode 100644
index 0000000..f4b178c
--- /dev/null
+++ b/docker/entrypoint.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+set -e
+
+CKPT_DIR="/boxer/ckpts"
+SENTINEL="${CKPT_DIR}/.downloaded"
+
+if [ ! -f "$SENTINEL" ]; then
+ echo "WARNING: Checkpoints not found, proceeding to download..."
+ bash /boxer/scripts/download_ckpts.sh
+ touch "$SENTINEL"
+ echo "INFO: Checkpoints downloaded and saved to $CKPT_DIR/"
+fi
+
+DATA_DIR="/boxer/sample_data"
+DATA_SENTINEL="${DATA_DIR}/.downloaded"
+
+if [ ! -f "$DATA_SENTINEL" ]; then
+ echo "INFO: Sample Aria data not found, downloading..."
+ bash /boxer/scripts/download_aria_data.sh
+ touch "$DATA_SENTINEL"
+ echo "INFO: Sample data downloaded to $DATA_DIR/"
+fi
+
+if [ -n "${HOST_UID:-}" ] && [ -n "${HOST_GID:-}" ]; then
+ chown -R "$HOST_UID:$HOST_GID" /boxer/ckpts /boxer/sample_data /boxer/output 2>/dev/null || true
+fi
+
+exec "$@"
diff --git a/scripts/boxer_docker.sh b/scripts/boxer_docker.sh
new file mode 100644
index 0000000..4a462ce
--- /dev/null
+++ b/scripts/boxer_docker.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
+ cat < /dev/null 2>&1; then
+ XHOST_GRANTED=true
+ fi
+fi
+
+docker compose -f "$SCRIPT_DIR/../docker/docker-compose.yml" run --rm --remove-orphans boxer "${@:-bash}"
+
+if [ "$XHOST_GRANTED" = true ]; then
+ xhost -local:docker > /dev/null 2>&1 || true
+fi
diff --git a/utils/viewer_3d.py b/utils/viewer_3d.py
index 9b219cf..e327290 100644
--- a/utils/viewer_3d.py
+++ b/utils/viewer_3d.py
@@ -6009,7 +6009,12 @@ def _stop_recording(self) -> None:
n = self._record_frame_idx
print(f"[REC] Recording stopped — {n} frames captured")
if n > 0:
- output_dir = os.path.expanduser("~/Desktop")
+            output_dir = os.path.join(os.path.expanduser(EVAL_PATH), self._seq_name)
+            try:
+                os.makedirs(output_dir, exist_ok=True)
+            except OSError:  # makedirs raises instead of leaving the dir missing: fall back to ~/Desktop, then ~
+                desktop = os.path.expanduser("~/Desktop")
+                output_dir = desktop if os.path.isdir(desktop) else os.path.expanduser("~")
base_name = f"viz_tracker_{self._seq_name}"
output_name = f"{base_name}.mp4"
out_path = os.path.join(output_dir, output_name)