diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..5f6bd6e --- /dev/null +++ b/.dockerignore @@ -0,0 +1,35 @@ +__pycache__/ +*.pyc +*.pyo +*.pyd +.pytest_cache/ + +# Environment +.env +*.egg +boxer/ + +# Version control +.git/ +.gitignore + +# Coverage +.coverage +tests/htmlcov/ +htmlcov/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +.DS_Store + +# Checkpoints +ckpts/ + +# Sample data +sample_data/ + +# Visualization output +output/ diff --git a/README.md b/README.md index 51a5c85..03535c1 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,33 @@ Boxer lifts 2D object detections into static, global, fused 3D oriented bounding ## Installation +### Docker + +Tested on an Ubuntu 24.04 host machine with an NVIDIA GPU (driver version *580.126.09*) and NVIDIA Container Toolkit version *1.19.0*. + +#### Requirements + +- [Docker](https://docs.docker.com/get-docker/) +- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) + +#### Usage + +```bash +# build the image (only needed once) +docker build -f docker/Dockerfile -t boxer . + +# use the helper script to enter the container +bash scripts/boxer_docker.sh +bash scripts/boxer_docker.sh --help + +# OR: run a command directly without entering the container +bash scripts/boxer_docker.sh python run_boxer.py --input nym10_gen1 --skip_viz +``` + +> Note: Model checkpoints and sample Aria data are **downloaded automatically on first run** and cached in `ckpts/` and `sample_data/` on your host. + +### Local install + We tested on MacOS (with mps acceleration) and Fedora (with CUDA acceleration). ```bash @@ -38,6 +65,14 @@ We host model checkpoints for BoxerNet, DinoV3 and OWLv2 on [HuggingFace](https: ```bash bash scripts/download_ckpts.sh ``` +
Docker + +This step is optional: the [scripts/boxer_docker.sh](scripts/boxer_docker.sh) helper script automatically mounts the *ckpts/* directory and downloads the checkpoints on first run. To download the checkpoints explicitly, run: + +```bash +bash scripts/boxer_docker.sh bash scripts/download_ckpts.sh +``` +
## Download Sample Project Aria Data @@ -53,6 +88,15 @@ Let's first start with Aria data. We host three sample [Project Aria](https://ww bash scripts/download_aria_data.sh ``` +
Docker + +This step is optional: the [scripts/boxer_docker.sh](scripts/boxer_docker.sh) helper script automatically mounts the *sample_data/* directory and downloads the data on first run. To download the data explicitly, run: + +```bash +bash scripts/boxer_docker.sh bash scripts/download_aria_data.sh +``` +
+ ## Demo #1: Hello World / Run BoxerNet in headless mode For this first demo, you do not need to have a display, so it will work if you are SSH'ed into a server. This will run BoxerNet on the first 90 images of a sequence from the test set of the [NymeriaPlus](https://arxiv.org/abs/2603.18496v1) dataset. This will confirm we can load up the data and run a forward passes with the model alongside the online tracker. @@ -62,6 +106,13 @@ Expected to take ~2 mins on mac MPS, <15 secs on CUDA. python run_boxer.py --input nym10_gen1 --max_n=90 --track ``` +
Docker + +```bash +bash scripts/boxer_docker.sh python run_boxer.py --input nym10_gen1 --max_n=90 --track +``` +
+ This will dump out static images and a video to `outputs/nym10_gen1/`, e.g. something like this in `outputs/nym10_gen1/boxer_viz_current.png` ![Run Boxer Demo](docs/images/boxer_viz_current_hohen_gen1.jpg) @@ -72,6 +123,13 @@ For this demo, you need to have a valid display to have the GUI work. This demo python view_prompt.py --input nym10_gen1 ``` +
Docker + +```bash +bash scripts/boxer_docker.sh python view_prompt.py --input nym10_gen1 +``` +
+ You should see a window that looks like this: ![View Prompt Demo](docs/images/view_prompt_demo.jpg) @@ -92,6 +150,13 @@ Then, run the fusion script, which will by default search the above paths, to lo python view_fusion.py --input nym10_gen1 ``` +
Docker + +```bash +bash scripts/boxer_docker.sh python view_fusion.py --input nym10_gen1 +``` +
+ You should see a window like this: ![View Fusion Demo](docs/images/view_fusion_demo.jpg) @@ -104,6 +169,13 @@ Make sure to run Demo #1 above first to generate the 2DBB and 3DBB CSVs. Run the python view_tracker.py --input nym10_gen1 --autoplay ``` +
Docker + +```bash +bash scripts/boxer_docker.sh python view_tracker.py --input nym10_gen1 --autoplay +``` +
+ ## Demo #5: Running on CA-1M data Extract a sample validation sequence (ca1m-val-42898570) to sample_data/ @@ -116,6 +188,14 @@ Run the view_prompt.py script on it: python view_prompt.py --input ca1m-val-42898570 ``` +
Docker + +```bash +bash scripts/boxer_docker.sh python scripts/download_ca1m_sample.py +bash scripts/boxer_docker.sh python view_prompt.py --input ca1m-val-42898570 +``` +
+ You should see a window like this: ![CA-1M Prompt](docs/images/ca1m_screenshot.jpg) @@ -132,6 +212,14 @@ Run the view_prompt.py script on it: python view_prompt.py --input SUNRGBD ``` +
Docker + +```bash +bash scripts/boxer_docker.sh python scripts/download_omni3d_sample.py +bash scripts/boxer_docker.sh python view_prompt.py --input SUNRGBD +``` +
+ You should see a window like this: ![SUNRGBD Prompt](docs/images/sunrgbd_screenshot.jpg) @@ -146,6 +234,14 @@ Run just like the above examples: python view_prompt.py --input scene0707_00 ``` +
Docker + +```bash +# Place the scene directory in sample_data/ on the host first (e.g. sample_data/scene0707_00) +bash scripts/boxer_docker.sh python view_prompt.py --input scene0707_00 +``` +
+ ![ScanNet Prompt](docs/images/scannet_screenshot.jpg) ## run_boxer.py Usage Details @@ -221,6 +317,10 @@ boxer/ ├── run_boxer.py # Main entry point (headless detection + lifting) ├── view_prompt.py # Interactive demo (2D prompts + OWL text detection) ├── view_fusion.py # View pre-computed 3D bounding boxes +├── docker/ +│ ├── Dockerfile # Docker image definition +│ ├── docker-compose.yml # Compose config +│ └── entrypoint.sh # Container entrypoint ├── boxernet/ │ ├── boxernet.py # BoxerNet model (encode → cross-attend → predict) │ └── dinov3_wrapper.py # DINOv3 backbone wrapper @@ -234,6 +334,7 @@ boxer/ │ ├── omni_loader.py # Omni3D dataset loader │ └── scannet_loader.py # ScanNet dataset loader ├── scripts/ +│ ├── boxer_docker.sh # Docker helper script for running commands in the container │ ├── download_ckpts.sh # Download model checkpoints │ ├── download_aria_data.sh # Download sample Aria sequences │ ├── download_ca1m_sample.py # Extract CA-1M sample data diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..1c45969 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,65 @@ +# syntax=docker/dockerfile:1 + +FROM nvidia/cuda:12.8.1-runtime-ubuntu24.04 +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3.12 \ + python3.12-venv \ + python3.12-dev \ + gcc \ + curl \ + wget \ + ffmpeg \ + libglib2.0-0 \ + libsm6 \ + libxext6 \ + libxrender1 \ + libgl1 \ + libgl-dev \ + libegl1 \ + libegl-dev \ + libglx0 \ + libx11-6 \ + && rm -rf /var/lib/apt/lists/* + +# Install uv (https://docs.astral.sh/uv/) +RUN curl -LsSf https://astral.sh/uv/install.sh | sh +ENV PATH="/root/.local/bin:${PATH}" + +# Create virtual environment with uv +WORKDIR /boxer +RUN uv venv /boxer/venv --python python3.12 --seed +ENV VIRTUAL_ENV=/boxer/venv +ENV PATH="/boxer/venv/bin:${PATH}" + +RUN uv pip install \ + 'torch>=2.0' \ + numpy \ + opencv-python \ + tqdm \ + dill \ + Pillow + +# To support Project 
Aria loading +RUN uv pip install projectaria-tools || true + +# 3D interactive viewer for view_*.py scripts +RUN uv pip install \ + moderngl \ + moderngl-window \ + imgui-bundle + +# test deps +RUN uv pip install pytest pytest-cov + +COPY . /boxer +RUN chmod +x /boxer/docker/entrypoint.sh + +RUN echo 'source /boxer/venv/bin/activate' >> /root/.bashrc + +ENV PYTHONUNBUFFERED=1 +ENV DISPLAY=:0 + +ENTRYPOINT ["/boxer/docker/entrypoint.sh"] +CMD ["python", "run_boxer.py", "--help"] diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 0000000..d05d964 --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,40 @@ +services: + boxer: + build: + context: .. + dockerfile: docker/Dockerfile + image: boxer + + # Pass through GPU access + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + + environment: + - DISPLAY=${DISPLAY:-:0} + - NVIDIA_VISIBLE_DEVICES=all + - NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics + - HOST_UID=${BOXER_UID:-1000} + - HOST_GID=${BOXER_GID:-1000} + + volumes: + # Checkpoints + - ../ckpts:/boxer/ckpts + + # Sample data + - ../sample_data:/boxer/sample_data + + # Visualization output + - ../output:/boxer/output + + # x11 unix socket + - /tmp/.X11-unix:/tmp/.X11-unix + + network_mode: host + + stdin_open: true + tty: true diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh new file mode 100644 index 0000000..f4b178c --- /dev/null +++ b/docker/entrypoint.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +set -e + +CKPT_DIR="/boxer/ckpts" +SENTINEL="${CKPT_DIR}/.downloaded" + +if [ ! -f "$SENTINEL" ]; then + echo "WARNING: Checkpoints not found, proceeding to download..." + bash /boxer/scripts/download_ckpts.sh + touch "$SENTINEL" + echo "INFO: Checkpoints downloaded and saved to $CKPT_DIR/" +fi + +DATA_DIR="/boxer/sample_data" +DATA_SENTINEL="${DATA_DIR}/.downloaded" + +if [ ! -f "$DATA_SENTINEL" ]; then + echo "INFO: Sample Aria data not found, downloading..." 
+ bash /boxer/scripts/download_aria_data.sh + touch "$DATA_SENTINEL" + echo "INFO: Sample data downloaded to $DATA_DIR/" +fi + +if [ -n "${HOST_UID:-}" ] && [ -n "${HOST_GID:-}" ]; then + chown -R "$HOST_UID:$HOST_GID" /boxer/ckpts /boxer/sample_data /boxer/output 2>/dev/null || true +fi + +exec "$@" diff --git a/scripts/boxer_docker.sh b/scripts/boxer_docker.sh new file mode 100644 index 0000000..4a462ce --- /dev/null +++ b/scripts/boxer_docker.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then + cat < /dev/null 2>&1; then + XHOST_GRANTED=true + fi +fi + +docker compose -f "$SCRIPT_DIR/../docker/docker-compose.yml" run --rm --remove-orphans boxer "${@:-bash}" + +if [ "$XHOST_GRANTED" = true ]; then + xhost -local:docker > /dev/null 2>&1 || true +fi diff --git a/utils/viewer_3d.py b/utils/viewer_3d.py index 9b219cf..e327290 100644 --- a/utils/viewer_3d.py +++ b/utils/viewer_3d.py @@ -6009,7 +6009,12 @@ def _stop_recording(self) -> None: n = self._record_frame_idx print(f"[REC] Recording stopped — {n} frames captured") if n > 0: - output_dir = os.path.expanduser("~/Desktop") + output_dir = os.path.join(os.path.expanduser(EVAL_PATH), self._seq_name) + os.makedirs(output_dir, exist_ok=True) + # fallback to ~/Desktop + if not os.path.isdir(output_dir): + desktop = os.path.expanduser("~/Desktop") + output_dir = desktop if os.path.isdir(desktop) else os.path.expanduser("~") base_name = f"viz_tracker_{self._seq_name}" output_name = f"{base_name}.mp4" out_path = os.path.join(output_dir, output_name)