diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..bf261c7
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,24 @@
+.git
+*.egg-info
+oarphpy.egg-info
+*.pyc
+*/**/__pycache__
+.eggs
+.pytest_cache
+eggs
+build
+docs/build
+ext_data
+external_test_fixtures
+notebooks/**/*
+test_*
+*.bag
+dataroot
+test_run_output
+
+dataroot_costco
+my_html_viz
+trimeshtast*
+colmap*
+cv_charuco
+history*
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..6e1d71e
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+test/fixtures/test_DiskCachedFramesVideoSegmentFactory_create_factory_for_video.parquet filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fa4adc0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,17 @@
+*.egg-info
+*.pyc
+*/**/__pycache__
+.DS_Store
+._.DS_Store
+.eggs
+.vscode
+*/**/._*
+eggs
+build
+dataroot
+ext_data
+external_test_fixtures
+notebooks/.ipynb_checkpoints
+notebooks/sparkmonitor_kernelextension.log
+notebooks/pybullet_debug_out.mp4
+psegs_test
diff --git a/LICENSE b/LICENSE
index 3821977..b52684e 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright 2020 Maintainers of PSegs
+Copyright 2023 Maintainers of PSegs
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
diff --git a/README.md b/README.md
index d3ff1f3..74d245e 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,118 @@
-(Drafting)
+# PSegs: Perception Segments Library
 
+[![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+
+# PSegs Extensions
+
+notes about extensions and links to them
+
+readme!
+
+Things to finalize:
+ * Transforms: want some unit tests of chaining and demo of that
+ * frames: maybe | -> .
+ * maybe drop the timestamp member from cuboids, camera images, point clouds etc
+ * For a segment debug: fused painted cloud in plotly! sampled to 100k pts, obviously
+ * PointCloud: have column names for cloud (x, y, z, intensity, semantic [, instance])
+  * for nusc and maybe others: should cloud labels be separate URIs?
+
+
+frames:
+ * http://www.mech.sharif.ir/c/document_library/get_file?uuid=5a4bb247-1430-4e46-942c-d692dead831f&groupId=14040
+ * http://wiki.ros.org/tf/Overview/Transformations
+
+
+ * trailer: 
+     * show a histogram with examples of distance / orientation with samples over ALL datasets
+     * show perf!  show time to fetch frames using Spark + Parquet
+     * show a video of one camera with debug overlays.  maybe one with Delaunay lidar too (!)
+     * show a frame HTML with 3d interface
+     * show new things: argo associated bikes, Delaunay lidar, occlusion tree
+ * supported datasets, how to get a blurb and **stats** on each of them.  probably render histo reports for each.
+ * data structures:
+    * StampedDatum
+    * Frame
+
+
+cli
+  * install as part of pip install psegs
+  * dataset:
+     * stages:
+         - download (might be manual); download test fixtures
+         - place (symlinks or whatever)
+         - test (need way to check skipped tests)
+         - demo (show one segment)
+         - convert (all to sd table)
+
+
+
+
+
+cd /tmp
+pip3 install rosdep rospkg rosinstall_generator rosinstall wstool vcstools catkin_tools catkin_pkg
+
+rosdep init
+rosdep update
+mkdir ros_catkin_ws
+cd ros_catkin_ws
+catkin config --init -DCMAKE_BUILD_TYPE=Release -DROS_PYTHON_VERSION=3 --blacklist rqt_rviz rviz_plugin_tutorials librviz_tutorial --install
+
+rosinstall_generator desktop_full --rosdistro melodic --deps --tar > melodic-desktop-full.rosinstall
+wstool init -j8 src melodic-desktop-full.rosinstall
+
+export ROS_PYTHON_VERSION=3
+pip3 install -U -f https://extras.wxpython.org/wxPython4/extras/linux/gtk3/ubuntu-18.04 wxPython
+
+
+#!/bin/bash
+# Install each package named on the command line, continuing past any failure.
+if [ "$(whoami)" != root ]; then
+    echo "You must be root or use sudo to install packages."
+    exit 1  # `return` is only valid inside a function or a sourced script
+fi
+
+#Call apt-get for each package
+for pkg in "$@"
+do
+    echo "Installing $pkg"
+    sudo apt-get -my install "$pkg" >> install.log
+done
+
+
+chmod +x install_skip
+
+#./install_skip `rosdep check --from-paths src --ignore-src | grep python | sed -e "s/^apt\t//g" | sed -z "s/\n/ /g" | sed -e "s/python/python3/g"`
+
+echo 'Etc/UTC' > /etc/timezone && \
+    ln -s /usr/share/zoneinfo/Etc/UTC /etc/localtime && \
+    apt-get update && apt-get install -q -y tzdata
+
+apt-get install -y python3-psutil python3-catkin-pkg python3-empy python3-numpy python3-rospkg python3-yaml python3-pyqt5.qtwebkit python3-mock python3-rospkg python3-paramiko python3-cairo python3-pil python3-defusedxml python3-sip-dev python3-pyqt5.qtopengl python3-matplotlib python3-pyqt5 python3-pyqt5.qtsvg python3-sip-dev python3-pydot python3-pygraphviz python3-netifaces python3-yaml python3-opencv python3-catkin-pkg python3-rosdep python3-coverage python3-gnupg python3-lxml python3-mock python3-opengl python3-empy python3-nose
+
+# apt-get install -y python3-wxtools
+
+rosdep install --from-paths src --ignore-src -y --skip-keys="`rosdep check --from-paths src --ignore-src | grep python | sed -e "s/^apt\t//g" | sed -z "s/\n/ /g"`"
+find . -type f -exec sed -i 's/\/usr\/bin\/env[ ]*python/\/usr\/bin\/env python3/g' {} +
+
+
+cd src && git clone https://github.com/RobotWebTools/rosbridge_suite && git clone https://github.com/GT-RAIL/rosauth && cd -
+
+# https://github.com/RobotWebTools/rosbridge_suite/blob/ad63eb1f7a05d8d52470ac1364b033c74683bbbf/rosbridge_server/package.xml#L18
+apt-get install -y \
+    python3-twisted python3-autobahn python-backports.ssl-match-hostname python3-tornado python3-bson
+
+catkin build
+
+
+
+
+#############################
+# NOPE NOPE
+cd /tmp
+apt-get install -y lsb-release
+sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list'
+curl -sSL 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xC1CF6E31E6BADE8868B172B4F42ED6FBAB17C654' | sudo apt-key add -
+apt-get update
+apt-get install -y ros-melodic-rosbridge-server
+
+roslaunch rosbridge_server rosbridge_websocket.launch
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..4e90e80
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,125 @@
+# syntax=docker/dockerfile:1
+# ^ Parser directive: it must remain the very first line of this file to take effect.
+# Copyright 2023 Maintainers of PSegs
+FROM oarphpy/full:0.1.1
+
+## Include banner
+COPY docker/bashrc /etc/bash.bashrc
+RUN chmod a+rwx /etc/bash.bashrc
+
+RUN \
+  --mount=type=cache,target=/var/cache/apt \
+  --mount=type=cache,target=/root/.cache/pip \
+  apt-get update && \
+  apt-get install -y \
+    git \
+    wget \
+  && \
+  pip3 install --upgrade pip
+
+# # Pytorch3d
+# RUN \
+#   pip3 install torch==1.9.0+cu111 torchvision==0.9.2+cu111 torchaudio==0.8.2 && \
+#   pip3 install pytorch3d==0.6.0 -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu111_pyt190/download.html
+
+# # open3d
+# RUN apt-get install ffmpeg libsm6 libxext6 libc++-dev sudo git -y
+# RUN apt-get install -y build-essential cmake ccache
+# RUN cd /opt && \
+#   git clone https://github.com/isl-org/Open3D.git && \
+#   cd Open3D && \
+#   ./util/install_deps_ubuntu.sh assume-yes && \
+#   mkdir -p build && cd build && \
+#   cmake \
+#     -DBUILD_CUDA_MODULE=1 \
+#     -DBUILD_COMMON_CUDA_ARCHS=1 \
+#     -DENABLE_HEADLESS_RENDERING=1 \
+#     -DBUILD_WEBRTC=1 \
+#     -DCMAKE_BUILD_TYPE=Release \
+#     .. && \
+#   make -j $(nproc)
+
+# # RUN pip3 install open3d==0.13.0
+# # RUN \
+# #   ln -s \
+# #     /usr/local/cuda-11.1/targets/x86_64-linux/lib/libcusolver.so.11 \
+# #     $(python3 -c "import open3d as x; print(x.__path__[0])")/cuda/libcusolver.so.10
+
+# # ENV LD_LIBRARY_PATH ${LD_LIBRARY_PATH}:/usr/local/cuda-11.1/targets/x86_64-linux/lib/
+# RUN python3 -c ''
+
+# PSegs extras
+RUN \
+  --mount=type=cache,target=/var/cache/apt \
+  apt-get update && \
+  apt-get install -y \
+    ffmpeg
+
+# COLMAP and pycolmap: TODO use a wheel
+## COLMAP
+RUN \
+  --mount=type=cache,target=/var/cache/apt \
+  apt-get update && \
+  apt-get install -y \
+    git \
+    cmake \
+    ninja-build \
+    build-essential \
+    libboost-program-options-dev \
+    libboost-filesystem-dev \
+    libboost-graph-dev \
+    libboost-system-dev \
+    libboost-test-dev \
+    libeigen3-dev \
+    libflann-dev \
+    libfreeimage-dev \
+    libopencv-dev \
+    libmetis-dev \
+    libgoogle-glog-dev \
+    libgflags-dev \
+    libsqlite3-dev \
+    libglew-dev \
+    qtbase5-dev \
+    libqt5opengl5-dev \
+    libcgal-dev \
+    libceres-dev
+RUN \
+  cd /opt && \
+    (git clone --depth 1 --branch 3.8 https://github.com/colmap/colmap || true) && \
+    cd colmap && \
+    mkdir -p build && cd build && \
+    cmake .. -GNinja -DCUDA_ENABLED=OFF && \
+    ninja && \
+    ninja install && \
+    colmap -h
+RUN \
+  --mount=type=cache,target=/root/.cache/pip \
+  cd /opt && \
+  git clone --recursive https://github.com/colmap/pycolmap.git && \
+  cd pycolmap && \
+  git checkout 401f82658cdad1e8b657c77381863f9e261c7c3c && \
+  echo "Hack: build with debug symbols else segfaults on list comps python 3.10" && \
+  sed -i -e "s/cfg = 'Debug'/cfg = 'RelWithDebInfo' # /g" setup.py && \
+  pip3 install -v -e .
+
+# FIXME when upgrading base image
+RUN \
+  --mount=type=cache,target=/root/.cache/pip \
+  sudo apt remove -y python3-blinker && \
+  pip3 install "blinker>=1.7.0"
+
+COPY requirements.txt /tmp/requirements.txt
+RUN \
+  --mount=type=cache,target=/root/.cache/pip \
+  pip3 install -r /tmp/requirements.txt && \
+  rm /tmp/requirements.txt
+
+# RUN \
+#   --mount=type=cache,target=/root/.cache/pip \
+#   pip3 install \
+#     "opencv-contrib-python>=4.5.5.62"
+
+RUN mkdir -p /opt/psegs
+COPY . /opt/psegs
+WORKDIR /opt/psegs
+ENV PYTHONPATH $PYTHONPATH:/opt/psegs
diff --git a/docker/Dockerfile.pt3d b/docker/Dockerfile.pt3d
new file mode 100644
index 0000000..f85faa3
--- /dev/null
+++ b/docker/Dockerfile.pt3d
@@ -0,0 +1,114 @@
+# Copyright 2023 Maintainers of PSegs
+
+FROM nvidia/cuda:11.4.0-devel-ubuntu20.04
+
+# Oarphpy
+
+ENV PYTHONDONTWRITEBYTECODE 1
+
+
+### Core
+### Required for installing and testing things
+RUN \
+  apt-get update && \
+  apt-get install -y \
+    curl \
+    git \
+    python-dev \
+    python3-pip \
+    python3-dev \
+    wget
+
+
+### Spark (& Hadoop)
+### Use a binary distro for:
+###  * Spark LZ4 support through Hadoop
+###  * Spark env file hacking (e.g. debug / profiling)
+ENV HADOOP_VERSION 3.2.1
+ENV HADOOP_HOME /opt/hadoop
+ENV HADOOP_CONF_DIR $HADOOP_HOME/etc/hadoop
+ENV PATH $PATH:$HADOOP_HOME/bin
+ENV LD_LIBRARY_PATH "$HADOOP_HOME/lib/native/:$LD_LIBRARY_PATH"
+RUN curl -L --retry 3 \
+  "https://www.apache.org/dyn/mirrors/mirrors.cgi?action=download&filename=hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz" \
+  | gunzip \
+  | tar -x -C /opt/ \
+ && mv /opt/hadoop-$HADOOP_VERSION $HADOOP_HOME \
+ && rm -rf $HADOOP_HOME/share/doc
+
+ENV SPARK_VERSION 3.0.1
+ENV SPARK_PACKAGE spark-${SPARK_VERSION}-bin-without-hadoop
+ENV SPARK_HOME /opt/spark
+ENV PYSPARK_PYTHON=python3
+ENV SPARK_DIST_CLASSPATH "$HADOOP_HOME/etc/hadoop/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/tools/lib/*"
+ENV PATH $PATH:${SPARK_HOME}/bin
+RUN curl -L --retry 3 \
+  "https://www.apache.org/dyn/mirrors/mirrors.cgi?action=download&filename=spark/spark-${SPARK_VERSION}/${SPARK_PACKAGE}.tgz" \
+  | gunzip \
+  | tar x -C /opt/ \
+ && mv /opt/$SPARK_PACKAGE $SPARK_HOME
+RUN cd /opt/spark/python && python3 setup.py install
+
+## Java
+RUN \
+  apt-get update && \
+  DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-11-jdk && \
+  ls -lhat /usr/lib/jvm/java-11-openjdk-amd64 && \
+  echo JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64 >> /etc/environment
+ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64
+
+RUN \
+  pip3 install imageio==2.4.1 && \
+  python3 -c 'import imageio; imageio.plugins.ffmpeg.download()'
+
+
+
+
+
+
+## Include banner
+COPY docker/bashrc /etc/bash.bashrc
+RUN chmod a+rwx /etc/bash.bashrc
+
+COPY requirements.txt /tmp/requirements.txt
+RUN pip3 install -r /tmp/requirements.txt
+
+# Pytorch3d: torch 1.9.0 pairs with torchvision 0.10.0 / torchaudio 0.9.0 (0.9.1 / 0.8.1 target torch 1.8.1)
+RUN pip3 install -vvv torch==1.9.0 torchvision==0.10.0 torchaudio==0.9.0 -f https://download.pytorch.org/whl/cu111/torch_stable.html
+RUN pip3 install pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu111_pyt190/download.html
+
+RUN \
+  apt-get update && \
+  apt-get install -y \
+    git \
+    wget
+
+# # open3d
+# RUN apt-get install ffmpeg libsm6 libxext6 libc++-dev sudo git -y
+# RUN apt-get install -y build-essential cmake ccache
+# RUN cd /opt && \
+#   git clone https://github.com/isl-org/Open3D.git && \
+#   cd Open3D && \
+#   ./util/install_deps_ubuntu.sh assume-yes && \
+#   mkdir -p build && cd build && \
+#   cmake \
+#     -DBUILD_CUDA_MODULE=1 \
+#     -DBUILD_COMMON_CUDA_ARCHS=1 \
+#     -DENABLE_HEADLESS_RENDERING=1 \
+#     -DBUILD_WEBRTC=1 \
+#     -DCMAKE_BUILD_TYPE=Release \
+#     .. && \
+#   make -j $(nproc)
+
+# # RUN pip3 install open3d==0.13.0
+# # RUN \
+# #   ln -s \
+# #     /usr/local/cuda-11.1/targets/x86_64-linux/lib/libcusolver.so.11 \
+# #     $(python3 -c "import open3d as x; print(x.__path__[0])")/cuda/libcusolver.so.10
+
+# # ENV LD_LIBRARY_PATH ${LD_LIBRARY_PATH}:/usr/local/cuda-11.1/targets/x86_64-linux/lib/
+# RUN python3 -c ''
+
+RUN mkdir -p /opt/psegs
+COPY . /opt/psegs
+WORKDIR /opt/psegs
diff --git a/docker/bashrc b/docker/bashrc
new file mode 100644
index 0000000..c1874e6
--- /dev/null
+++ b/docker/bashrc
@@ -0,0 +1,33 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+export PS1="\[\e[31m\]psegs\[\e[m\] \[\e[33m\]\w\[\e[m\] > "
+export TERM=xterm-256color
+alias grep="grep --color=auto"
+alias ls="ls --color=auto"
+
+echo -e "\e[1;36m"
+cat <<"EOF"
+      _________  ____   _____________        
+     /////// _ \/ __/__ ___ ____  ///
+    /////// ___/\ \/ -_) _ `(_-< ///
+   ///////_/  /___/\__/\_, /___////
+  /////////////////////___////////  
+EOF
+echo -e "\e[0;36m"
+# Use python3 explicitly: a bare `python` may be Python 2 or missing, which would always print "[version unknown]".
+PS_VERSION=$(python3 -c "import psegs; print('v' + psegs.__version__)" 2> /dev/null \
+    || echo "[version unknown]")
+echo "  PSegs Environment $PS_VERSION"
+echo -e "\e[m"
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..d0c3cbf
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..7c632b5
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,74 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(0, os.path.abspath('../..'))
+import psegs
+
+# -- Project information -----------------------------------------------------
+
+project = 'psegs'
+copyright = '2020, Maintainers of PSegs'
+author = 'Maintainers of PSegs'
+
+import imp
+path = '/opt/psegs/psegs/__init__.py'
+m = imp.load_source('_', path)
+release = m.__version__
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+  'sphinx.ext.autodoc',
+  'sphinx.ext.viewcode',
+  'sphinx.ext.napoleon',
+  'sphinx.ext.mathjax',
+  'm2r',
+  'sphinx_rtd_theme',
+  'autoapi.extension',
+]
+
+autoapi_type = 'python'
+autoapi_dirs = ['/opt/psegs/psegs']
+
+pygments_style = "sphinx"
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'classic'
+html_domain_indices = True
+html_show_sourcelink = False
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+autodoc_member_order = 'bysource'
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..d486578
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,22 @@
+.. avsegs documentation master file, created by
+   sphinx-quickstart on Fri Feb  7 12:23:57 2020.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+View PSegs `on GitHub <https://github.com/pwais/avsegs>`_
+
+.. mdinclude:: ../../README.md
+
+
+PSegs API Documentation
+=========================
+
+.. toctree::
+   :maxdepth: 2
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
\ No newline at end of file
diff --git a/notebooks/Tutorial.ipynb b/notebooks/Tutorial.ipynb
new file mode 100644
index 0000000..19d7886
--- /dev/null
+++ b/notebooks/Tutorial.ipynb
@@ -0,0 +1,694 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    " * trailer: \n",
+    "     * show a histogram with examples of distance / orientation with samples over ALL datasets\n",
+    "     * show perf!  show time to fetch frames using Spark + Parquet\n",
+    "     * show a video of one camera with debug overlays.  maybe one with Delaunay lidar too (!)\n",
+    "     * show a frame HTML with 3d interface\n",
+    "     * show new things: argo associated bikes, Delaunay lidar, occlusion tree\n",
+    " * supported datasets, how to get a blurb and **stats** on each of them.  probably render histo reports for each.\n",
+    " * data structures:\n",
+    "    * StampedDatum\n",
+    "    * Frame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "import sys\n",
+    "sys.path.append('/opt/psegs')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from psegs.datasets import nuscenes\n",
+    "\n",
+    "nuscenes.NuscStampedDatumTableLabelsAllFrames.build()\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "# from psegs.datasets import kitti\n",
+    "\n",
+    "# kitti.DSUtil.test()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "from psegs.datasets import kitti_360\n",
+    "\n",
+    "kitti_360.KITTI360SDTable.build()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "sys.path.append('/opt/psegs')\n",
+    "from oarphpy.spark import NBSpark\n",
+    "NBSpark.SRC_ROOT = '/opt/psegs'\n",
+    "NBSpark.SRC_ROOT_MODULES = ['psegs']\n",
+    "NBSpark.MAYBE_REBUILD_EGG_EVERY_CELL_RUN = False\n",
+    "NBSpark.CONF_KV = {\n",
+    "    'spark.driver.memory': '8g',\n",
+    "    'spark.pyspark.python': 'python3',\n",
+    "    'spark.python.worker.reuse': False,\n",
+    "    'spark.sql.files.maxPartitionBytes': int(8 * 1e6),\n",
+    "    'spark.port.maxRetries': '256',\n",
+    "  }\n",
+    "from psegs.datasets import kitti\n",
+    "\n",
+    "spark = NBSpark.getOrCreate()\n",
+    "df = kitti.KITTISDTable.as_df(spark)\n",
+    "# df = spark.read.parquet('/tmp/avs_test/test_kitti_sd_table_tracking/sd_table/')\n",
+    "df.show(5)\n",
+    "# print(df.count())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# seg = df.filter('uri.segment_id = \"kitti-tracking-train-0009\"')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# seg_uri = seg.select('uri').persist()\n",
+    "df.createOrReplaceTempView('data')\n",
+    "# seg_uri.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "spark.sql(\"\"\"\n",
+    "    SELECT\n",
+    "      uri.segment_id AS seg, \n",
+    "      uri.topic AS topic,\n",
+    "      count(*) AS N,\n",
+    "      MAX(uri.timestamp),\n",
+    "      MIN(uri.timestamp),\n",
+    "      (MAX(uri.timestamp) - MIN(uri.timestamp)) * 1e-9 AS len\n",
+    "    FROM data\n",
+    "    GROUP BY topic, seg\n",
+    "    ORDER BY len ASC\n",
+    "\"\"\").show(1000, truncate=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from plotly.offline import init_notebook_mode, iplot\n",
+    "from plotly.graph_objs import *\n",
+    "\n",
+    "init_notebook_mode(connected=False)         # initiate notebook for offline plot\n",
+    "\n",
+    "trace0 = Scatter(\n",
+    "  x=[1, 2, 3, 4],\n",
+    "  y=[10, 15, 13, 17]\n",
+    ")\n",
+    "trace1 = Scatter(\n",
+    "  x=[1, 2, 3, 4],\n",
+    "  y=[16, 5, 11, 9]\n",
+    ")\n",
+    "\n",
+    "iplot([trace0, trace1])  \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "import sys\n",
+    "sys.path.append('/opt/psegs')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import psegs\n",
+    "from psegs.datasets import ios_lidar\n",
+    "from psegs import datum\n",
+    "\n",
+    "from oarphpy.spark import NBSpark\n",
+    "# NBSpark.SRC_ROOT = '/opt/psegs'\n",
+    "NBSpark.SRC_ROOT_MODULES = ['psegs']\n",
+    "NBSpark.MAYBE_REBUILD_EGG_EVERY_CELL_RUN = True\n",
+    "NBSpark.CONF_KV = {\n",
+    "    'spark.driver.memory': '8g',\n",
+    "    'spark.pyspark.python': 'python3',\n",
+    "    'spark.python.worker.reuse': False,\n",
+    "    'spark.sql.files.maxPartitionBytes': int(8 * 1e6),\n",
+    "    'spark.port.maxRetries': '256',\n",
+    "  }\n",
+    "\n",
+    "\n",
+    "spark = NBSpark.getOrCreate()\n",
+    "\n",
+    "\n",
+    "suri = datum.URI.from_str(\n",
+    "      'psegs://dataset=psegs-ios-lidar-ext&split=threeDScannerApp_data&segment_id=charuco-test-fixture-lowres')\n",
+    "sd_df = ios_lidar.IOSLidarSDTable.as_df(spark, force_compute=True, only_segments=[suri])\n",
+    "\n",
+    "sd_df.createOrReplaceTempView('sd_df')  # createTempView raises if this cell is re-run\n",
+    "sd_df.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "spark.sql(\"\"\"\n",
+    "    SELECT \n",
+    "      topic,\n",
+    "      n,\n",
+    "      (n / (1e-9 * duration_ns)) AS Hz,\n",
+    "      1e-9 * duration_ns AS duration_sec,\n",
+    "      width,\n",
+    "      height,\n",
+    "      channel_names,\n",
+    "      uncompressed_MBytes,\n",
+    "      uncompressed_MBytes / (1e-9 * duration_ns) AS uncompressed_MBps,\n",
+    "      FROM_UNIXTIME(start * 1e-9) AS start,\n",
+    "      FROM_UNIXTIME(end * 1e-9) AS end\n",
+    "\n",
+    "    FROM \n",
+    "        (\n",
+    "            SELECT\n",
+    "                FIRST(uri.topic) AS topic,\n",
+    "                MIN(uri.timestamp) AS start,\n",
+    "                MAX(uri.timestamp) AS end,\n",
+    "                MAX(uri.timestamp) - MIN(uri.timestamp) AS duration_ns,\n",
+    "                FIRST(camera_image.width) AS width,\n",
+    "                FIRST(camera_image.height) AS height,\n",
+    "                FIRST(camera_image.channel_names) AS channel_names,\n",
+    "                1e-6 * FIRST(camera_image.width * camera_image.height * 3) AS uncompressed_MBytes,\n",
+    "                COUNT(*) AS n\n",
+    "            FROM sd_df\n",
+    "            WHERE camera_image IS NOT NULL\n",
+    "            GROUP BY uri.topic\n",
+    "        )\n",
+    "    ORDER BY topic\n",
+    "\"\"\").toPandas()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "spark.sql(\"\"\"\n",
+    "    SELECT \n",
+    "      topic,\n",
+    "      n,\n",
+    "      (n / (1e-9 * duration_ns)) AS Hz,\n",
+    "      1e-9 * duration_ns AS duration_sec,\n",
+    "      cloud_colnames,\n",
+    "      FROM_UNIXTIME(start * 1e-9) AS start,\n",
+    "      FROM_UNIXTIME(end * 1e-9) AS end\n",
+    "\n",
+    "    FROM \n",
+    "        (\n",
+    "            SELECT\n",
+    "                FIRST(uri.topic) AS topic,\n",
+    "                MIN(uri.timestamp) AS start,\n",
+    "                MAX(uri.timestamp) AS end,\n",
+    "                MAX(uri.timestamp) - MIN(uri.timestamp) AS duration_ns,\n",
+    "                FIRST(point_cloud.cloud_colnames) AS cloud_colnames,\n",
+    "                COUNT(*) AS n\n",
+    "            FROM sd_df\n",
+    "            WHERE point_cloud IS NOT NULL\n",
+    "            GROUP BY uri.topic\n",
+    "        )\n",
+    "    ORDER BY topic\n",
+    "\"\"\").toPandas()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "spark.sql(\"\"\"\n",
+    "    SELECT \n",
+    "      topic,\n",
+    "      n,\n",
+    "      (n / (1e-9 * duration_ns)) AS Hz,\n",
+    "      1e-9 * duration_ns AS duration_sec,\n",
+    "      xform,\n",
+    "      FROM_UNIXTIME(start * 1e-9) AS start,\n",
+    "      FROM_UNIXTIME(end * 1e-9) AS end\n",
+    "\n",
+    "    FROM \n",
+    "        (\n",
+    "            SELECT\n",
+    "                FIRST(uri.topic) AS topic,\n",
+    "                MIN(uri.timestamp) AS start,\n",
+    "                MAX(uri.timestamp) AS end,\n",
+    "                MAX(uri.timestamp) - MIN(uri.timestamp) AS duration_ns,\n",
+    "                FIRST(CONCAT(transform.src_frame, '->', transform.dest_frame)) AS xform,\n",
+    "                COUNT(*) AS n\n",
+    "            FROM sd_df\n",
+    "            WHERE transform IS NOT NULL\n",
+    "            GROUP BY uri.topic\n",
+    "        )\n",
+    "    ORDER BY topic\n",
+    "\"\"\").toPandas()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "spark.sql(\"\"\"\n",
+    "    SELECT \n",
+    "      topic,\n",
+    "      n,\n",
+    "      (n / (1e-9 * duration_ns)) AS Hz,\n",
+    "      1e-9 * duration_ns AS duration_sec,\n",
+    "      FROM_UNIXTIME(start * 1e-9) AS start,\n",
+    "      FROM_UNIXTIME(end * 1e-9) AS end\n",
+    "\n",
+    "    FROM \n",
+    "        (\n",
+    "            SELECT\n",
+    "                FIRST(uri.topic) AS topic,\n",
+    "                MIN(uri.timestamp) AS start,\n",
+    "                MAX(uri.timestamp) AS end,\n",
+    "                MAX(uri.timestamp) - MIN(uri.timestamp) AS duration_ns,\n",
+    "                COUNT(*) AS n\n",
+    "            FROM sd_df\n",
+    "            WHERE SIZE(cuboids) > 0\n",
+    "            GROUP BY uri.topic\n",
+    "        )\n",
+    "    ORDER BY topic\n",
+    "\"\"\").toPandas()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "import sys\n",
+    "sys.path.append('/opt/psegs')\n",
+    "\n",
+    "import psegs\n",
+    "from psegs.datasets import ios_lidar\n",
+    "from psegs import datum\n",
+    "\n",
+    "from oarphpy.spark import NBSpark\n",
+    "# NBSpark.SRC_ROOT = '/opt/psegs'\n",
+    "NBSpark.SRC_ROOT_MODULES = ['psegs']\n",
+    "NBSpark.MAYBE_REBUILD_EGG_EVERY_CELL_RUN = True\n",
+    "NBSpark.CONF_KV = {\n",
+    "    'spark.driver.memory': '8g',\n",
+    "    'spark.pyspark.python': 'python3',\n",
+    "    'spark.python.worker.reuse': False,\n",
+    "    'spark.sql.files.maxPartitionBytes': int(8 * 1e6),\n",
+    "    'spark.port.maxRetries': '256',\n",
+    "  }\n",
+    "\n",
+    "\n",
+    "spark = NBSpark.getOrCreate()\n",
+    "\n",
+    "\n",
+    "suri = datum.URI.from_str(\n",
+    "      'psegs://dataset=psegs-ios-lidar-ext&split=threeDScannerApp_data&segment_id=charuco-test-fixture-lowres')\n",
+    "sd_df = ios_lidar.IOSLidarSDTable.as_df(spark, force_compute=True, only_segments=[suri])\n",
+    "\n",
+    "sd_df.createOrReplaceTempView('sd_df')\n",
+    "# sd_df.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "\n",
+    "def get_depth_90th(row):\n",
+    "    \"\"\"Return the 90th percentile of the depth for `row.camera_image`, or 0 if `get_depth()` is None.\"\"\"\n",
+    "    ci = ios_lidar.IOSLidarSDTable.from_row(row.camera_image)\n",
+    "    depth = ci.get_depth()\n",
+    "    if depth is None:\n",
+    "        return 0\n",
+    "    import numpy as np\n",
+    "    return np.percentile(depth, 90)  # q is given in [0, 100], not [0, 1]\n",
+    "\n",
+    "\n",
+    "    \n",
+    "    \n",
+    "\n",
+    "sample_sd_df = spark.sql(\"\"\"\n",
+    "                        SELECT \n",
+    "                          *\n",
+    "                        FROM sd_df\n",
+    "                        WHERE uri.topic == 'camera|front'\n",
+    "                        ORDER BY RAND(1337)\n",
+    "                        LIMIT 50\n",
+    "                    \"\"\")\n",
+    "\n",
+    "depth_top_90th = sample_sd_df.rdd.map(get_depth_90th).max()\n",
+    "\n",
+    "\n",
+    "if depth_top_90th <= 0.1:\n",
+    "    period_meters = 0.005\n",
+    "elif depth_top_90th <= 1.0:\n",
+    "    period_meters = 0.05\n",
+    "elif depth_top_90th <= 10.0:\n",
+    "    period_meters = 0.5\n",
+    "else:\n",
+    "    period_meters = 10.\n",
+    "\n",
+    "print('period_meters', period_meters)\n",
+    "    \n",
+    "def to_t_debug_image(row):\n",
+    "    import cv2\n",
+    "    ci = ios_lidar.IOSLidarSDTable.from_row(row.camera_image)\n",
+    "    image = ci.get_debug_image(period_meters=period_meters)\n",
+    "#     if len(image.shape) != 3:\n",
+    "#         import numpy\n",
+    "#         image = np.tile([image, image, image], axis=2)\n",
+    "#     assert False, image.shape\n",
+    "    aspect = float(ci.width) / float(ci.height)\n",
+    "    target_height = 400\n",
+    "    target_width = int(aspect * target_height)\n",
+    "    \n",
+    "    # Pad the width a little to make ffmpeg most efficient\n",
+    "    if target_width % 16 != 0:\n",
+    "        target_width += 16 - (target_width % 16)\n",
+    "    \n",
+    "    image = cv2.resize(image, (target_width, target_height))\n",
+    "\n",
+    "    return row.uri.timestamp, image\n",
+    "\n",
+    "\n",
+    "\n",
+    "iter_t_image = sample_sd_df.rdd.map(to_t_debug_image).collect()\n",
+    "images = [image for t, image in sorted(iter_t_image, key=lambda ti: ti[0])]\n",
+    "# Rescale all frames jointly to [0, 255] using the global min and max pixel values\n",
+    "im_min = min(i.min() for i in images)\n",
+    "print(im_min)\n",
+    "im_max = max(i.max() for i in images)\n",
+    "print(im_max)\n",
+    "images = [(255 * (i.astype('float') - im_min) / ((im_max - im_min) or 1)).astype('uint8') for i in images]\n",
+    "\n",
+    "from psegs.util.plotting import images_to_html_video\n",
+    "\n",
+    "html = images_to_html_video(images, fps=4)\n",
+    "print('html size', 1e-6 * len(html))\n",
+    "\n",
+    "def show_html(s):\n",
+    "    from IPython.core.display import display, HTML\n",
+    "    display(HTML(s), metadata=dict(isolated=True))\n",
+    "\n",
+    "show_html(html)\n",
+    "\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from plotly.offline import init_notebook_mode, iplot\n",
+    "from plotly.graph_objs import *\n",
+    "\n",
+    "init_notebook_mode(connected=False)         # initiate notebook for offline plot\n",
+    "\n",
+    "\n",
+    "import plotly\n",
+    "import plotly.graph_objects as go\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "sample_sd_df = spark.sql(\"\"\"\n",
+    "                        SELECT \n",
+    "                          *\n",
+    "                        FROM sd_df\n",
+    "                        WHERE uri.topic == 'camera|front|depth'\n",
+    "                        ORDER BY RAND(1337)\n",
+    "                        LIMIT 50\n",
+    "                    \"\"\")\n",
+    "\n",
+    "def get_cloud_world(row):\n",
+    "    ci = ios_lidar.IOSLidarSDTable.from_row(row.camera_image)\n",
+    "    pc = ci.depth_image_to_point_cloud()\n",
+    "    cloud = pc.get_cloud()\n",
+    "    cloud = cloud[:, :3]\n",
+    "    T_world_from_ego = ci.ego_pose['ego', 'world']\n",
+    "    cloud_world = T_world_from_ego.apply(cloud).T\n",
+    "    return cloud_world\n",
+    "\n",
+    "cloud_worlds = sample_sd_df.rdd.map(get_cloud_world).collect()\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "cloud_df = None\n",
+    "for i, cloud in enumerate(cloud_worlds):\n",
+    "    color = int(255 * (float(i) / len(cloud_worlds)))\n",
+    "    \n",
+    "    to_sample = 1000\n",
+    "    if cloud.shape[0] > to_sample:\n",
+    "        idx = np.random.choice(np.arange(cloud.shape[0]), to_sample)\n",
+    "        cloud = cloud[idx, :]\n",
+    "    \n",
+    "    cur_df = pd.DataFrame(cloud, columns=['x', 'y', 'z'])\n",
+    "    cur_df['color'] = 'rgb(%s, %s, %s)' % (color, color, color)\n",
+    "    if cloud_df is None:\n",
+    "        cloud_df = cur_df\n",
+    "    else:\n",
+    "        cloud_df = pd.concat([cloud_df, cur_df])\n",
+    "    \n",
+    "plots = []\n",
+    "\n",
+    "\n",
+    "# cloud_df = pd.DataFrame(cloud_world, columns=['x', 'y', 'z'])\n",
+    "# from psegs.util.plotting import rgb_for_distance\n",
+    "# cloud_df['color'] = [\n",
+    "#   (128, 128, 128)#rgb_for_distance(np.linalg.norm(pt), period_meters=period_meters)\n",
+    "#   for pt in cloud_df[['x', 'y', 'z']].values\n",
+    "# ]\n",
+    "scatter = go.Scatter3d(\n",
+    "                x=cloud_df['x'], y=cloud_df['y'], z=cloud_df['z'],\n",
+    "                mode='markers',\n",
+    "                marker=dict(size=2, color=cloud_df['color'], opacity=0.5),)\n",
+    "\n",
+    "plots.append(scatter)\n",
+    "\n",
+    "\n",
+    "fig = go.Figure(data=plots)\n",
+    "\n",
+    "fig.update_layout(\n",
+    "  width=1000, height=700,\n",
+    "  scene_aspectmode='data')\n",
+    "  # scene_camera=dict(\n",
+    "  #   up=dict(x=0, y=0, z=1),\n",
+    "  #   eye=dict(x=0, y=0, z=0),\n",
+    "  #   center=dict(x=1, y=0, z=0),\n",
+    "  # ))\n",
+    "    \n",
+    "iplot(fig)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sample_sd_df = spark.sql(\"\"\"\n",
+    "                        SELECT \n",
+    "                          *\n",
+    "                        FROM sd_df\n",
+    "                        WHERE uri.topic == 'camera|front'\n",
+    "                        ORDER BY RAND(1337)\n",
+    "                        LIMIT 50\n",
+    "                    \"\"\")\n",
+    "\n",
+    "def get_t_ci(row):\n",
+    "    ci = ios_lidar.IOSLidarSDTable.from_row(row.camera_image)\n",
+    "    return row.uri.timestamp, ci\n",
+    "\n",
+    "t_cis = sample_sd_df.rdd.map(get_t_ci).collect()\n",
+    "cis = [ci for t, ci in sorted(t_cis, key=lambda tc: tc[0])]\n",
+    "\n",
+    "plots += [ci.to_plotly_world_frame_3d() for ci in cis]\n",
+    "\n",
+    "\n",
+    "fig = go.Figure(data=plots)\n",
+    "\n",
+    "fig.update_layout(\n",
+    "  width=1000, height=700,\n",
+    "  scene_aspectmode='data')\n",
+    "  # scene_camera=dict(\n",
+    "  #   up=dict(x=0, y=0, z=1),\n",
+    "  #   eye=dict(x=0, y=0, z=0),\n",
+    "  #   center=dict(x=1, y=0, z=0),\n",
+    "  # ))\n",
+    "    \n",
+    "iplot(fig)\n",
+    "\n",
+    "\n",
+    "\n",
+    "# def get_xzy_90ths(row):\n",
+    "#     ci = ios_lidar.IOSLidarSDTable.from_row(row.camera_image)\n",
+    "#     pc = ci.depth_image_to_point_cloud()\n",
+    "#     cloud = pc.get_cloud()\n",
+    "    \n",
+    "#     import numpy as np\n",
+    "#     return np.percentile(depth, 0.9)\n",
+    "    \n",
+    "\n",
+    "# def to_t_pc_rv_debug_image(row):\n",
+    "#     ci = ios_lidar.IOSLidarSDTable.from_row(row.camera_image)\n",
+    "#     pc = ci.depth_image_to_point_cloud()\n",
+    "#     debug = pc.get_front_rv_debug_image()\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/cheap_optical_flow_eval.ipynb b/notebooks/cheap_optical_flow_eval.ipynb
new file mode 100644
index 0000000..27f92dd
--- /dev/null
+++ b/notebooks/cheap_optical_flow_eval.ipynb
@@ -0,0 +1,4489 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Cheap Optical Flow: Is it Good? Does it Boost?\n",
+    "\n",
+    "\n",
+    "## Quickstart\n",
+    "\n",
+    "## Credits\n",
+    "\n",
+    "Some portions of this notebook are adapted from:\n",
+    " * [Middlebury Flow code by Johannes Oswald](https://github.com/Johswald/flow-code-python/blob/master/readFlowFile.py)\n",
+    " * [DeepDeform Demo Code](https://github.com/AljazBozic/DeepDeform)\n",
+    " * [OpticalFlowToolkit by RUOTENG LI](https://github.com/liruoteng/OpticalFlowToolkit)\n",
+    " * [OpenCV Samples](https://github.com/opencv/opencv/blob/master/samples/python/opt_flow.py)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# parameters\n",
+    "SHOW_DEMO_OUTPUT = False\n",
+    "DEMO_FPS = []\n",
+    "\n",
+    "RUN_FULL_ANALYSIS = False\n",
+    "ALL_FP_FACTORY_CLSS = []"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: pypng in /usr/local/lib/python3.8/dist-packages (0.0.20)\n",
+      "Requirement already satisfied: scikit-image in /usr/lib/python3/dist-packages (0.16.2)\n",
+      "\u001b[33mWARNING: You are using pip version 21.0.1; however, version 21.1 is available.\n",
+      "You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n",
+      "fixme installs\n",
+      "\n",
+      "\n",
+      "Putting analysis lib in /tmp/tmpekna3vk7_cheap_optical_flow_eval_analysis\n",
+      "running clean\n",
+      "running bdist_egg\n",
+      "running egg_info\n",
+      "writing psegs.egg-info/PKG-INFO\n",
+      "writing dependency_links to psegs.egg-info/dependency_links.txt\n",
+      "writing top-level names to psegs.egg-info/top_level.txt\n",
+      "reading manifest file 'psegs.egg-info/SOURCES.txt'\n",
+      "writing manifest file 'psegs.egg-info/SOURCES.txt'\n",
+      "installing library code to build/bdist.linux-x86_64/egg\n",
+      "running install_lib\n",
+      "running build_py\n",
+      "warning: build_py: byte-compiling is disabled, skipping.\n",
+      "\n",
+      "creating build/bdist.linux-x86_64/egg\n",
+      "creating build/bdist.linux-x86_64/egg/psegs\n",
+      "copying build/lib/psegs/dummyrun.py -> build/bdist.linux-x86_64/egg/psegs\n",
+      "creating build/bdist.linux-x86_64/egg/psegs/datasets\n",
+      "copying build/lib/psegs/datasets/kitti.py -> build/bdist.linux-x86_64/egg/psegs/datasets\n",
+      "copying build/lib/psegs/datasets/idsutil.py -> build/bdist.linux-x86_64/egg/psegs/datasets\n",
+      "copying build/lib/psegs/datasets/kitti_360.py -> build/bdist.linux-x86_64/egg/psegs/datasets\n",
+      "copying build/lib/psegs/datasets/__init__.py -> build/bdist.linux-x86_64/egg/psegs/datasets\n",
+      "copying build/lib/psegs/datasets/nuscenes.py -> build/bdist.linux-x86_64/egg/psegs/datasets\n",
+      "creating build/bdist.linux-x86_64/egg/psegs/util\n",
+      "copying build/lib/psegs/util/__init__.py -> build/bdist.linux-x86_64/egg/psegs/util\n",
+      "copying build/lib/psegs/util/misc.py -> build/bdist.linux-x86_64/egg/psegs/util\n",
+      "copying build/lib/psegs/util/plotting.py -> build/bdist.linux-x86_64/egg/psegs/util\n",
+      "creating build/bdist.linux-x86_64/egg/psegs/table\n",
+      "copying build/lib/psegs/table/sd_db.py -> build/bdist.linux-x86_64/egg/psegs/table\n",
+      "copying build/lib/psegs/table/sd_table.py -> build/bdist.linux-x86_64/egg/psegs/table\n",
+      "copying build/lib/psegs/table/__init__.py -> build/bdist.linux-x86_64/egg/psegs/table\n",
+      "copying build/lib/psegs/browser.py -> build/bdist.linux-x86_64/egg/psegs\n",
+      "creating build/bdist.linux-x86_64/egg/psegs/datum\n",
+      "copying build/lib/psegs/datum/uri.py -> build/bdist.linux-x86_64/egg/psegs/datum\n",
+      "copying build/lib/psegs/datum/bbox2d.py -> build/bdist.linux-x86_64/egg/psegs/datum\n",
+      "copying build/lib/psegs/datum/datumutils.py -> build/bdist.linux-x86_64/egg/psegs/datum\n",
+      "copying build/lib/psegs/datum/transform.py -> build/bdist.linux-x86_64/egg/psegs/datum\n",
+      "copying build/lib/psegs/datum/stamped_datum.py -> build/bdist.linux-x86_64/egg/psegs/datum\n",
+      "copying build/lib/psegs/datum/cuboid.py -> build/bdist.linux-x86_64/egg/psegs/datum\n",
+      "copying build/lib/psegs/datum/frame.py -> build/bdist.linux-x86_64/egg/psegs/datum\n",
+      "copying build/lib/psegs/datum/point_cloud.py -> build/bdist.linux-x86_64/egg/psegs/datum\n",
+      "copying build/lib/psegs/datum/camera_image.py -> build/bdist.linux-x86_64/egg/psegs/datum\n",
+      "copying build/lib/psegs/datum/__init__.py -> build/bdist.linux-x86_64/egg/psegs/datum\n",
+      "copying build/lib/psegs/spark.py -> build/bdist.linux-x86_64/egg/psegs\n",
+      "copying build/lib/psegs/conf.py -> build/bdist.linux-x86_64/egg/psegs\n",
+      "copying build/lib/psegs/dsutil.py -> build/bdist.linux-x86_64/egg/psegs\n",
+      "copying build/lib/psegs/__init__.py -> build/bdist.linux-x86_64/egg/psegs\n",
+      "creating build/bdist.linux-x86_64/egg/psegs/exp\n",
+      "copying build/lib/psegs/exp/semantic_kitti.py -> build/bdist.linux-x86_64/egg/psegs/exp\n",
+      "copying build/lib/psegs/exp/fused_lidar_flow.py -> build/bdist.linux-x86_64/egg/psegs/exp\n",
+      "copying build/lib/psegs/exp/__init__.py -> build/bdist.linux-x86_64/egg/psegs/exp\n",
+      "copying build/lib/psegs/ros.py -> build/bdist.linux-x86_64/egg/psegs\n",
+      "warning: install_lib: byte-compiling is disabled, skipping.\n",
+      "\n",
+      "creating build/bdist.linux-x86_64/egg/EGG-INFO\n",
+      "copying psegs.egg-info/PKG-INFO -> build/bdist.linux-x86_64/egg/EGG-INFO\n",
+      "copying psegs.egg-info/SOURCES.txt -> build/bdist.linux-x86_64/egg/EGG-INFO\n",
+      "copying psegs.egg-info/dependency_links.txt -> build/bdist.linux-x86_64/egg/EGG-INFO\n",
+      "copying psegs.egg-info/top_level.txt -> build/bdist.linux-x86_64/egg/EGG-INFO\n",
+      "zip_safe flag not set; analyzing archive contents...\n",
+      "creating 'dist/psegs-0.0.1-py3.8.egg' and adding 'build/bdist.linux-x86_64/egg' to it\n",
+      "removing 'build/bdist.linux-x86_64/egg' (and everything under it)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 21:11:44,327\toarph 1807391 : Using source root /tmp/tmpekna3vk7_cheap_optical_flow_eval_analysis/cheap_optical_flow_eval_analysis \n",
+      "2021-04-28 21:11:44,327\toarph 1807391 : Using source root /tmp/tmpekna3vk7_cheap_optical_flow_eval_analysis \n",
+      "2021-04-28 21:11:44,371\toarph 1807391 : Generating egg to /tmp/tmprsc_pbn6_oarphpy_eggbuild ...\n",
+      "2021-04-28 21:11:44,383\toarph 1807391 : ... done.  Egg at /tmp/tmprsc_pbn6_oarphpy_eggbuild/cheap_optical_flow_eval_analysis-0.0.0-py3.8.egg\n"
+     ]
+    }
+   ],
+   "source": [
+    "## Setup\n",
+    "\n",
+    "!pip3 install pypng scikit-image\n",
+    "print('fixme installs')\n",
+    "print()\n",
+    "print()\n",
+    "\n",
+    "import copy\n",
+    "import imageio\n",
+    "import IPython.display\n",
+    "import math\n",
+    "import os\n",
+    "import PIL.Image\n",
+    "import six\n",
+    "import sys\n",
+    "import tempfile\n",
+    "\n",
+    "\n",
+    "## General Notebook Utilities\n",
+    "    \n",
+    "def imshow(x):\n",
+    "    IPython.display.display(PIL.Image.fromarray(x))\n",
+    "\n",
+    "def show_html(x):\n",
+    "    from IPython.core.display import display, HTML\n",
+    "    display(HTML(x))\n",
+    "\n",
+    "    \n",
+    "PLOTLY_INIT_HTML = \"\"\"\n",
+    "    <script src=\"https://cdn.plot.ly/plotly-latest.min.js\"></script>\n",
+    "    <script src='https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js'></script>\n",
+    "    <script>requirejs.config({\n",
+    "        paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});\n",
+    "        if(!window.Plotly) {{require(['plotly'],function(plotly) {window.Plotly=plotly;});}}</script>\n",
+    "    \"\"\"\n",
+    "\n",
+    "if SHOW_DEMO_OUTPUT:\n",
+    "    show_html(PLOTLY_INIT_HTML)\n",
+    "\n",
+    "## Create a random temporary directory for analysis library (for Spark-enabled full analysis mode)\n",
+    "old_cwd = os.getcwd()\n",
+    "tempdir = tempfile.TemporaryDirectory(suffix='_cheap_optical_flow_eval_analysis')\n",
+    "ALIB_SRC_DIR = tempdir.name\n",
+    "print(\"Putting analysis lib in %s\" % ALIB_SRC_DIR)\n",
+    "os.chdir(ALIB_SRC_DIR)\n",
+    "!mkdir -p cheap_optical_flow_eval_analysis\n",
+    "!touch cheap_optical_flow_eval_analysis/__init__.py\n",
+    "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "sys.path.append(ALIB_SRC_DIR)\n",
+    "\n",
+    "\n",
+    "## Prepare a build of local psegs for inclusion\n",
+    "!cd /opt/psegs && python3 setup.py clean bdist_egg\n",
+    "PSEGS_EGG_PATH = '/opt/psegs/dist/psegs-0.0.1-py3.8.egg'\n",
+    "assert os.path.exists(PSEGS_EGG_PATH), \"Build failed?\"\n",
+    "sys.path.append('/opt/psegs')\n",
+    "import psegs\n",
+    "\n",
+    "\n",
+    "## Prepare Spark session with local PSegs and local Analysis Lib\n",
+    "from psegs.spark import NBSpark\n",
+    "NBSpark.SRC_ROOT = os.path.join(ALIB_SRC_DIR, 'cheap_optical_flow_eval_analysis')\n",
+    "NBSpark.CONF_KV.update({\n",
+    "    'spark.driver.maxResultSize': '2g',\n",
+    "    'spark.driver.memory': '16g',\n",
+    "    'spark.submit.pyFiles': PSEGS_EGG_PATH,\n",
+    "  })\n",
+    "spark = NBSpark.getOrCreate()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Writing cheap_optical_flow_eval_analysis/ofp.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile cheap_optical_flow_eval_analysis/ofp.py\n",
+    "\n",
+    "## Data Model & Utility Code\n",
+    "\n",
+    "import attr\n",
+    "import cv2\n",
+    "import imageio\n",
+    "import math\n",
+    "import os\n",
+    "import PIL.Image\n",
+    "import six\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
+    "from psegs import datum\n",
+    "\n",
+    "from oarphpy import plotting as op_plt\n",
+    "from oarphpy.spark import CloudpickeledCallable\n",
+    "img_to_data_uri = lambda x: op_plt.img_to_data_uri(x, format='png')\n",
+    "\n",
+    "@attr.s(slots=True, eq=False, weakref_slot=False)\n",
+    "class OpticalFlowPair(object):\n",
+    "    \"\"\"A flyweight for a pair of images with an optical flow field.\n",
+    "    Supports lazy-loading of large data attributes.\"\"\"\n",
+    "    \n",
+    "    ## Core Attributes (Required for All Datasets)\n",
+    "    \n",
+    "    dataset = attr.ib(type=str, default='')\n",
+    "    \"\"\"(Display name) To which dataset does this pair belong?\"\"\"\n",
+    "    \n",
+    "    id1 = attr.ib(type=str, default='')\n",
+    "    \"\"\"Identifier or URI for the first image\"\"\"\n",
+    "    \n",
+    "    id2 = attr.ib(type=str, default='')\n",
+    "    \"\"\"Identifier or URI for the second image\"\"\"\n",
+    "    \n",
+    "    img1 = attr.ib(default=None)\n",
+    "    \"\"\"URI or numpy array or CloudpickeledCallable for the first image (source image)\"\"\"\n",
+    "\n",
+    "    img2 = attr.ib(default=None)\n",
+    "    \"\"\"URI or numpy array or CloudpickeledCallable for the second image (target image)\"\"\"\n",
+    "    \n",
+    "    flow = attr.ib(default=None)\n",
+    "    \"\"\"A numpy array or callable or CloudpickeledCallable representing optical flow from img1 -> img2\"\"\"\n",
+    "    \n",
+    "    uri = attr.ib(type=datum.URI, default=None, converter=datum.URI.from_str)\n",
+    "    \"\"\"A URI addressing this pair; to make dynamic construction of the pair easier\"\"\"\n",
+    "    \n",
+    "    \n",
+    "    ## Optional Attributes (For Select Datasets)\n",
+    "    \n",
+    "    diff_time_sec = attr.ib(type=float, default=-1.0)\n",
+    "    \"\"\"Difference in time (in seconds) between the views / poses depicted in `img1` and `img2`.\"\"\"\n",
+    "    \n",
+    "    translation_meters = attr.ib(type=float, default=-1.0)\n",
+    "    \"\"\"Difference in ego translation (in meters) between the views / poses depicted in `img1` and `img2`.\"\"\"\n",
+    "    \n",
+    "    uvdviz_im1 = attr.ib(default=None)\n",
+    "    \"\"\"An nx4 numpy array representing UVD-visible points for `img1`\"\"\"\n",
+    "    \n",
+    "    uvdviz_im2 = attr.ib(default=None)\n",
+    "    \"\"\"An nx4 numpy array representing UVD-visible points for `img2`\"\"\"\n",
+    "    \n",
+    "    K = attr.ib(default=None)\n",
+    "    \"\"\"A 3x3 numpy array representing the camera matrix K for both views\"\"\"\n",
+    "    \n",
+    "    # to add:\n",
+    "    # semantic image for frame 1, frame 2 [could be painted by cuboids]\n",
+    "    # instance images for frame 1, frame 2 [could be painted by cuboids]\n",
+    "    #   -- for colored images, at first just pivot all oflow metrics by colors\n",
+    "    # get uvd1 uvd2 (lidar for nearest neighbor stuff)\n",
+    "    # depth image for frame 1, frame 2 [could be interpolated by cuboids]\n",
+    "    #   -- at first bucket the depth coarsely and pivot all oflow by colors\n",
+    "    \n",
+    "    def get_img1(self):\n",
+    "        if isinstance(self.img1, CloudpickeledCallable):\n",
+    "            self.img1 = self.img1()\n",
+    "        if isinstance(self.img1, six.string_types):\n",
+    "            self.img1 = imageio.imread(self.img1)\n",
+    "        return self.img1\n",
+    "    \n",
+    "    def get_img2(self):\n",
+    "        if isinstance(self.img2, CloudpickeledCallable):\n",
+    "            self.img2 = self.img2()\n",
+    "        if isinstance(self.img2, six.string_types):\n",
+    "            self.img2 = imageio.imread(self.img2)\n",
+    "        return self.img2\n",
+    "    \n",
+    "    def get_flow(self):\n",
+    "        if not isinstance(self.flow, (np.ndarray, np.generic)):\n",
+    "            self.flow = self.flow()\n",
+    "        return self.flow\n",
+    "    \n",
+    "    def has_scene_flow(self):\n",
+    "        return (\n",
+    "            self.uvdviz_im1 is not None and \n",
+    "            self.uvdviz_im1.shape[0] > 0 and\n",
+    "            self.uvdviz_im2 is not None and \n",
+    "            self.uvdviz_im2.shape[0] > 0 and\n",
+    "            self.K is not None)\n",
+    "    \n",
+    "    def get_sf_viz_html(self):\n",
+    "        uvd1 = self.uvdviz_im1[:, :3]\n",
+    "        uvd2 = self.uvdviz_im2[:, :3]\n",
+    "#         visible_either = ((uvd1[:, -1] == 1) | (uvd2[:, -1] == 1))\n",
+    "#         uvd1 = uvd1[visible_either, :3]\n",
+    "#         uvd2 = uvd2[visible_either, :3]\n",
+    "        \n",
+    "        xyzrgb1 = uvd_to_xyzrgb(uvd1, self.K, imgs=[self.get_img1()])\n",
+    "        xyzrgb2 = uvd_to_xyzrgb(uvd2, self.K, imgs=[self.get_img2()])\n",
+    "        html1 = create_xyzrgb_3d_plot_html(xyzrgb1)\n",
+    "        html2 = create_xyzrgb_3d_plot_html(xyzrgb2)\n",
+    "        html_sf = create_xyzrgb_3d_sf_plot_html(xyzrgb1, xyzrgb2)\n",
+    "        \n",
+    "        html = \"View 1:<br />%s<br /><br />View 2:<br />%s<br /><br />Flow:<br />%s\" % (html1, html2, html_sf)\n",
+    "        return html\n",
+    "    \n",
+    "    def to_html(self):\n",
+    "        im1 = self.get_img1()\n",
+    "        im2 = self.get_img2()\n",
+    "        flow = self.get_flow()\n",
+    "        fviz = draw_flow(im1, flow)\n",
+    "        \n",
+    "        sf_html = ''\n",
+    "        if self.has_scene_flow():\n",
+    "            sf_html = \"\"\"\n",
+    "                <tr><td style=\"text-align:left\"><b>Scene Flow</b></td></tr>\n",
+    "                <tr><td>{viz_html}</td></tr>\n",
+    "            \"\"\".format(viz_html=self.get_sf_viz_html())\n",
+    "        \n",
+    "        html = \"\"\"\n",
+    "            \n",
+    "            <table>\n",
+    "            \n",
+    "            <tr><td style=\"text-align:left\"><b>Dataset:</b> {dataset}</td></tr>\n",
+    "            <tr><td style=\"text-align:left\"><b>URI:</b> {uri}</td></tr>\n",
+    "            \n",
+    "            <tr><td style=\"text-align:left\"><b>Diff (seconds, optional):</b> {diff_time_sec}</td></tr>\n",
+    "            <tr><td style=\"text-align:left\"><b>Translation (meters, optional):</b> {translation_meters}</td></tr>\n",
+    "            \n",
+    "            <tr><td style=\"text-align:left\"><b>Source Image:</b> {id1}</td></tr>\n",
+    "            <tr><td><img src=\"{im1}\" /></td></tr>\n",
+    "\n",
+    "            <tr><td style=\"text-align:left\"><b>Target Image:</b> {id2}</td></tr>\n",
+    "            <tr><td><img src=\"{im2}\" /></td></tr>\n",
+    "\n",
+    "            <tr><td style=\"text-align:left\"><b>Flow</b></td></tr>\n",
+    "            <tr><td><img src=\"{fviz}\" /></td></tr>\n",
+    "            \n",
+    "            {sf_html}\n",
+    "            </table>\n",
+    "        \"\"\".format(\n",
+    "                dataset=self.dataset,\n",
+    "                uri=str(self.uri),\n",
+    "                diff_time_sec=self.diff_time_sec,\n",
+    "                translation_meters=self.translation_meters,\n",
+    "                id1=self.id1, id2=self.id2,\n",
+    "                im1=img_to_data_uri(im1), im2=img_to_data_uri(im2),\n",
+    "                fviz=img_to_data_uri(fviz),\n",
+    "                sf_html=sf_html)\n",
+    "        return html\n",
+    "\n",
+    "def draw_flow(img, flow, step=8):\n",
+    "    \"\"\"Based upon OpenCV sample: https://github.com/opencv/opencv/blob/master/samples/python/opt_flow.py\"\"\"\n",
+    "    h, w = img.shape[:2]\n",
+    "    y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2,-1).astype(int)\n",
+    "    fx, fy = flow[y,x].T\n",
+    "    lines = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2)\n",
+    "    lines = np.int32(lines + 0.5)\n",
+    "    vis = img.copy()\n",
+    "    cv2.polylines(vis, lines, 0, (0, 255, 0))\n",
+    "    for (x1, y1), (_x2, _y2) in lines:\n",
+    "        cv2.circle(vis, (x1, y1), 1, (0, 255, 0), -1)\n",
+    "    return vis\n",
+    "\n",
+    "def uvd_to_xyzrgb(uvd, K, imgs=None):\n",
+    "    \"\"\"Unproject (u, v, d) rows through `K` and paint the points using `imgs`.\n",
+    "\n",
+    "    Each `d` is applied along the unit-length ray through pixel (u, v).\n",
+    "    Returns the output of `datum.PointCloud.paint_ego_cloud()`.\n",
+    "    \"\"\"\n",
+    "    import numpy as np\n",
+    "    from psegs import datum\n",
+    "    \n",
+    "    fx, fy = K[0, 0], K[1, 1]\n",
+    "    cx, cy = K[0, 2], K[1, 2]\n",
+    "    \n",
+    "    rays = np.zeros((uvd.shape[0], 3))\n",
+    "    rays[:, 0] = (uvd[:, 0] - cx) / fx\n",
+    "    rays[:, 1] = (uvd[:, 1] - cy) / fy\n",
+    "    rays[:, 2] = 1.\n",
+    "    rays /= np.linalg.norm(rays, axis=-1)[:, np.newaxis]\n",
+    "    xyz = uvd[:, 2][:, np.newaxis] * rays\n",
+    "    # Bind `img` as a default arg: a bare `lambda: img` would make every factory return the last image\n",
+    "    cis = [datum.CameraImage(image_factory=lambda img=img: img, K=K) for img in (imgs or [])]\n",
+    "    return datum.PointCloud.paint_ego_cloud(xyz, camera_images=cis)\n",
+    "\n",
+    "def create_xyzrgb_3d_plot_html(xyzrgb, max_points=10000):\n",
+    "    import plotly\n",
+    "    import plotly.graph_objects as go\n",
+    "    import pandas as pd\n",
+    "\n",
+    "    pcloud_df = pd.DataFrame(xyzrgb, columns=['x', 'y', 'z', 'r', 'g', 'b'])\n",
+    "    pcloud_df = pcloud_df.sample(n=min(xyzrgb.shape[0], max_points))\n",
+    "    scatter = go.Scatter3d(\n",
+    "                x=pcloud_df['x'], y=pcloud_df['y'], z=pcloud_df['z'],\n",
+    "                mode='markers',\n",
+    "                marker=dict(size=3, color=pcloud_df[['r', 'g', 'b']], opacity=0.9))\n",
+    "    fig = go.Figure(data=[scatter])\n",
+    "    fig.update_layout(\n",
+    "            width=900, height=600,\n",
+    "            scene_camera=dict(\n",
+    "                up=dict(x=0, y=-1, z=0),\n",
+    "                center=dict(x=0, y=0, z=0),\n",
+    "                eye=dict(x=1.25, y=-1.25, z=-1.25)\n",
+    "            ),\n",
+    "            scene_aspectmode='data')\n",
+    "    \n",
+    "#     trace0 = go.Scatter(\n",
+    "#       x=[1, 2, 3, 4],\n",
+    "#       y=[10, 15, 13, 17]\n",
+    "#     )\n",
+    "#     fig = go.Figure(data=[trace0])\n",
+    "    \n",
+    "    center = xyzrgb[:, :3].mean(axis=0)\n",
+    "    footer = \"<i>Showing %s of %s points with mean (%s, %s, %s)</i>\" % (\n",
+    "                    len(pcloud_df), xyzrgb.shape[0], center[0], center[1], center[2])\n",
+    "    \n",
+    "    return fig.to_html(include_plotlyjs=False, full_html=False) + '<br/><br/>' + footer\n",
+    "#     fig_html = plotly.offline.plot(fig, include_plotlyjs=True, output_type='file', filename='/tmp/yay.html')\n",
+    "    \n",
+    "def create_xyzrgb_3d_sf_plot_html(xyzrgb1, xyzrgb2, max_points=5000):\n",
+    "    import plotly\n",
+    "    import plotly.graph_objects as go\n",
+    "    import pandas as pd\n",
+    "    \n",
+    "    xyzrgbuvw = np.zeros((xyzrgb1.shape[0], 9))\n",
+    "    xyzrgbuvw[:, :6] = xyzrgb1\n",
+    "    xyzrgbuvw[:, 6:] = xyzrgb2[:, :3] - xyzrgb1[:, :3]\n",
+    "    \n",
+    "#     # Change the colors to make it easier to distinguish source from target\n",
+    "#     xyzrgb1 = xyzrgb1.copy()\n",
+    "#     xyzrgb1[:, (3, 4, 5)] *= 1.5\n",
+    "#     xyzrgb2 = xyzrgb2.copy()\n",
+    "#     xyzrgb2[:, (3, 4, 5)] *= 0.5\n",
+    "    \n",
+    "    pcloud_df = pd.DataFrame(xyzrgbuvw, columns=['x', 'y', 'z', 'r', 'g', 'b', 'u', 'v', 'w'])\n",
+    "    pcloud_df = pcloud_df.sample(n=min(xyzrgbuvw.shape[0], max_points))\n",
+    "    cones = go.Cone(\n",
+    "                x=pcloud_df['x'], y=pcloud_df['y'], z=pcloud_df['z'],\n",
+    "                u=pcloud_df['u'], v=pcloud_df['v'], w=pcloud_df['w'],\n",
+    "                sizemode=\"scaled\",\n",
+    "                sizeref=2,\n",
+    "                colorscale='Blues')\n",
+    "#     ,\n",
+    "#                 marker=dict(size=3, color=pcloud_df[['r', 'g', 'b']], opacity=0.9))\n",
+    "    \n",
+    "    fig = go.Figure(data=[cones])\n",
+    "    fig.update_layout(\n",
+    "            width=900, height=600,\n",
+    "            scene_camera=dict(\n",
+    "                up=dict(x=0, y=-1, z=0),\n",
+    "                center=dict(x=0, y=0, z=0),\n",
+    "                eye=dict(x=1.25, y=-1.25, z=-1.25)\n",
+    "            ),\n",
+    "            scene_aspectmode='data')\n",
+    "    return fig.to_html(include_plotlyjs=False, full_html=False)\n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "#     return \"<iframe>\" + open('/tmp/yay.html').read() + \"</iframe>\"\n",
+    "# #     return fig.to_html(full_html=False, include_plotlyjs=\"cdn\")\n",
+    "# #     html = \"\"\"\n",
+    "# #         <iframe>\n",
+    "# #         <html><head>\n",
+    "# #             <script type=\"text/javascript\">\n",
+    "# #                 if (typeof require !== 'undefined') {{\n",
+    "# #                 require.undef(\"plotly\");\n",
+    "# #                 requirejs.config({{\n",
+    "# #                     paths: {{\n",
+    "# #                         'plotly': ['https://cdn.plot.ly/plotly-latest.min']\n",
+    "# #                     }}\n",
+    "# #                 }});\n",
+    "# #                 require(['plotly'], function(Plotly) {{\n",
+    "# #                     window._Plotly = Plotly;\n",
+    "# #                 }});\n",
+    "# #                 }}\n",
+    "# #         </script></head>\n",
+    "# #         <body>\"\"\" + fig_html + \"\"\"\n",
+    "# #         <div id=\"7979e646-13e6-4f44-8d32-d8effc3816df\" style=\"height: 525; width: 100%;\" class=\"plotly-graph-div\"></div><script type=\"text/javascript\">window.PLOTLYENV=window.PLOTLYENV || {};window.PLOTLYENV.BASE_URL=\"https://plot.ly\";Plotly.newPlot(\"7979e646-13e6-4f44-8d32-d8effc3816df\", [{\"x\": [1, 2, 3], \"y\": [3, 1, 6]}], {}, {\"showLink\": false, \"linkText\": \"\"})</script>\n",
+    "# #         </body>        \n",
+    "# #         </iframe>\n",
+    "# #         \"\"\"\n",
+    "# #     return html\n",
+    "\n",
+    "class FlowPairFactoryBase(object):\n",
+    "    # Common interface for dataset-specific flow pair factories.  A subclass sets\n",
+    "    # `DATASET` and overrides `list_fp_uris()` and `_get_fp_rdd_for_uris()`.\n",
+    "    DATASET = ''\n",
+    "\n",
+    "    @classmethod\n",
+    "    def list_fp_uris(cls, spark):\n",
+    "        # The base class has no flow pairs to offer.\n",
+    "        return []\n",
+    "\n",
+    "    @classmethod\n",
+    "    def get_fp_rdd_for_uris(cls, spark, uris):\n",
+    "        # Run every entry of `uris` through `datum.URI.from_str`, keep those whose\n",
+    "        # dataset is `cls.DATASET`, and hand them to `_get_fp_rdd_for_uris()`.\n",
+    "        # Returns None when no entry matches.\n",
+    "        parsed = list(map(datum.URI.from_str, uris))\n",
+    "        matching = [uri for uri in parsed if uri.dataset == cls.DATASET]\n",
+    "        if matching:\n",
+    "            return cls._get_fp_rdd_for_uris(spark, matching)\n",
+    "        return None\n",
+    "\n",
+    "    @classmethod\n",
+    "    def _get_fp_rdd_for_uris(cls, spark, uris):\n",
+    "        # Subclass hook; `uris` holds only parsed URIs whose dataset is `cls.DATASET`.\n",
+    "        return None\n",
+    "\n",
+    "class FlowPairUnionFactory(FlowPairFactoryBase):\n",
+    "    # Presents several factories as one: URIs and RDDs are gathered from every\n",
+    "    # class listed in `FACTORIES`.\n",
+    "    FACTORIES = []\n",
+    "\n",
+    "    @classmethod\n",
+    "    def list_fp_uris(cls, spark):\n",
+    "        # Concatenate the URI lists of all member factories, in `FACTORIES` order.\n",
+    "        import itertools\n",
+    "        return list(itertools.chain.from_iterable(F.list_fp_uris(spark) for F in cls.FACTORIES))\n",
+    "\n",
+    "    @classmethod\n",
+    "    def get_fp_rdd_for_uris(cls, spark, uris):\n",
+    "        # Returns the union of the RDDs of all member factories that produce one\n",
+    "        # for `uris`; asserts that at least one factory does.\n",
+    "        #\n",
+    "        # Materialize `uris` once: it is handed to every factory in turn, so a\n",
+    "        # one-shot iterable would otherwise be exhausted by the first factory.\n",
+    "        uris = list(uris)\n",
+    "        rdds = []\n",
+    "        for F in cls.FACTORIES:\n",
+    "            rdd = F.get_fp_rdd_for_uris(spark, uris)\n",
+    "            # Factories signal that they have nothing for these URIs with None.\n",
+    "            if rdd is not None:\n",
+    "                rdds.append(rdd)\n",
+    "        # Format via a 1-tuple so the message never depends on the container type\n",
+    "        # of `uris` (a bare tuple would be unpacked into several format arguments).\n",
+    "        assert rdds, \"No RDDs for %s\" % (uris,)\n",
+    "        return spark.sparkContext.union(rdds)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 21:11:47,935\toarph 1807391 : Source has changed! Rebuilding Egg ...\n",
+      "2021-04-28 21:11:47,936\toarph 1807391 : Using source root /tmp/tmpekna3vk7_cheap_optical_flow_eval_analysis/cheap_optical_flow_eval_analysis \n",
+      "2021-04-28 21:11:47,936\toarph 1807391 : Using source root /tmp/tmpekna3vk7_cheap_optical_flow_eval_analysis \n",
+      "2021-04-28 21:11:47,938\toarph 1807391 : Generating egg to /tmp/tmpr662tmbc_oarphpy_eggbuild ...\n",
+      "2021-04-28 21:11:47,945\toarph 1807391 : ... done.  Egg at /tmp/tmpr662tmbc_oarphpy_eggbuild/cheap_optical_flow_eval_analysis-0.0.0-py3.8.egg\n"
+     ]
+    }
+   ],
+   "source": [
+    "from cheap_optical_flow_eval_analysis.ofp import *\n",
+    "\n",
+    "# # from plotly.offline import init_notebook_mode, iplot\n",
+    "# # from plotly.graph_objs import *\n",
+    "\n",
+    "# # # init_notebook_mode(connected=False)         # initiate notebook for offline plot\n",
+    "\n",
+    "# # trace0 = Scatter(\n",
+    "# #   x=[1, 2, 3, 4],\n",
+    "# #   y=[10, 15, 13, 17]\n",
+    "# # )\n",
+    "# # trace1 = Scatter(\n",
+    "# #   x=[1, 2, 3, 4],\n",
+    "# #   y=[16, 5, 11, 9]\n",
+    "# # )\n",
+    "\n",
+    "# # iplot([trace0, trace1])  \n",
+    "\n",
+    "# # from IPython.core.display import display, HTML\n",
+    "# # omg finally!\n",
+    "# show_html('''\n",
+    "#             <script src='https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js'></script>\n",
+    "#             <script>requirejs.config({\n",
+    "#                 paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});\n",
+    "#                 if(!window.Plotly) {{require(['plotly'],function(plotly) {window.Plotly=plotly;});}}</script>\n",
+    "#                 ''')\n",
+    "\n",
+    "# # import plotly\n",
+    "# # plotly.offline.init_notebook_mode(connected=True)\n",
+    "# show_html(fp.to_html())\n",
+    "\n",
+    "# # import plotly\n",
+    "# # plotly.__version__\n",
+    "# with open('/opt/psegs/tast.html', 'w') as f:\n",
+    "#     f.write(fp.to_html())\n",
+    "# # import IPython\n",
+    "# # IPython.display.HTML(filename='/opt/psegs/tast.html')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Middlebury Optical Flow\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# TODO talk configs\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Writing cheap_optical_flow_eval_analysis/midd.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile cheap_optical_flow_eval_analysis/midd.py\n",
+    "\n",
+    "from psegs import datum\n",
+    "\n",
+    "from cheap_optical_flow_eval_analysis.ofp import *\n",
+    "\n",
+    "# Please unzip `other-color-allframes.zip` and `other-gt-flow.zip` to a directory and provide the target below:\n",
+    "MIDD_DATA_ROOT = '/opt/psegs/ext_data/middlebury-flow/'\n",
+    "\n",
+    "# For the Middlebury Flow dataset, we only consider the real scenes\n",
+    "MIDD_SCENES = [\n",
+    "    {\n",
+    "        'input': 'other-data/Dimetrodon/frame10.png',\n",
+    "        'expected_out': 'other-data/Dimetrodon/frame11.png',\n",
+    "        'flow_gt': 'other-gt-flow/Dimetrodon/flow10.flo',\n",
+    "    },\n",
+    "        {\n",
+    "        'input': 'other-data/Hydrangea/frame10.png',\n",
+    "        'expected_out': 'other-data/Hydrangea/frame11.png',\n",
+    "        'flow_gt': 'other-gt-flow/Hydrangea/flow10.flo',\n",
+    "    },\n",
+    "        {\n",
+    "        'input': 'other-data/RubberWhale/frame10.png',\n",
+    "        'expected_out': 'other-data/RubberWhale/frame11.png',\n",
+    "        'flow_gt': 'other-gt-flow/RubberWhale/flow10.flo',\n",
+    "    },\n",
+    "]\n",
+    "\n",
+    "\n",
+    "def midd_read_flow(path):\n",
+    "    # Read the Middlebury `.flo` file at `path` and return its flow as an\n",
+    "    # (h, w, 2) float32 array; values >= 1666 are replaced by 0.\n",
+    "    #\n",
+    "    # Based upon: https://github.com/Johswald/flow-code-python/blob/master/readFlowFile.py\n",
+    "    # compute colored image to visualize optical flow file .flo\n",
+    "    # Author: Johannes Oswald, Technical University Munich\n",
+    "    # Contact: johannes.oswald@tum.de\n",
+    "    # Date: 26/04/2017\n",
+    "    # For more information, check http://vision.middlebury.edu/flow/\n",
+    "    import os\n",
+    "    import numpy as np\n",
+    "\n",
+    "    assert os.path.exists(path) and path.endswith('.flo'), path\n",
+    "    TAG_FLOAT = 202021.25\n",
+    "\n",
+    "    # Use a context manager so the file is closed even when a check below fails.\n",
+    "    with open(path, 'rb') as f:\n",
+    "        flo_number = np.fromfile(f, np.float32, count=1)[0]\n",
+    "        assert flo_number == TAG_FLOAT, 'Flow number %r incorrect.' % flo_number\n",
+    "        # Convert the header dimensions to plain ints (not 1-element arrays).\n",
+    "        w = int(np.fromfile(f, np.int32, count=1)[0])\n",
+    "        h = int(np.fromfile(f, np.int32, count=1)[0])\n",
+    "        data = np.fromfile(f, np.float32, count=2 * w * h)\n",
+    "\n",
+    "    # Reshape data into a 3D array (rows, columns, bands).  Unlike `np.resize`,\n",
+    "    # `reshape` raises when the file holds fewer values than the header promises\n",
+    "    # instead of silently repeating data to fill the array.\n",
+    "    flow = data.reshape((h, w, 2))\n",
+    "\n",
+    "    # We found that there are some invalid (?) (i.e. very large) flows, so we're going\n",
+    "    # to ignore those for this experiment.\n",
+    "    invalid = (flow >= 1666)\n",
+    "    flow[invalid] = 0\n",
+    "\n",
+    "    return flow\n",
+    "\n",
+    "def midd_create_fp(uri):\n",
+    "    # Build the OpticalFlowPair for the Middlebury scene that `uri` refers to.  The\n",
+    "    # index into `MIDD_SCENES` and the data root are both read from `uri.extra`.\n",
+    "    idx = int(uri.extra['midd.scene_idx'])\n",
+    "    entry = MIDD_SCENES[idx]\n",
+    "    root = uri.extra['midd.dataroot']\n",
+    "\n",
+    "    src_rel = entry['input']\n",
+    "    src_url = 'file://' + os.path.join(root, src_rel)\n",
+    "    dst_rel = entry['expected_out']\n",
+    "    dst_url = 'file://' + os.path.join(root, dst_rel)\n",
+    "\n",
+    "    # The ground truth flow file is only read when this callable is invoked.\n",
+    "    flow_loader = CloudpickeledCallable(lambda: midd_read_flow(os.path.join(root, entry['flow_gt'])))\n",
+    "\n",
+    "    return OpticalFlowPair(\n",
+    "                uri=uri,\n",
+    "                dataset=\"Middlebury Optical Flow\",\n",
+    "                id1=src_rel,\n",
+    "                img1=src_url,\n",
+    "                id2=dst_rel,\n",
+    "                img2=dst_url,\n",
+    "                flow=flow_loader)\n",
+    "    \n",
+    "\n",
+    "class MiddFactory(FlowPairFactoryBase):\n",
+    "    # Factory for the Middlebury scenes listed in `MIDD_SCENES`.\n",
+    "    DATASET = 'midd_oflow'\n",
+    "\n",
+    "    @classmethod\n",
+    "    def list_fp_uris(cls, spark):\n",
+    "        # One URI per scene; each URI records the scene's index and the data root.\n",
+    "        uris = []\n",
+    "        for scene_idx in range(len(MIDD_SCENES)):\n",
+    "            extra = {'midd.scene_idx': scene_idx, 'midd.dataroot': MIDD_DATA_ROOT}\n",
+    "            uris.append(datum.URI(dataset=cls.DATASET, extra=extra))\n",
+    "        return uris\n",
+    "\n",
+    "    @classmethod\n",
+    "    def _get_fp_rdd_for_uris(cls, spark, uris):\n",
+    "        # Parallelize the URIs and map each one to a flow pair via `midd_create_fp`.\n",
+    "        return spark.sparkContext.parallelize(uris).map(midd_create_fp)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 21:11:48,035\toarph 1807391 : Source has changed! Rebuilding Egg ...\n",
+      "2021-04-28 21:11:48,035\toarph 1807391 : Using source root /tmp/tmpekna3vk7_cheap_optical_flow_eval_analysis/cheap_optical_flow_eval_analysis \n",
+      "2021-04-28 21:11:48,036\toarph 1807391 : Using source root /tmp/tmpekna3vk7_cheap_optical_flow_eval_analysis \n",
+      "2021-04-28 21:11:48,037\toarph 1807391 : Generating egg to /tmp/tmp7q4rpnel_oarphpy_eggbuild ...\n",
+      "2021-04-28 21:11:48,044\toarph 1807391 : ... done.  Egg at /tmp/tmp7q4rpnel_oarphpy_eggbuild/cheap_optical_flow_eval_analysis-0.0.0-py3.8.egg\n"
+     ]
+    }
+   ],
+   "source": [
+    "from cheap_optical_flow_eval_analysis.midd import MiddFactory"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 3 Midd scenes\n"
+     ]
+    }
+   ],
+   "source": [
+    "ALL_FP_FACTORY_CLSS.append(MiddFactory)\n",
+    "\n",
+    "print(\"Found %s Midd scenes\" % len(MiddFactory.list_fp_uris(spark)))\n",
+    "\n",
+    "if SHOW_DEMO_OUTPUT:\n",
+    "    demo_uris = MiddFactory.list_fp_uris(spark)\n",
+    "    fp_rdd = MiddFactory.get_fp_rdd_for_uris(spark, demo_uris)\n",
+    "    fps = fp_rdd.collect()\n",
+    "    \n",
+    "    for fp in fps:\n",
+    "        show_html(fp.to_html() + \"<br/><br/><br/>\")\n",
+    "        DEMO_FPS.append(fp)\n",
+    "\n",
+    "# for i, scene in enumerate(MIDD_SCENES):\n",
+    "#     p = OpticalFlowPair(\n",
+    "#             dataset=\"Middlebury Optical Flow\",\n",
+    "#             id1=scene['input'],\n",
+    "#             img1='file://' + os.path.join(MIDD_DATA_ROOT, scene['input']),\n",
+    "#             id2=scene['expected_out'],\n",
+    "#             img2='file://' + os.path.join(MIDD_DATA_ROOT, scene['expected_out']),\n",
+    "#             flow=CloudpickeledCallable(lambda: midd_read_flow(os.path.join(MIDD_DATA_ROOT, scene['flow_gt']))))\n",
+    "    \n",
+    "#     if RUN_FULL_ANALYSIS:\n",
+    "#         ALL_FPS.append(copy.deepcopy(p))\n",
+    "    \n",
+    "#     if SHOW_DEMO_OUTPUT:\n",
+    "#         show_html(p.to_html() + \"<br/><br/><br/>\")\n",
+    "#         DEMO_FPS.append(p)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## DeepDeform"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Writing cheap_optical_flow_eval_analysis/deepdeform.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile cheap_optical_flow_eval_analysis/deepdeform.py\n",
+    "\n",
+    "from psegs import datum\n",
+    "\n",
+    "from cheap_optical_flow_eval_analysis.ofp import *\n",
+    "\n",
+    "# Please extract deepdeform_v1.7z to a directory and provide the target below:\n",
+    "DD_DATA_ROOT = '/opt/psegs/ext_data/deepdeform_v1/'\n",
+    "\n",
+    "def dd_load_raw_flow(path):\n",
+    "    # Read the DeepDeform flow file at `path` and return its payload as a float32\n",
+    "    # array shaped [channels, height, width].\n",
+    "    #\n",
+    "    # Based upon https://github.com/AljazBozic/DeepDeform/blob/master/utils.py#L1\n",
+    "    import struct\n",
+    "    import os\n",
+    "    import numpy as np\n",
+    "\n",
+    "    # Flow is stored row-wise in order [channels, height, width].\n",
+    "    assert os.path.isfile(path), path\n",
+    "\n",
+    "    with open(path, 'rb') as fin:\n",
+    "        # Header: width, height and channel count, each a 4-byte unsigned int.\n",
+    "        width = struct.unpack('I', fin.read(4))[0]\n",
+    "        height = struct.unpack('I', fin.read(4))[0]\n",
+    "        channels = struct.unpack('I', fin.read(4))[0]\n",
+    "        n_elems = height * width * channels\n",
+    "\n",
+    "        # Read the payload straight into a numpy array instead of unpacking every\n",
+    "        # value into a Python float through an `n_elems`-character format string.\n",
+    "        raw_flow_gt = np.fromfile(fin, dtype=np.float32, count=n_elems)\n",
+    "\n",
+    "    # `np.fromfile` returns fewer items on a short file; fail loudly in that case.\n",
+    "    assert raw_flow_gt.size == n_elems, (path, raw_flow_gt.size, n_elems)\n",
+    "    return raw_flow_gt.reshape([channels, height, width])\n",
+    "\n",
+    "def dd_load_oflow(path):\n",
+    "    # Load the DeepDeform optical flow at `path` as a channels-last array with\n",
+    "    # invalid entries (stored as -inf) replaced by 0.\n",
+    "    import numpy as np\n",
+    "\n",
+    "    raw_flow_gt = dd_load_raw_flow(path)\n",
+    "\n",
+    "    # Match format used in this analysis\n",
+    "    flow_gt = np.moveaxis(raw_flow_gt, 0, -1) # (h, w, 2)\n",
+    "    # NB: use `np.inf`; the `np.Inf` alias was removed in NumPy 2.0.\n",
+    "    invalid_flow = flow_gt == -np.inf\n",
+    "    flow_gt[invalid_flow] = 0.0\n",
+    "    return flow_gt\n",
+    "\n",
+    "def dd_load_depth_meters(path):\n",
+    "    # Read the depth image at `path` and return it as a float64 array in meters.\n",
+    "    import imageio\n",
+    "\n",
+    "    # The stored pixel values are divided by 1000 to obtain depth in meters.\n",
+    "    raw_depth = imageio.imread(path)\n",
+    "    return raw_depth.astype('float64') / 1000.\n",
+    "\n",
+    "def dd_load_sflow(sflow_path):\n",
+    "    # Build scene flow in (u, v, depth, visible) form for the frame pair named by\n",
+    "    # `sflow_path`.  Returns `(uvdviz_im1, uvdviz_im2)`: two (n, 4) arrays with one\n",
+    "    # row per pixel that is flagged visible in at least one of the two frames.\n",
+    "    #\n",
+    "    # NB: we actually ignore the DeepDeform SceneFlow data since it appears to be\n",
+    "    # deduced from the optical flow / visual point correspondence.  So we just\n",
+    "    # do the same deduction but capture the Scene Flow in uvd form; Deep\n",
+    "    # Deform has it in (x, y, z) [meters]\n",
+    "    import os\n",
+    "    import numpy as np\n",
+    "\n",
+    "    raw_flow = dd_load_raw_flow(sflow_path.replace('.sflow', '.oflow').replace('scene_flow', 'optical_flow'))\n",
+    "    raw_flow = np.moveaxis(raw_flow, 0, -1) # (h, w, 2)\n",
+    "\n",
+    "    # File name format: {obj_id}_{src_frame}_{dest_frame}.sflow\n",
+    "    obj_id, src_id, target_id = os.path.basename(sflow_path).replace('.sflow', '').split('_')\n",
+    "\n",
+    "    # We need the depth images of both the source and the target frame\n",
+    "    depth_path1 = os.path.join(os.path.dirname(sflow_path), '../depth/%s.png' % src_id)\n",
+    "    depth_path2 = os.path.join(os.path.dirname(sflow_path), '../depth/%s.png' % target_id)\n",
+    "\n",
+    "    d1 = dd_load_depth_meters(depth_path1)\n",
+    "    d2 = dd_load_depth_meters(depth_path2)\n",
+    "\n",
+    "    # Per-pixel (row, column) coordinates of frame 1\n",
+    "    h, w = d1.shape[:2]\n",
+    "    px_y = np.tile(np.arange(h)[:, np.newaxis], [1, w])\n",
+    "    px_x = np.tile(np.arange(w)[np.newaxis, :], [h, 1])\n",
+    "    pyx = np.concatenate([px_y[:,:,np.newaxis], px_x[:, :, np.newaxis]], axis=-1)\n",
+    "    pyx = pyx.astype(np.float32)\n",
+    "\n",
+    "    # Frame 1: columns are (u, v, depth, visible)\n",
+    "    vud1 = np.dstack([pyx, d1]).reshape([-1, 3])\n",
+    "    uvdviz_im1 = np.zeros((vud1.shape[0], 4))\n",
+    "    uvdviz_im1[:, :3] = vud1[:, (1, 0, 2)]\n",
+    "    # NB: use `np.inf` throughout; the `np.Inf` alias was removed in NumPy 2.0.\n",
+    "    uvdviz_im1[:, -1] = np.logical_and(\n",
+    "                            (raw_flow != -np.inf).reshape([-1, 2])[:, 0], # Flow is valid\n",
+    "                            uvdviz_im1[:, 2] > 0)                        # Depth is valid\n",
+    "\n",
+    "    # Frame 2: follow the flow from each frame 1 pixel and look up the depth there\n",
+    "    vu2 = (pyx + raw_flow[:, :, (1, 0)]).reshape([-1, 2])\n",
+    "    invalid = np.where(\n",
+    "            (np.rint(vu2[:, 0]) < 0) | (np.rint(vu2[:, 0]) >= h) |\n",
+    "            (np.rint(vu2[:, 1]) < 0) | (np.rint(vu2[:, 1]) >= w) |\n",
+    "            (vu2[:, 0] == -np.inf))\n",
+    "    j2 = np.rint(vu2[:, 0]).astype(np.int64)\n",
+    "    i2 = np.rint(vu2[:, 1]).astype(np.int64)\n",
+    "    # Point invalid targets at pixel (0, 0) so that the lookup below stays in\n",
+    "    # bounds; those rows are flagged as not visible afterwards.\n",
+    "    j2[invalid] = 0\n",
+    "    i2[invalid] = 0\n",
+    "    d2_col = d2[j2, i2]\n",
+    "    vud2 = np.hstack([vu2, d2_col[:, np.newaxis]])\n",
+    "\n",
+    "    uvdviz_im2 = np.ones((vud1.shape[0], 4))\n",
+    "    uvdviz_im2[:, :3] = vud2[:, (1, 0, 2)]\n",
+    "    uvdviz_im2[invalid, -1] = 0\n",
+    "\n",
+    "    # Keep only the rows flagged visible in frame 1 or in frame 2\n",
+    "    visible_either = ((uvdviz_im1[:, -1] == 1) | (uvdviz_im2[:, -1] == 1))\n",
+    "    uvdviz_im1 = uvdviz_im1[visible_either]\n",
+    "    uvdviz_im2 = uvdviz_im2[visible_either]\n",
+    "\n",
+    "    return uvdviz_im1, uvdviz_im2\n",
+    "\n",
+    "\n",
+    "# def dd_load_sflow(path):\n",
+    "#     # Based upon https://github.com/AljazBozic/DeepDeform/blob/master/utils.py#L1\n",
+    "#     import shutil\n",
+    "#     import struct\n",
+    "#     import os\n",
+    "#     import numpy as np\n",
+    "#     import imageio\n",
+    "\n",
+    "#     # Scene Flow is stored row-wise in order [channels (x, y, z), height, width].\n",
+    "#     assert os.path.isfile(path)\n",
+    "\n",
+    "#     flow_gt = None\n",
+    "#     with open(path, 'rb') as fin:\n",
+    "#         width = struct.unpack('I', fin.read(4))[0]\n",
+    "#         height = struct.unpack('I', fin.read(4))[0]\n",
+    "#         channels = struct.unpack('I', fin.read(4))[0]\n",
+    "#         n_elems = height * width * channels\n",
+    "#         flow = struct.unpack('f' * n_elems, fin.read(n_elems * 4))\n",
+    "#         flow_gt = np.asarray(flow, dtype=np.float32).reshape([channels, height, width])\n",
+    "\n",
+    "#     sflow_gt = np.moveaxis(flow_gt, 0, -1) # (h, w, 3)\n",
+    "        \n",
+    "#     # \"Every pixel contains 3 values for flow in x, y and z direction, in meters\"\n",
+    "#     # So we need the depth for frame 1 at least\n",
+    "#     obj_id, src_id, target_id = os.path.basename(path).replace('.sflow', '').split('_')\n",
+    "#     depth_path = os.path.join(os.path.dirname(path), '../depth/%s.png' % src_id)\n",
+    "#     depth_img_raw = imageio.imread(depth_path)\n",
+    "    \n",
+    "#     # \"depth images as 16-bit .png (divide by 1000 to obtain depth in meters)\"\n",
+    "#     d_uvd_meters = depth_img_raw.astype('float64') / 1000.\n",
+    "        \n",
+    "#     sflow_uv_dxyz = np.dstack([d_uvd_meters[:, :, np.newaxis], sflow_gt])\n",
+    "#     return sflow_uv_dxyz\n",
+    "\n",
+    "\n",
+    "# def dd_convert_sflow(K, sflow_uv_dxyz):\n",
+    "#     import numpy as np\n",
+    "\n",
+    "#     h, w = sflow_uv_dxyz.shape[:2]\n",
+    "#     px_y = np.tile(np.arange(h)[:, np.newaxis], [1, w])\n",
+    "#     px_x = np.tile(np.arange(w)[np.newaxis, :], [h, 1])\n",
+    "#     pxy = np.concatenate([px_y[:,:,np.newaxis], px_x[:, :, np.newaxis]], axis=-1)\n",
+    "\n",
+    "    \n",
+    "#     sflow_uv_ijdxyz = np.dstack([pxy, sflow_uv_dxyz])\n",
+    "#     sflow_ijdxyz = sflow_uv_ijdxyz.reshape([-1, 6])\n",
+    "    \n",
+    "#     # Trim invalid / invisible\n",
+    "#     sflow_ijdxyz = sflow_ijdxyz[sflow_ijdxyz[:, -1] != -np.Inf]\n",
+    "\n",
+    "#     uvdviz_im1 = np.ones((sflow_ijdxyz.shape[0], 4))\n",
+    "#     uvdviz_im1[:, 0] = sflow_ijdxyz[:, 1]\n",
+    "#     uvdviz_im1[:, 1] = sflow_ijdxyz[:, 0]\n",
+    "#     uvdviz_im1[:, 2] = sflow_ijdxyz[:, 2]\n",
+    "\n",
+    "#     fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]\n",
+    "#     rays = np.zeros((h, w, 3))\n",
+    "#     rays[:, :, 0] = (rays[:, :, 0] - cy) / fy\n",
+    "#     rays[:, :, 1] = (rays[:, :, 1] - cx) / fx\n",
+    "#     rays[:, :, 2] = 1\n",
+    "#     rays /= np.linalg.norm(rays, axis=-1)[:, :, np.newaxis]\n",
+    "\n",
+    "#     yxz_1 = rays * sflow_uv_dxyz[:, :, 0][:, :, np.newaxis]\n",
+    "#     yxz_2 = yxz_1 + sflow_uv_dxyz[:, :, (2, 1, 3)]\n",
+    "#     xyz_2 = yxz_2[:, :, (1, 0, 2)]\n",
+    "    \n",
+    "#     xyz_2_valid = xyz_2.reshape([-1, 3])\n",
+    "#     xyz_2_valid = xyz_2_valid[xyz_2_valid[:, 0] != -np.Inf]\n",
+    "    \n",
+    "#     # nawwww use the oflow look at the paper they prolly are inferring SF from oflow and depth \n",
+    "    \n",
+    "#     # DeepDeform scene flow is always visible -> visible\n",
+    "#     assert xyz_2_valid.shape[0] == uvdviz_im1.shape[0], (xyz_2_valid.shape, uvdviz_im1.shape)\n",
+    "    \n",
+    "#     uvd2 = K[:3, :3].dot(xyz_2_valid.T)\n",
+    "#     uvd2[0:2, :] /= uvd2[2, :]\n",
+    "#     uvd2 = uvd2.T\n",
+    "    \n",
+    "#     uvdviz_im2 = np.ones((xyz_2_valid.shape[0], 4))\n",
+    "#     uvdviz_im2[:, 0:3] = uvd2\n",
+    "    \n",
+    "    \n",
+    "# #     uvdviz_im2[:, 0] = (fx * xyz_2_valid[:, 0]) / (xyz_2_valid[:, 2] + cx)\n",
+    "# #     uvdviz_im2[:, 1] = (fy * xyz_2_valid[:, 1]) / (xyz_2_valid[:, 2] + cy)\n",
+    "# #     uvdviz_im2[:, 2] = np.linalg.norm(xyz_2_valid, axis=1)\n",
+    "    \n",
+    "#     return uvdviz_im1, uvdviz_im2\n",
+    "\n",
+    "def dd_create_fp(uri):\n",
+    "    # Build the OpticalFlowPair for a DeepDeform `uri`.  All paths in `uri.extra`\n",
+    "    # are joined onto `DD_DATA_ROOT`.  The camera matrix and the scene flow arrays\n",
+    "    # are only loaded when the URI has a `dd.sf_gt` entry; otherwise they are None.\n",
+    "    if \"dd.sf_gt\" in uri.extra:\n",
+    "        # Keep only the upper-left 3x3 block of the matrix in the intrinsics file.\n",
+    "        K = dd_read_K(os.path.join(DD_DATA_ROOT, uri.extra['dd.K']))[:3, :3]\n",
+    "        uvdviz_im1, uvdviz_im2 = dd_load_sflow(os.path.join(DD_DATA_ROOT, uri.extra[\"dd.sf_gt\"]))\n",
+    "    else:\n",
+    "        # K must not be sliced in this branch: `None[:3, :3]` raises a TypeError.\n",
+    "        K = None\n",
+    "        uvdviz_im1 = None\n",
+    "        uvdviz_im2 = None\n",
+    "    return OpticalFlowPair(\n",
+    "                uri=uri,\n",
+    "                dataset=\"DeepDeform Semi-Synthetic Optical Flow\",\n",
+    "                id1=uri.extra['dd.input'],\n",
+    "                img1='file://' + os.path.join(DD_DATA_ROOT, uri.extra['dd.input']),\n",
+    "                id2=uri.extra['dd.expected_out'],\n",
+    "                img2='file://' + os.path.join(DD_DATA_ROOT, uri.extra['dd.expected_out']),\n",
+    "                flow=dd_load_oflow(os.path.join(DD_DATA_ROOT, uri.extra['dd.flow_gt'])),\n",
+    "\n",
+    "                K=K,\n",
+    "                uvdviz_im1=uvdviz_im1,\n",
+    "                uvdviz_im2=uvdviz_im2)\n",
+    "\n",
+    "def dd_read_K(path):\n",
+    "    # Parse the text file at `path` into a float array: one row per non-empty\n",
+    "    # line, with the values on a line separated by spaces.\n",
+    "    import numpy as np\n",
+    "    with open(path, 'r') as f:\n",
+    "        text = f.read()\n",
+    "    rows = []\n",
+    "    for line in text.split('\\n'):\n",
+    "        if not line:\n",
+    "            continue\n",
+    "        # Runs of several spaces produce empty tokens; drop those.\n",
+    "        rows.append([float(tok) for tok in line.split(' ') if tok])\n",
+    "    return np.array(rows)\n",
+    "\n",
+    "class DDFactory(FlowPairFactoryBase):\n",
+    "    # Factory for the DeepDeform flow pairs listed in `train_alignments.json`\n",
+    "    # under `DD_DATA_ROOT`.\n",
+    "    DATASET = 'deep_deform'\n",
+    "\n",
+    "    @classmethod\n",
+    "    def _get_all_scenes(cls):\n",
+    "        # Returns one dict of `dd.*` entries per record of the alignments file.\n",
+    "        import json\n",
+    "        # Use a context manager so the file handle is closed right after parsing\n",
+    "        # rather than being left to the garbage collector.\n",
+    "        with open(os.path.join(DD_DATA_ROOT, 'train_alignments.json')) as f:\n",
+    "            DD_ALIGNMENTS = json.load(f)\n",
+    "        ALL_DD_SCENES = [\n",
+    "            {\n",
+    "                \"dd.input\": ascene['source_color'],\n",
+    "                \"dd.expected_out\": ascene['target_color'],\n",
+    "                \"dd.flow_gt\": ascene['optical_flow'],\n",
+    "                \"dd.sf_gt\": ascene['scene_flow'],\n",
+    "                # The intrinsics file sits one directory above the scene flow file.\n",
+    "                \"dd.K\": os.path.join(os.path.dirname(ascene['scene_flow']), '../intrinsics.txt'),\n",
+    "            }\n",
+    "            for ascene in DD_ALIGNMENTS\n",
+    "        ]\n",
+    "        return ALL_DD_SCENES\n",
+    "\n",
+    "    @classmethod\n",
+    "    def list_fp_uris(cls, spark):\n",
+    "        # One URI per scene; the scene dict becomes the URI's `extra`.\n",
+    "        scenes = cls._get_all_scenes()\n",
+    "        return [\n",
+    "            datum.URI(dataset=cls.DATASET, extra=scene)\n",
+    "            for scene in scenes\n",
+    "        ]\n",
+    "\n",
+    "    @classmethod\n",
+    "    def _get_fp_rdd_for_uris(cls, spark, uris):\n",
+    "        # Parallelize the URIs and map each one to a flow pair via `dd_create_fp`.\n",
+    "        uri_rdd = spark.sparkContext.parallelize(uris)\n",
+    "        fp_rdd = uri_rdd.map(dd_create_fp)\n",
+    "        return fp_rdd\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 15:38:47,827\toarph 723307 : Source has changed! Rebuilding Egg ...\n",
+      "2021-04-28 15:38:47,828\toarph 723307 : Using source root /tmp/tmpcbogta6w_cheap_optical_flow_eval_analysis/cheap_optical_flow_eval_analysis \n",
+      "2021-04-28 15:38:47,828\toarph 723307 : Using source root /tmp/tmpcbogta6w_cheap_optical_flow_eval_analysis \n",
+      "2021-04-28 15:38:47,830\toarph 723307 : Generating egg to /tmp/tmp3c2y9mgb_oarphpy_eggbuild ...\n",
+      "2021-04-28 15:38:47,836\toarph 723307 : ... done.  Egg at /tmp/tmp3c2y9mgb_oarphpy_eggbuild/cheap_optical_flow_eval_analysis-0.0.0-py3.8.egg\n"
+     ]
+    }
+   ],
+   "source": [
+    "from cheap_optical_flow_eval_analysis.deepdeform import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 4540 DeepDeform scenes\n"
+     ]
+    }
+   ],
+   "source": [
+    "from psegs import datum\n",
+    "\n",
+    "DD_DEMO_URIS = [\n",
+    "    datum.URI(dataset=DDFactory.DATASET, extra={\n",
+    "        \"dd.input\": \"train/seq000/color/000000.jpg\",\n",
+    "        \"dd.expected_out\": \"train/seq000/color/000200.jpg\",\n",
+    "        \"dd.flow_gt\": \"train/seq000/optical_flow/blackdog_000000_000200.oflow\",\n",
+    "        \"dd.sf_gt\": \"train/seq000/scene_flow/blackdog_000000_000200.sflow\",\n",
+    "        \"dd.K\": \"train/seq000/intrinsics.txt\",\n",
+    "    }),\n",
+    "    datum.URI(dataset=DDFactory.DATASET, extra={\n",
+    "        \"dd.input\": \"train/seq000/color/000000.jpg\",\n",
+    "        \"dd.expected_out\": \"train/seq000/color/001200.jpg\",\n",
+    "        \"dd.flow_gt\": \"train/seq000/optical_flow/blackdog_000000_001200.oflow\",\n",
+    "        \"dd.sf_gt\": \"train/seq000/scene_flow/blackdog_000000_001200.sflow\",\n",
+    "        \"dd.K\": \"train/seq000/intrinsics.txt\",\n",
+    "    }),\n",
+    "    datum.URI(dataset=DDFactory.DATASET, extra={\n",
+    "        \"dd.input\": \"train/seq001/color/003400.jpg\",\n",
+    "        \"dd.expected_out\": \"train/seq001/color/003600.jpg\",\n",
+    "        \"dd.flow_gt\": \"train/seq001/optical_flow/lady_003400_003600.oflow\",\n",
+    "        \"dd.sf_gt\": \"train/seq001/scene_flow/lady_003400_003600.sflow\",\n",
+    "        \"dd.K\": \"train/seq001/intrinsics.txt\",\n",
+    "    }),\n",
+    "    datum.URI(dataset=DDFactory.DATASET, extra={\n",
+    "        \"dd.input\": \"train/seq337/color/000050.jpg\",\n",
+    "        \"dd.expected_out\": \"train/seq337/color/000350.jpg\",\n",
+    "        \"dd.flow_gt\": \"train/seq337/optical_flow/adult_000050_000350.oflow\",\n",
+    "        \"dd.sf_gt\": \"train/seq337/scene_flow/adult_000050_000350.sflow\",\n",
+    "        \"dd.K\": \"train/seq337/intrinsics.txt\",\n",
+    "    }),\n",
+    "]\n",
+    "\n",
+    "ALL_FP_FACTORY_CLSS.append(DDFactory)\n",
+    "\n",
+    "print(\"Found %s DeepDeform scenes\" % len(DDFactory.list_fp_uris(spark)))\n",
+    "\n",
+    "if SHOW_DEMO_OUTPUT:\n",
+    "    fp_rdd = DDFactory.get_fp_rdd_for_uris(spark, DD_DEMO_URIS)\n",
+    "    fps = fp_rdd.collect()\n",
+    "    \n",
+    "    for fp in fps:\n",
+    "        show_html(fp.to_html() + \"<br/><br/><br/>\")\n",
+    "        DEMO_FPS.append(fp)\n",
+    "\n",
+    "\n",
+    "\n",
+    "# import json\n",
+    "# DD_ALIGNMENTS = json.load(open(os.path.join(DD_DATA_ROOT, 'train_alignments.json')))\n",
+    "# ALL_DD_SCENES = [\n",
+    "#     {\n",
+    "#         \"input\": ascene['source_color'],\n",
+    "#         \"expected_out\": ascene['target_color'],\n",
+    "#         \"flow_gt\": ascene['optical_flow'],\n",
+    "#     }\n",
+    "#     for ascene in DD_ALIGNMENTS\n",
+    "# ]\n",
+    "\n",
+    "# print(\"Found %s DeepDeform scenes\" % len(ALL_DD_SCENES))\n",
+    "# if SHOW_DEMO_OUTPUT:\n",
+    "#     for scene in DD_DEMO_SCENES:\n",
+    "#         p = dd_create_fp(scene)\n",
+    "#         show_html(p.to_html())\n",
+    "#         DEMO_FPS.append(p)\n",
+    "\n",
+    "# if RUN_FULL_ANALYSIS:\n",
+    "#     for scene in ALL_DD_SCENES:\n",
+    "#         p = dd_create_fp(scene)\n",
+    "#         ALL_FPS.append(p)\n",
+    "        "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Kitti Scene Flow Benchmark (2015)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# # Please unzip `data_scene_flow.zip` and `data_scene_flow_calib.zip` to a directory and provide that target below:\n",
+    "# KITTI_SF15_DATA_ROOT = '/opt/psegs/ext_data/kitti_scene_flow_2015/'\n",
+    "\n",
+    "\n",
+    "\n",
+    "# from oarphpy import util as oputil\n",
+    "# KITTI_SF15_ALL_FLOW_OCC = [\n",
+    "#     os.path.basename(p)\n",
+    "#     for p in oputil.all_files_recursive(\n",
+    "#         os.path.join(KITTI_SF15_DATA_ROOT, 'training/flow_occ'), pattern='*.png')\n",
+    "# ]\n",
+    "    \n",
+    "# KITTI_SF15_ALL_SCENES = [\n",
+    "#     {\n",
+    "#         \"input\": 'training/image_2/%s' % fname,\n",
+    "#         \"expected_out\": 'training/image_2/%s' % fname.replace('_10', '_11'),\n",
+    "#         \"flow_gt\": 'training/flow_occ/%s' % fname,\n",
+    "#     }\n",
+    "#     for fname in KITTI_SF15_ALL_FLOW_OCC\n",
+    "# ]\n",
+    "# print(\"Found %s KITTI SceneFlow 2015 scenes\" % len(KITTI_SF15_ALL_SCENES))\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Writing cheap_optical_flow_eval_analysis/kittisf15.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile cheap_optical_flow_eval_analysis/kittisf15.py\n",
+    "\n",
+    "from psegs import datum\n",
+    "\n",
+    "from cheap_optical_flow_eval_analysis.ofp import *\n",
+    "\n",
+    "# Please unzip `data_scene_flow.zip` and `data_scene_flow_calib.zip` to a directory and provide that target below:\n",
+    "KITTI_SF15_DATA_ROOT = '/opt/psegs/ext_data/kitti_scene_flow_2015/'\n",
+    "\n",
+    "\n",
+    "def kittisf15_load_flow(path):\n",
+    "    # Decode the KITTI flow PNG at `path` into an (h, w, 2) float64 array.  Flow at\n",
+    "    # pixels whose third channel is 0 is set to 0.\n",
+    "    #\n",
+    "    # Based upon https://github.com/liruoteng/OpticalFlowToolkit/blob/master/lib/flowlib.py#L559\n",
+    "    import png\n",
+    "    import numpy as np\n",
+    "    direct = png.Reader(filename=path).asDirect()\n",
+    "    rows = list(direct[2])\n",
+    "    w, h = direct[3]['size']\n",
+    "    flow = np.zeros((h, w, 3), dtype=np.float64)\n",
+    "    for i, row in enumerate(rows):\n",
+    "        # Each row interleaves the three channels of its pixels.\n",
+    "        for c in range(3):\n",
+    "            flow[i, :, c] = row[c::3]\n",
+    "\n",
+    "    # Remember which pixels are invalid before the first two channels are decoded.\n",
+    "    invalid_idx = (flow[:, :, 2] == 0)\n",
+    "    flow[:, :, 0:2] = (flow[:, :, 0:2] - 2 ** 15) / 64.0\n",
+    "    flow[invalid_idx, 0:2] = 0\n",
+    "    return flow[:, :, :2]\n",
+    "\n",
+    "def kittisf15_load_disp(disp_path):\n",
+    "    \"\"\"Load the disparity PNG at `disp_path` as a float32 array of disparities.\n",
+    "\n",
+    "    From KITTI SF Devkit:\n",
+    "    \"Disparity maps are saved as uint16 PNG images, which can be opened with\n",
+    "    either MATLAB or libpng++. A 0 value indicates an invalid pixel (ie, no\n",
+    "    ground truth exists, or the estimation algorithm didn't produce an estimate\n",
+    "    for that pixel). Otherwise, the disparity for a pixel can be computed by\n",
+    "    converting the uint16 value to float and dividing it by 256.0\"\n",
+    "    \"\"\"\n",
+    "    import imageio\n",
+    "\n",
+    "    raw = imageio.imread(disp_path)\n",
+    "    return raw.astype('float32') / 256.\n",
+    "\n",
+    "def kittisf15_load_K_baseline(cam_to_cam_path):\n",
+    "    \"\"\"Parse a KITTI `calib_cam_to_cam` text file.\n",
+    "\n",
+    "    Returns `(K, baseline)`: `K` is the left 3x3 block of the 3x4 `P_rect_02`\n",
+    "    matrix and `baseline` is the Euclidean norm of `T_02 - T_03`.\n",
+    "\n",
+    "    Raises ValueError if any of `P_rect_02`, `T_02` or `T_03` is missing.\n",
+    "    \"\"\"\n",
+    "    import numpy as np\n",
+    "\n",
+    "    wanted = ('P_rect_02', 'T_02', 'T_03')\n",
+    "\n",
+    "    def parse_floats(line, key):\n",
+    "        # str.split() without arguments tolerates tabs, repeated spaces and\n",
+    "        # the trailing newline, none of which split(' ') handles.\n",
+    "        payload = line.split(key + ': ')[-1]\n",
+    "        return np.array([float(tok) for tok in payload.split()])\n",
+    "\n",
+    "    found = {}\n",
+    "    with open(cam_to_cam_path, 'r') as f:\n",
+    "        for line in f:\n",
+    "            for key in wanted:\n",
+    "                if key in line:\n",
+    "                    # As before, the last matching line wins.\n",
+    "                    found[key] = line\n",
+    "\n",
+    "    missing = [key for key in wanted if key not in found]\n",
+    "    if missing:\n",
+    "        raise ValueError(\n",
+    "            'Missing %s in calibration file %s' % (missing, cam_to_cam_path))\n",
+    "\n",
+    "    K = parse_floats(found['P_rect_02'], 'P_rect_02').reshape([3, 4])[:3, :3]\n",
+    "    T_02 = parse_floats(found['T_02'], 'T_02')\n",
+    "    T_03 = parse_floats(found['T_03'], 'T_03')\n",
+    "    baseline = np.linalg.norm(T_02 - T_03)\n",
+    "\n",
+    "    return K, baseline\n",
+    "\n",
+    "def kittisf15_load_sflow(flow, K, baseline, disp0_path, disp1_path):\n",
+    "    \"\"\"Build per-pixel (u, v, depth, visible) tables for both frames of a pair.\n",
+    "\n",
+    "    `flow` is an (h, w, 2) flow array as returned by `kittisf15_load_flow`,\n",
+    "    `K` and `baseline` come from `kittisf15_load_K_baseline`, and the two paths\n",
+    "    name the disparity PNGs read with `kittisf15_load_disp`.\n",
+    "\n",
+    "    Returns `(uvdviz_im1, uvdviz_im2)`: two row-aligned (n, 4) arrays holding\n",
+    "    only the pixels flagged visible in at least one of the two tables.\n",
+    "    \"\"\"\n",
+    "    import numpy as np\n",
+    "\n",
+    "    fx = K[0, 0]\n",
+    "\n",
+    "    def load_depth(disp_path):\n",
+    "        # depth = fx * baseline / disparity; non-positive disparity -> depth 0.\n",
+    "        disp = kittisf15_load_disp(disp_path)\n",
+    "        depth = fx * baseline / (disp + 1e-5)\n",
+    "        depth[~(disp > 0)] = 0\n",
+    "        return depth\n",
+    "\n",
+    "    d0 = load_depth(disp0_path)\n",
+    "    d1 = load_depth(disp1_path)\n",
+    "\n",
+    "    h, w = d1.shape[:2]\n",
+    "    px_y = np.tile(np.arange(h)[:, np.newaxis], [1, w])\n",
+    "    px_x = np.tile(np.arange(w)[np.newaxis, :], [h, 1])\n",
+    "    pyx = np.concatenate([px_y[:, :, np.newaxis], px_x[:, :, np.newaxis]], axis=-1)\n",
+    "    pyx = pyx.astype(np.float32)\n",
+    "\n",
+    "    # The loader zeroes the flow of invalid pixels, so a non-zero first\n",
+    "    # component marks a usable vector.  (Testing `flow > 0` instead would\n",
+    "    # discard every valid vector with a negative first component.)\n",
+    "    flow_valid = (flow[:, :, 0] != 0).reshape([-1])\n",
+    "\n",
+    "    vud1 = np.dstack([pyx, d0]).reshape([-1, 3])\n",
+    "    uvdviz_im1 = np.zeros((vud1.shape[0], 4))\n",
+    "    uvdviz_im1[:, :3] = vud1[:, (1, 0, 2)]\n",
+    "    uvdviz_im1[:, -1] = np.logical_and(\n",
+    "                            flow_valid,                # Flow is valid\n",
+    "                            (d0 > 0).reshape([-1]))    # Depth is valid\n",
+    "\n",
+    "    # Displace every pixel by its flow and round to the nearest target pixel.\n",
+    "    vu2 = (pyx + flow[:, :, (1, 0)]).reshape([-1, 2])\n",
+    "    rounded = np.rint(vu2)\n",
+    "    invalid = (\n",
+    "        (rounded[:, 0] < 0) | (rounded[:, 0] >= h) |\n",
+    "        (rounded[:, 1] < 0) | (rounded[:, 1] >= w) |\n",
+    "        ~flow_valid |\n",
+    "        ~(d1 > 0).reshape([-1]))\n",
+    "    j2 = rounded[:, 0].astype(np.int64)\n",
+    "    i2 = rounded[:, 1].astype(np.int64)\n",
+    "    # Point rejected pixels at (0, 0) so the gather below stays in bounds;\n",
+    "    # their visibility flag is cleared afterwards.\n",
+    "    j2[invalid] = 0\n",
+    "    i2[invalid] = 0\n",
+    "    d2_col = d1[j2, i2]\n",
+    "    vud2 = np.hstack([vu2, d2_col[:, np.newaxis]])\n",
+    "\n",
+    "    uvdviz_im2 = np.ones((vud1.shape[0], 4))\n",
+    "    uvdviz_im2[:, :3] = vud2[:, (1, 0, 2)]\n",
+    "    uvdviz_im2[invalid, -1] = 0\n",
+    "\n",
+    "    visible_either = ((uvdviz_im1[:, -1] == 1) | (uvdviz_im2[:, -1] == 1))\n",
+    "    return uvdviz_im1[visible_either], uvdviz_im2[visible_either]\n",
+    "\n",
+    "\n",
+    "def kittisf15_create_fp(uri):\n",
+    "    \"\"\"Load the KITTI SF15 files named in `uri.extra` and return an `OpticalFlowPair`.\n",
+    "\n",
+    "    Every `ksf15.*` entry of `uri.extra` is a path relative to\n",
+    "    `KITTI_SF15_DATA_ROOT`.\n",
+    "    \"\"\"\n",
+    "    def resolve(key):\n",
+    "        return os.path.join(KITTI_SF15_DATA_ROOT, uri.extra[key])\n",
+    "\n",
+    "    flow = kittisf15_load_flow(resolve('ksf15.flow_gt'))\n",
+    "    K, baseline = kittisf15_load_K_baseline(resolve('ksf15.K'))\n",
+    "    uvdviz_im1, uvdviz_im2 = kittisf15_load_sflow(\n",
+    "        flow, K, baseline, resolve('ksf15.disp0'), resolve('ksf15.disp1'))\n",
+    "\n",
+    "    fields = dict(\n",
+    "        uri=uri,\n",
+    "        dataset=\"KITTI Scene Flow 2015\",\n",
+    "        id1=uri.extra['ksf15.input'],\n",
+    "        img1='file://' + resolve('ksf15.input'),\n",
+    "        id2=uri.extra['ksf15.expected_out'],\n",
+    "        img2='file://' + resolve('ksf15.expected_out'),\n",
+    "        flow=flow,\n",
+    "        K=K,\n",
+    "        uvdviz_im1=uvdviz_im1,\n",
+    "        uvdviz_im2=uvdviz_im2,\n",
+    "    )\n",
+    "    return OpticalFlowPair(**fields)\n",
+    "\n",
+    "\n",
+    "class KITTISF15Factory(FlowPairFactoryBase):\n",
+    "    \"\"\"Flow-pair factory over the `training/` files under `KITTI_SF15_DATA_ROOT`.\"\"\"\n",
+    "\n",
+    "    DATASET = 'kitti_sf15'\n",
+    "\n",
+    "    @classmethod\n",
+    "    def _get_all_scenes(cls):\n",
+    "        \"\"\"Return one dict of relative file paths per PNG under `training/flow_occ`.\"\"\"\n",
+    "        from oarphpy import util as oputil\n",
+    "\n",
+    "        flow_occ_dir = os.path.join(KITTI_SF15_DATA_ROOT, 'training/flow_occ')\n",
+    "        scenes = []\n",
+    "        for path in oputil.all_files_recursive(flow_occ_dir, pattern='*.png'):\n",
+    "            fname = os.path.basename(path)\n",
+    "            scenes.append({\n",
+    "                \"ksf15.input\": 'training/image_2/%s' % fname,\n",
+    "                \"ksf15.expected_out\": 'training/image_2/%s' % fname.replace('_10', '_11'),\n",
+    "                \"ksf15.flow_gt\": 'training/flow_occ/%s' % fname,\n",
+    "                \"ksf15.disp0\": 'training/disp_occ_0/%s' % fname,\n",
+    "                \"ksf15.disp1\": 'training/disp_occ_1/%s' % fname,\n",
+    "                \"ksf15.K\": 'training/calib_cam_to_cam/%s' % fname.replace('_10.png', '.txt'),\n",
+    "            })\n",
+    "        return scenes\n",
+    "\n",
+    "    @classmethod\n",
+    "    def list_fp_uris(cls, spark):\n",
+    "        \"\"\"Return one `datum.URI` per scene, carrying the scene's paths in `extra`.\"\"\"\n",
+    "        uris = []\n",
+    "        for scene in cls._get_all_scenes():\n",
+    "            uris.append(datum.URI(dataset=cls.DATASET, extra=scene))\n",
+    "        return uris\n",
+    "\n",
+    "    @classmethod\n",
+    "    def _get_fp_rdd_for_uris(cls, spark, uris):\n",
+    "        \"\"\"Parallelize `uris` and map each one through `kittisf15_create_fp`.\"\"\"\n",
+    "        return spark.sparkContext.parallelize(uris).map(kittisf15_create_fp)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 15:39:03,569\toarph 723307 : Source has changed! Rebuilding Egg ...\n",
+      "2021-04-28 15:39:03,570\toarph 723307 : Using source root /tmp/tmpcbogta6w_cheap_optical_flow_eval_analysis/cheap_optical_flow_eval_analysis \n",
+      "2021-04-28 15:39:03,571\toarph 723307 : Using source root /tmp/tmpcbogta6w_cheap_optical_flow_eval_analysis \n",
+      "2021-04-28 15:39:03,572\toarph 723307 : Generating egg to /tmp/tmp7d55fgjb_oarphpy_eggbuild ...\n",
+      "2021-04-28 15:39:03,578\toarph 723307 : ... done.  Egg at /tmp/tmp7d55fgjb_oarphpy_eggbuild/cheap_optical_flow_eval_analysis-0.0.0-py3.8.egg\n"
+     ]
+    }
+   ],
+   "source": [
+    "from cheap_optical_flow_eval_analysis.kittisf15 import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 200 Kitti Scene Flow 2015 scenes\n"
+     ]
+    }
+   ],
+   "source": [
+    "from psegs import datum\n",
+    "\n",
+    "# You have to ls flow_occ to get the paths\n",
+    "KITTI_SF15_DEMO_URIS = [\n",
+    "    datum.URI(dataset=KITTISF15Factory.DATASET, extra={\n",
+    "        'ksf15.input': 'training/image_2/000000_10.png',\n",
+    "        'ksf15.expected_out': 'training/image_2/000000_11.png',\n",
+    "        'ksf15.flow_gt': 'training/flow_occ/000000_10.png',\n",
+    "        'ksf15.disp0': 'training/disp_occ_0/000000_10.png',\n",
+    "        'ksf15.disp1': 'training/disp_occ_1/000000_10.png',\n",
+    "        'ksf15.K': 'training/calib_cam_to_cam/000000.txt',\n",
+    "    }),\n",
+    "    datum.URI(dataset=KITTISF15Factory.DATASET, extra={\n",
+    "        'ksf15.input': 'training/image_2/000007_10.png',\n",
+    "        'ksf15.expected_out': 'training/image_2/000007_11.png',\n",
+    "        'ksf15.flow_gt': 'training/flow_occ/000007_10.png',\n",
+    "        'ksf15.disp0': 'training/disp_occ_0/000007_10.png',\n",
+    "        'ksf15.disp1': 'training/disp_occ_1/000007_10.png',\n",
+    "        'ksf15.K': 'training/calib_cam_to_cam/000007.txt',\n",
+    "    }),\n",
+    "    datum.URI(dataset=KITTISF15Factory.DATASET, extra={\n",
+    "        'ksf15.input': 'training/image_2/000023_10.png',\n",
+    "        'ksf15.expected_out': 'training/image_2/000023_11.png',\n",
+    "        'ksf15.flow_gt': 'training/flow_occ/000023_10.png',\n",
+    "        'ksf15.disp0': 'training/disp_occ_0/000023_10.png',\n",
+    "        'ksf15.disp1': 'training/disp_occ_1/000023_10.png',\n",
+    "        'ksf15.K': 'training/calib_cam_to_cam/000023.txt',\n",
+    "    }),\n",
+    "    datum.URI(dataset=KITTISF15Factory.DATASET, extra={\n",
+    "        'ksf15.input': 'training/image_2/000051_10.png',\n",
+    "        'ksf15.expected_out': 'training/image_2/000051_11.png',\n",
+    "        'ksf15.flow_gt': 'training/flow_occ/000051_10.png',\n",
+    "        'ksf15.disp0': 'training/disp_occ_0/000051_10.png',\n",
+    "        'ksf15.disp1': 'training/disp_occ_1/000051_10.png',\n",
+    "        'ksf15.K': 'training/calib_cam_to_cam/000051.txt',\n",
+    "    }),\n",
+    "    datum.URI(dataset=KITTISF15Factory.DATASET, extra={\n",
+    "        'ksf15.input': 'training/image_2/000003_10.png',\n",
+    "        'ksf15.expected_out': 'training/image_2/000003_11.png',\n",
+    "        'ksf15.flow_gt': 'training/flow_occ/000003_10.png',\n",
+    "        'ksf15.disp0': 'training/disp_occ_0/000003_10.png',\n",
+    "        'ksf15.disp1': 'training/disp_occ_1/000003_10.png',\n",
+    "        'ksf15.K': 'training/calib_cam_to_cam/000003.txt',\n",
+    "    }),\n",
+    "]\n",
+    "\n",
+    "ALL_FP_FACTORY_CLSS.append(KITTISF15Factory)\n",
+    "\n",
+    "print(\"Found %s Kitti Scene Flow 2015 scenes\" % len(KITTISF15Factory.list_fp_uris(spark)))\n",
+    "\n",
+    "if SHOW_DEMO_OUTPUT:\n",
+    "    fp_rdd = KITTISF15Factory.get_fp_rdd_for_uris(spark, KITTI_SF15_DEMO_URIS)\n",
+    "    fps = fp_rdd.collect()\n",
+    "    \n",
+    "    for fp in fps:\n",
+    "#         show_html(fp.to_html() + \"<br/><br/><br/>\")\n",
+    "        print('fixme show html')\n",
+    "        DEMO_FPS.append(fp)\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "# def kitti_sf15_create_fp(info):\n",
+    "#      return OpticalFlowPair(\n",
+    "#                 dataset=\"KITTI Scene Flow 2015\",\n",
+    "#                 id1=scene['input'],\n",
+    "#                 img1='file://' + os.path.join(KITTI_SF15_DATA_ROOT, scene['input']),\n",
+    "#                 id2=scene['expected_out'],\n",
+    "#                 img2='file://' + os.path.join(KITTI_SF15_DATA_ROOT, scene['expected_out']),\n",
+    "#                 flow=KITTISF15LoadFlowFromPng(os.path.join(KITTI_SF15_DATA_ROOT, scene['flow_gt'])))\n",
+    "\n",
+    "# if SHOW_DEMO_OUTPUT:\n",
+    "#     for scene in KITTI_SF15_DEMO_SCENES:\n",
+    "#         p = kitti_sf15_create_fp(scene)\n",
+    "#         show_html(p.to_html())\n",
+    "#         DEMO_FPS.append(p)\n",
+    "\n",
+    "# if RUN_FULL_ANALYSIS:\n",
+    "#     for scene in KITTI_SF15_ALL_SCENES:\n",
+    "#         p = kitti_sf15_create_fp(scene)\n",
+    "#         ALL_FPS.append(p)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## PSegs Synthetic Flow from Fused Lidar"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# PSEGS_SYNTHFLOW_PARQUET_ROOT = '/outer_root/media/rocket4q/psegs_flow_records_short'\n",
+    "\n",
+    "# from psegs.exp.fused_lidar_flow import FlowRecTable\n",
+    "\n",
+    "# T = FlowRecTable(spark, PSEGS_SYNTHFLOW_PARQUET_ROOT)\n",
+    "# synthflow_record_uris = T.get_record_uris()\n",
+    "# print(\"Found %s PSegs SynthFlow records\" % len(synthflow_record_uris))\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "# fr_samp_rdd = T.get_records_with_samples_rdd(\n",
+    "#                     record_uris=[PSEGS_SYNTHFLOW_DEMO_RECORD_URIS[0]],\n",
+    "#                     include_cameras=False,\n",
+    "#                     include_cuboids=False,\n",
+    "#                     include_point_clouds=False)\n",
+    "# flow_rec = fr_samp_rdd.take(1)[0][0]\n",
+    "\n",
+    "# print(\"Sample record:\")\n",
+    "# show_html(flow_rec.to_html())\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Writing cheap_optical_flow_eval_analysis/psegs_synthflow.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile cheap_optical_flow_eval_analysis/psegs_synthflow.py\n",
+    "\n",
+    "from psegs import datum\n",
+    "from psegs.exp.fused_lidar_flow import FlowRecTable\n",
+    "\n",
+    "from cheap_optical_flow_eval_analysis.ofp import *\n",
+    "\n",
+    "from oarphpy.spark import CloudpickeledCallable\n",
+    "\n",
+    "\n",
+    "# Please provide the PSegs synthetic flow Parquet directory root below:\n",
+    "# PSEGS_SYNTHFLOW_PARQUET_ROOT = '/outer_root/media/rocket4q/psegs_flow_records_short_fixed'\n",
+    "PSEGS_SYNTHFLOW_PARQUET_ROOT = '/outer_root/media/rocket4q/psegs_flow_records_FULL_fixed'\n",
+    "\n",
+    "def psegs_synthflow_flow_rec_to_fp(flow_rec, sample):\n",
+    "  \"\"\"Convert one flow record plus its sample into an `OpticalFlowPair`.\n",
+    "\n",
+    "  The first camera-image URI of each of the record's two clouds is looked up\n",
+    "  in `sample` to obtain the image pair.  Images and flow are wrapped in\n",
+    "  `CloudpickeledCallable`s instead of being evaluated here.\n",
+    "  \"\"\"\n",
+    "  import urllib.parse\n",
+    "\n",
+    "  import numpy as np\n",
+    "\n",
+    "  fr = flow_rec\n",
+    "\n",
+    "  uri_str_to_datum = sample.get_uri_str_to_datum()\n",
+    "\n",
+    "  # Find the camera_images associated with `flow_rec`\n",
+    "  ci1_url_str = str(fr.clouds[0].ci_uris[0])\n",
+    "  ci1_sd = uri_str_to_datum[ci1_url_str]\n",
+    "  ci1 = ci1_sd.camera_image\n",
+    "\n",
+    "  ci2_url_str = str(fr.clouds[1].ci_uris[0])\n",
+    "  ci2_sd = uri_str_to_datum[ci2_url_str]\n",
+    "  ci2 = ci2_sd.camera_image\n",
+    "\n",
+    "  # Distance between the two ego poses.\n",
+    "  world_T1 = ci1.ego_pose.translation\n",
+    "  world_T2 = ci2.ego_pose.translation\n",
+    "  translation_meters = np.linalg.norm(world_T2 - world_T1)\n",
+    "\n",
+    "  # URI timestamps appear to be epoch nanoseconds (19-digit values such as\n",
+    "  # 1535478537420482000), so scale the difference by 1e-9 to get seconds.\n",
+    "  diff_time_sec = float(1e-9 * abs(ci2_sd.uri.timestamp - ci1_sd.uri.timestamp))\n",
+    "\n",
+    "  id1 = ci1_url_str + '&extra.psegs_flow_sids=' + str(fr.clouds[0].sample_id)\n",
+    "  id2 = ci2_url_str + '&extra.psegs_flow_sids=' + str(fr.clouds[1].sample_id)\n",
+    "\n",
+    "  # The record URI is percent-quoted so it can be nested in another URI's extra.\n",
+    "  eval_uri = datum.URI(dataset=PSegsSynthFlowFactory.DATASET, extra={'pssf.ruri': urllib.parse.quote(str(fr.uri))})\n",
+    "\n",
+    "  uvdviz_im1 = fr.clouds[0].uvdvis\n",
+    "  uvdviz_im2 = fr.clouds[1].uvdvis\n",
+    "  K = ci1.K\n",
+    "\n",
+    "  fp = OpticalFlowPair(\n",
+    "          uri=eval_uri,\n",
+    "          dataset=\"PSegs SynthFlow for %s (%s)\" % (fr.uri.dataset, fr.uri.split),\n",
+    "          id1=id1,\n",
+    "          id2=id2,\n",
+    "          img1=CloudpickeledCallable(lambda: ci1.image),\n",
+    "          img2=CloudpickeledCallable(lambda: ci2.image),\n",
+    "          flow=CloudpickeledCallable(lambda: fr.to_optical_flow()),\n",
+    "\n",
+    "          diff_time_sec=diff_time_sec,\n",
+    "          translation_meters=translation_meters,\n",
+    "\n",
+    "          uvdviz_im1=uvdviz_im1,\n",
+    "          uvdviz_im2=uvdviz_im2,\n",
+    "          K=K)\n",
+    "  return fp\n",
+    "\n",
+    "def psegs_synthflow_create_fps(\n",
+    "        spark,\n",
+    "        flow_record_pq_table_path,\n",
+    "        record_uris,\n",
+    "        include_cuboids=False,\n",
+    "        include_point_clouds=False):\n",
+    "  \"\"\"Load the given flow records and return them as a list of `OpticalFlowPair`s.\n",
+    "\n",
+    "  Opens a `FlowRecTable` at `flow_record_pq_table_path`, fetches the records\n",
+    "  named by `record_uris` together with their samples (cameras are always\n",
+    "  included), collects them to the driver and converts each pair.\n",
+    "  \"\"\"\n",
+    "\n",
+    "  T = FlowRecTable(spark, flow_record_pq_table_path)\n",
+    "  rec_sample_rdd = T.get_records_with_samples_rdd(\n",
+    "                          record_uris=record_uris,\n",
+    "                          include_cameras=True,\n",
+    "                          include_cuboids=include_cuboids,\n",
+    "                          include_point_clouds=include_point_clouds)\n",
+    "\n",
+    "  # Use this module's converter; the bare name `flow_rec_to_fp` is not\n",
+    "  # defined here.\n",
+    "  fps = [\n",
+    "    psegs_synthflow_flow_rec_to_fp(flow_rec, sample)\n",
+    "    for flow_rec, sample in rec_sample_rdd.collect()\n",
+    "  ]\n",
+    "\n",
+    "  return fps\n",
+    "\n",
+    "\n",
+    "class PSegsSynthFlowFactory(FlowPairFactoryBase):\n",
+    "    \"\"\"Flow-pair factory backed by the `FlowRecTable` at `PSEGS_SYNTHFLOW_PARQUET_ROOT`.\"\"\"\n",
+    "\n",
+    "    DATASET = 'psegs_synthflow'\n",
+    "\n",
+    "    @classmethod\n",
+    "    def _get_frec_table(cls, spark):\n",
+    "        \"\"\"Return the class-level `FlowRecTable`, creating it on first use.\"\"\"\n",
+    "        if hasattr(cls, '_frec_table'):\n",
+    "            return cls._frec_table\n",
+    "        cls._frec_table = FlowRecTable(spark, PSEGS_SYNTHFLOW_PARQUET_ROOT)\n",
+    "        return cls._frec_table\n",
+    "\n",
+    "    @classmethod\n",
+    "    def list_fp_uris(cls, spark):\n",
+    "        \"\"\"Return one `datum.URI` per record URI, with the record URI percent-quoted in `extra`.\"\"\"\n",
+    "        import urllib.parse\n",
+    "\n",
+    "        table = cls._get_frec_table(spark)\n",
+    "        fp_uris = []\n",
+    "        for ruri in table.get_record_uris():\n",
+    "            quoted = urllib.parse.quote(str(ruri))\n",
+    "            fp_uris.append(datum.URI(dataset=cls.DATASET, extra={'pssf.ruri': quoted}))\n",
+    "        return fp_uris\n",
+    "\n",
+    "    @classmethod\n",
+    "    def _get_fp_rdd_for_uris(cls, spark, uris):\n",
+    "        \"\"\"Unquote the record URIs in `uris` and map the matching records to flow pairs.\"\"\"\n",
+    "        import urllib.parse\n",
+    "\n",
+    "        table = cls._get_frec_table(spark)\n",
+    "        ruris = []\n",
+    "        for uri in uris:\n",
+    "            ruris.append(urllib.parse.unquote(uri.extra['pssf.ruri']))\n",
+    "        rec_sample_rdd = table.get_records_with_samples_rdd(\n",
+    "                          record_uris=ruris,\n",
+    "                          include_cameras=True,\n",
+    "                          include_cuboids=False,\n",
+    "                          include_point_clouds=False)\n",
+    "        return rec_sample_rdd.map(\n",
+    "            lambda rec_and_sample: psegs_synthflow_flow_rec_to_fp(*rec_and_sample))\n",
+    "        \n",
+    "\n",
+    "\n",
+    "# def psegs_synthflow_iter_fp_rdds(\n",
+    "#         spark,\n",
+    "#         flow_record_pq_table_path,\n",
+    "#         fps_per_rdd=100,\n",
+    "#         include_cuboids=False,\n",
+    "#         include_point_clouds=False):\n",
+    "  \n",
+    "#   T = FlowRecTable(spark, flow_record_pq_table_path)\n",
+    "#   ruris = T.get_record_uris()\n",
+    "\n",
+    "#   # Ensure a sort so that pairs from similar segments will load in the same\n",
+    "#   # RDD -- that makes joins smaller and faster\n",
+    "#   ruris = sorted(ruris)\n",
+    "\n",
+    "#   from oarphpy import util as oputil\n",
+    "#   for ruri_chunk in oputil.ichunked(ruris, fps_per_rdd):\n",
+    "#     frec_sample_rdd = T.get_records_with_samples_rdd(\n",
+    "#                           record_uris=rids,\n",
+    "#                           include_cuboids=include_cuboids,\n",
+    "#                           include_point_clouds=include_point_clouds)\n",
+    "#     fp_rdd = frec_sample_rdd.map(flow_rec_to_fp)\n",
+    "#     yield fp_rdd\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 21:11:57,017\toarph 1807391 : Source has changed! Rebuilding Egg ...\n",
+      "2021-04-28 21:11:57,018\toarph 1807391 : Using source root /tmp/tmpekna3vk7_cheap_optical_flow_eval_analysis/cheap_optical_flow_eval_analysis \n",
+      "2021-04-28 21:11:57,018\toarph 1807391 : Using source root /tmp/tmpekna3vk7_cheap_optical_flow_eval_analysis \n",
+      "2021-04-28 21:11:57,020\toarph 1807391 : Generating egg to /tmp/tmp9pgzm5jp_oarphpy_eggbuild ...\n",
+      "2021-04-28 21:11:57,026\toarph 1807391 : ... done.  Egg at /tmp/tmp9pgzm5jp_oarphpy_eggbuild/cheap_optical_flow_eval_analysis-0.0.0-py3.8.egg\n"
+     ]
+    }
+   ],
+   "source": [
+    "from cheap_optical_flow_eval_analysis.psegs_synthflow import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 20:52:13,503\tps   723307 : FlowRecTable: Reading parquet from /outer_root/media/rocket4q/psegs_flow_records_FULL_fixed \n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 500 PSegs SynthFlow scenes\n"
+     ]
+    }
+   ],
+   "source": [
+    "from psegs import datum\n",
+    "\n",
+    "import urllib.parse\n",
+    "\n",
+    "PSEGS_SYNTHFLOW_DEMO_FPS_DO_CACHE = True\n",
+    "PSEGS_SYNTHFLOW_DEMO_FPS_CACHE_PATH = '/tmp/psegs_synthflow_demo.pkl'\n",
+    "\n",
+    "PSEGS_SYNTHFLOW_DEMO_RECORD_RURIS = (\n",
+    "  'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&extra.psegs_flow_sids=4340,4339',\n",
+    "  'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&extra.psegs_flow_sids=11219,11269',\n",
+    "\n",
+    "  'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0501&extra.psegs_flow_sids=40009,40010',\n",
+    "  'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0501&extra.psegs_flow_sids=50013,50014',\n",
+    "\n",
+    "#   'psegs://dataset=kitti-360-fused&split=train&segment_id=2013_05_28_drive_0000_sync&extra.psegs_flow_sids=11103,11104',\n",
+    "#   'psegs://dataset=kitti-360-fused&split=train&segment_id=2013_05_28_drive_0000_sync&extra.psegs_flow_sids=1181,1182',\n",
+    "\n",
+    "#   'psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0002&extra.psegs_flow_sids=10016,10017',\n",
+    "#   'psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0582&extra.psegs_flow_sids=60035,60036',\n",
+    "\n",
+    "#   'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0393&extra.psegs_flow_sids=50017,50018',\n",
+    "#   'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0501&extra.psegs_flow_sids=40019,40020',\n",
+    "    #  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D30009%2C30010%26sel_datums%3Dcamera%7CCAM_BACK_RIGHT%2C1535478534928113000%2Ccamera%7CCAM_BACK_RIGHT%2C1535478535428113000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D50016%2C50017%26sel_datums%3Dcamera%7CCAM_FRONT_LEFT%2C1535478538404799000%2Ccamera%7CCAM_FRONT_LEFT%2C1535478538904799000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D50018%2C50019%26sel_datums%3Dcamera%7CCAM_FRONT_LEFT%2C1535478539504799000%2Ccamera%7CCAM_FRONT_LEFT%2C1535478540004799000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D30031%2C30032%26sel_datums%3Dcamera%7CCAM_BACK_RIGHT%2C1535478546028113000%2Ccamera%7CCAM_BACK_RIGHT%2C1535478546528113000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D50023%2C50024%26sel_datums%3Dcamera%7CCAM_FRONT_LEFT%2C1535478541904799000%2Ccamera%7CCAM_FRONT_LEFT%2C1535478542504811000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D20006%2C20007%26sel_datums%3Dcamera%7CCAM_BACK_LEFT%2C1535478533447405000%2Ccamera%7CCAM_BACK_LEFT%2C1535478533947405000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D10019%2C10020%26sel_datums%3Dcamera%7CCAM_BACK%2C1535478540037558000%2Ccamera%7CCAM_BACK%2C1535478540537558000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D30023%2C30024%26sel_datums%3Dcamera%7CCAM_BACK_RIGHT%2C1535478541928113000%2Ccamera%7CCAM_BACK_RIGHT%2C1535478542528113000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D50007%2C50008%26sel_datums%3Dcamera%7CCAM_FRONT_LEFT%2C1535478533904799000%2Ccamera%7CCAM_FRONT_LEFT%2C1535478534404799000',\n",
+    " 'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D60014%2C60015%26sel_datums%3Dcamera%7CCAM_FRONT_RIGHT%2C1535478537420482000%2Ccamera%7CCAM_FRONT_RIGHT%2C1535478537870482000',\n",
+    " 'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dkitti-360%26split%3Dtrain%26segment_id%3D2013_05_28_drive_0004_sync%26extra.psegs_flow_sids%3D10412%2C10413%26sel_datums%3Dcamera%7Cright_rect%2C1369736347374754304%2Ccamera%7Cright_rect%2C1369736347479072256',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D50002%2C50003%26sel_datums%3Dcamera%7CCAM_FRONT_LEFT%2C1535478531354799000%2Ccamera%7CCAM_FRONT_LEFT%2C1535478531854807000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D50027%2C50028%26sel_datums%3Dcamera%7CCAM_FRONT_LEFT%2C1535478543904799000%2Ccamera%7CCAM_FRONT_LEFT%2C1535478544404799000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dkitti-360%26split%3Dtrain%26segment_id%3D2013_05_28_drive_0004_sync%26extra.psegs_flow_sids%3D10412%2C10413%26sel_datums%3Dcamera%7Cleft_rect%2C1369736347374744320%2Ccamera%7Cleft_rect%2C1369736347479187968',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D10027%2C10028%26sel_datums%3Dcamera%7CCAM_BACK%2C1535478543937558000%2Ccamera%7CCAM_BACK%2C1535478544437558000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D10035%2C10036%26sel_datums%3Dcamera%7CCAM_BACK%2C1535478548187558000%2Ccamera%7CCAM_BACK%2C1535478548687558000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D40005%2C40006%26sel_datums%3Dcamera%7CCAM_FRONT%2C1535478532912404000%2Ccamera%7CCAM_FRONT%2C1535478533412404000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D40002%2C40003%26sel_datums%3Dcamera%7CCAM_FRONT%2C1535478531362404000%2Ccamera%7CCAM_FRONT%2C1535478531862404000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D30034%2C30035%26sel_datums%3Dcamera%7CCAM_BACK_RIGHT%2C1535478547628113000%2Ccamera%7CCAM_BACK_RIGHT%2C1535478548178113000',\n",
+    "#  'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dkitti-360%26split%3Dtrain%26segment_id%3D2013_05_28_drive_0000_sync%26extra.psegs_flow_sids%3D4340%2C4339%26sel_datums%3Dcamera%7Cleft_rect%2C1369731215809577984%2Ccamera%7Cleft_rect%2C1369731215914083072'\n",
+    "\n",
+    ")\n",
+    "\n",
+    "PSEGS_SYNTHFLOW_DEMO_FP_URIS = [\n",
+    "    datum.URI.from_str(s)\n",
+    "    for s in (\n",
+    "         'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dnuscenes%26split%3Dtrain_track%26segment_id%3Dscene-0501%26extra.psegs_flow_sids%3D60014%2C60015%26sel_datums%3Dcamera%7CCAM_FRONT_RIGHT%2C1535478537420482000%2Ccamera%7CCAM_FRONT_RIGHT%2C1535478537870482000',\n",
+    "         'psegs://dataset=psegs_synthflow&extra.pssf.ruri=psegs%3A//dataset%3Dkitti-360%26split%3Dtrain%26segment_id%3D2013_05_28_drive_0004_sync%26extra.psegs_flow_sids%3D10412%2C10413%26sel_datums%3Dcamera%7Cright_rect%2C1369736347374754304%2Ccamera%7Cright_rect%2C1369736347479072256',\n",
+    "    )\n",
+    "]\n",
+    "\n",
+    "ALL_FP_FACTORY_CLSS.append(PSegsSynthFlowFactory)\n",
+    "\n",
+    "psegs_synthflow_all_rids = PSegsSynthFlowFactory.list_fp_uris(spark)\n",
+    "print(\"Found %s PSegs SynthFlow scenes\" % len(psegs_synthflow_all_rids))\n",
+    "\n",
+    "# Load the demo FlowPairs from the local pickle cache when present; otherwise\n",
+    "# build them with Spark and (optionally) write the cache for next time.\n",
+    "if SHOW_DEMO_OUTPUT:\n",
+    "    import pickle\n",
+    "    if os.path.exists(PSEGS_SYNTHFLOW_DEMO_FPS_CACHE_PATH):\n",
+    "        print(\"Loading demo FlowPairs from %s\" % PSEGS_SYNTHFLOW_DEMO_FPS_CACHE_PATH)\n",
+    "        # Context manager so the cache file handle is closed right away.\n",
+    "        with open(PSEGS_SYNTHFLOW_DEMO_FPS_CACHE_PATH, 'rb') as f:\n",
+    "            fps = pickle.load(f)\n",
+    "    else:\n",
+    "        print(\"Building Demo FlowPairs, this might take a while ....\")\n",
+    "        fp_rdd = PSegsSynthFlowFactory.get_fp_rdd_for_uris(spark, PSEGS_SYNTHFLOW_DEMO_FP_URIS)\n",
+    "        fps = fp_rdd.collect()\n",
+    "        if PSEGS_SYNTHFLOW_DEMO_FPS_DO_CACHE:\n",
+    "            print(\"Saving demo FlowPairs to %s ...\" % PSEGS_SYNTHFLOW_DEMO_FPS_CACHE_PATH)\n",
+    "            with open(PSEGS_SYNTHFLOW_DEMO_FPS_CACHE_PATH, 'wb') as f:\n",
+    "                pickle.dump(fps, f, protocol=4)\n",
+    "    \n",
+    "    for fp in fps:\n",
+    "        # FIXME: HTML display is disabled; the `continue` skips the two lines below.\n",
+    "        print('fixme html')\n",
+    "        continue\n",
+    "        show_html(fp.to_html())\n",
+    "        DEMO_FPS.append(fp)\n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "#     import urllib.parse\n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "#     for fp in fps:\n",
+    "#         show_html(fp.to_html() + \"<br/><br/><br/>\")\n",
+    "#         DEMO_FPS.append(fp)\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "# if SHOW_DEMO_OUTPUT:\n",
+    "#     if os.path.exists(PSEGS_SYNTHFLOW_DEMO_FPS_CACHE_PATH):\n",
+    "#         print(\"Loading demo FlowPairs from %s\" % PSEGS_SYNTHFLOW_DEMO_FPS_CACHE_PATH)\n",
+    "#         import pickle\n",
+    "#         fps = pickle.load(open(PSEGS_SYNTHFLOW_DEMO_FPS_CACHE_PATH, 'rb'))\n",
+    "#     else:\n",
+    "#         print(\"Building Demo FlowPairs, this might take a while ....\")\n",
+    "#         fps = psegs_synthflow_create_fps(spark, PSEGS_SYNTHFLOW_PARQUET_ROOT, PSEGS_SYNTHFLOW_DEMO_RECORD_URIS)\n",
+    "#         if PSEGS_SYNTHFLOW_DEMO_FPS_DO_CACHE:\n",
+    "#             print(\"Saving demo FlowPairs to %s ...\" % PSEGS_SYNTHFLOW_DEMO_FPS_CACHE_PATH)\n",
+    "#             import pickle\n",
+    "#             with open(PSEGS_SYNTHFLOW_DEMO_FPS_CACHE_PATH, 'wb') as f:\n",
+    "#                 pickle.dump(fps, f, protocol=4)\n",
+    "    \n",
+    "#     for fp in fps:\n",
+    "#         show_html(fp.to_html())\n",
+    "#         DEMO_FPS.append(fp)\n",
+    "        \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Reconstruction via Optical Flow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "## Reconstruction via Optical Flow\n",
+    "\n",
+    "def zero_flow(flow):  # boolean mask, True where the first two flow channels are both exactly 0\n",
+    "    return np.all(flow[:, :, :2] == 0, axis=-1)\n",
+    "\n",
+    "def warp_flow_backwards(img, flow):\n",
+    "    \"\"\"Sample `img` at (pixel grid - `flow`) with `cv2.remap`, i.e. apply the inverse of `flow`.\"\"\"\n",
+    "    rows, cols = flow.shape[:2]\n",
+    "    # Unary minus yields a new array, so the in-place additions below never touch the caller's `flow`.\n",
+    "    sample_map = -flow\n",
+    "    sample_map[:, :, 0] += np.arange(cols)\n",
+    "    sample_map[:, :, 1] += np.arange(rows)[:, np.newaxis]\n",
+    "    return cv2.remap(img, sample_map.astype(np.float32), None, cv2.INTER_LINEAR)\n",
+    "    \n",
+    "def warp_flow_forwards(img, flow):\n",
+    "    \"\"\"Given an image, apply the given optical flow `flow`.  Returns not only the warped\n",
+    "    image, but a boolean `mask` indicating warped pixels (i.e. there was non-zero flow *into* these pixels).\n",
+    "    With some help from https://stackoverflow.com/questions/41703210/inverting-a-real-valued-index-grid/46009462#46009462\n",
+    "    \"\"\"\n",
+    "    h, w = img.shape[:2]\n",
+    "    # Keep only the first two channels so the [-1, 2] reshape below stays aligned with the pixels.\n",
+    "    pts = flow[:, :, :2].copy()\n",
+    "    pts[:, :, 0] += np.arange(w)\n",
+    "    pts[:, :, 1] += np.arange(h)[:, np.newaxis]\n",
+    "    exclude = zero_flow(flow)\n",
+    "    if exclude.all():\n",
+    "        # No flow anywhere!\n",
+    "        return img.copy(), np.zeros((h, w), dtype=bool)\n",
+    "    \n",
+    "    from scipy.interpolate import griddata\n",
+    "    inpts = np.reshape(pts[~exclude], [-1, 2])\n",
+    "    grid_y, grid_x = np.mgrid[:h, :w]\n",
+    "    chan_out = []\n",
+    "    for ch in range(img.shape[-1]):\n",
+    "        spts = img[:, :, ch][~exclude].reshape([-1, 1])\n",
+    "        mapped = griddata(inpts, spts, (grid_x, grid_y), method='linear')\n",
+    "        chan_out.append(mapped.astype(img.dtype))\n",
+    "    out = np.stack(chan_out, axis=-1)\n",
+    "    out = out.reshape([h, w, len(chan_out)])\n",
+    "\n",
+    "    # Mark the (rounded) destination pixels that received flow; targets outside the image are dropped.\n",
+    "    # NOTE: the `np.bool` / `np.int` aliases were removed in NumPy 1.24, hence the builtins.\n",
+    "    mask = np.rint(inpts).astype(int)\n",
+    "    mask = mask[(mask[:, 0] >= 0) & (mask[:, 0] < w) & (mask[:, 1] >= 0) & (mask[:, 1] < h)]\n",
+    "    valid_mask = np.zeros((h, w), dtype=bool)\n",
+    "    valid_mask[mask[:, 1], mask[:, 0]] = True\n",
+    "    \n",
+    "    return out, valid_mask\n",
+    "\n",
+    "# @attr.s(slots=True, eq=False, weakref_slot=False)\n",
+    "class FlowReconstructedImagePair(object):\n",
+    "    \"\"\"A pair of reconstructed images using an input pair of images and optical\n",
+    "    flow field (i.e. an `OpticalFlowPair` instance).\"\"\"\n",
+    "\n",
+    "    slots = (\n",
+    "        'opair',\n",
+    "        'img2_recon_fwd',\n",
+    "        'img2_recon_fwd_valid',\n",
+    "        'img1_recon_bkd',\n",
+    "        'img1_recon_bkd_valid'\n",
+    "    )\n",
+    "    \n",
+    "    def __init__(self, **kwargs):\n",
+    "        for k in self.slots:\n",
+    "            setattr(self, k, kwargs.get(k))\n",
+    "    \n",
+    "#     opair = attr.ib(default=OpticalFlowPair())\n",
+    "#     \"\"\"The original `OpticalFlowPair` with the source of the data for this reconstruction result.\"\"\"\n",
+    "    \n",
+    "#     img2_recon_fwd = attr.ib(default=np.array([]))\n",
+    "#     \"\"\"A Numpy image containing the result of FORWARDS-WARPING OpticalFlowPair::img1\n",
+    "#     via OpticalFlowPair::flow to reconstruct OpticalFlowPair::img2\"\"\"\n",
+    "\n",
+    "#     img2_recon_fwd_valid = attr.ib(default=np.array([]))\n",
+    "#     \"\"\"A Numpy boolean mask indicating which pixels of `img2_recon_fwd` were modified via non-zero flow\"\"\"\n",
+    "    \n",
+    "#     img1_recon_bkd = attr.ib(default=np.array([]))\n",
+    "#     \"\"\"A Numpy image containing the result of BACKWARDS-WARPING OpticalFlowPair::img2\n",
+    "#     via OpticalFlowPair::flow to reconstruct OpticalFlowPair::img1\"\"\"\n",
+    "\n",
+    "#     img1_recon_bkd_valid = attr.ib(default=np.array([]))\n",
+    "#     \"\"\"A Numpy boolean mask indicating which pixels of `img1_recon_bkd` were modified via non-zero flow\"\"\"\n",
+    "        \n",
+    "    @classmethod\n",
+    "    def create_from(cls, oflow_pair: OpticalFlowPair):\n",
+    "        \"\"\"Build the forward- and backward-warped reconstructions for `oflow_pair`.\"\"\"\n",
+    "        flow = oflow_pair.get_flow()\n",
+    "        # Forward Warp\n",
+    "        fwarped, fvalid = warp_flow_forwards(oflow_pair.get_img1(), flow)\n",
+    "\n",
+    "        # Backwards Warp: `warp_flow_backwards` negates its input, so `-flow` is passed to sample\n",
+    "        # img2 at (pixel + flow); pixels with zero flow are reported as not valid.\n",
+    "        bwarped = warp_flow_backwards(oflow_pair.get_img2(), -flow[:, :, :2])\n",
+    "        bvalid = ~zero_flow(flow)\n",
+    "        # Construct via `cls` so that subclasses get instances of their own type.\n",
+    "        return cls(\n",
+    "                opair=oflow_pair,\n",
+    "                img2_recon_fwd=fwarped,\n",
+    "                img2_recon_fwd_valid=fvalid,\n",
+    "                img1_recon_bkd=bwarped,\n",
+    "                img1_recon_bkd_valid=bvalid)\n",
+    "    \n",
+    "    def to_html(self):\n",
+    "        \"\"\"Render both reconstructions as an HTML table; images are embedded via `img_to_data_uri`.\"\"\"\n",
+    "        # We use pixels from the destination image in order to make the reconstruction\n",
+    "        # easier to interpret; we'll fade them in intensity so that they are easy to\n",
+    "        # tell apart from the warped pixels.\n",
+    "        FADE_UNTOUCHED_PIXELS = 0.3\n",
+    "        viz_fwd = self.img2_recon_fwd.copy().astype(np.float32)\n",
+    "        im2 = self.opair.get_img2()\n",
+    "        if (~self.img2_recon_fwd_valid).any():\n",
+    "            viz_fwd[~self.img2_recon_fwd_valid] = im2[~self.img2_recon_fwd_valid]\n",
+    "            viz_fwd[~self.img2_recon_fwd_valid] *= FADE_UNTOUCHED_PIXELS\n",
+    "        else:\n",
+    "            # viz_fwd = im2.copy() * FADE_UNTOUCHED_PIXELS\n",
+    "            print('no invalids forward!')\n",
+    "        \n",
+    "        viz_bkd = self.img1_recon_bkd.copy().astype(np.float32)\n",
+    "        im1 = self.opair.get_img1()\n",
+    "        if (~self.img1_recon_bkd_valid).any():\n",
+    "            viz_bkd[~self.img1_recon_bkd_valid] = im1[~self.img1_recon_bkd_valid]\n",
+    "            viz_bkd[~self.img1_recon_bkd_valid] *= FADE_UNTOUCHED_PIXELS\n",
+    "        else:\n",
+    "            # viz_bkd = im1.copy() * FADE_UNTOUCHED_PIXELS\n",
+    "            print('no invalids backwards!')\n",
+    "        \n",
+    "        html = \"\"\"\n",
+    "            <table>\n",
+    "            \n",
+    "            <tr><td style=\"text-align:left\"><b>Forwards Warped <i>(dark pixels unwarped)</i></b></td></tr>\n",
+    "            <tr><td><img src=\"{viz_fwd}\" width=\"100%\" /></td></tr>\n",
+    "\n",
+    "            <tr><td style=\"text-align:left\"><b>Backwards Warped <i>(dark pixels unwarped)</i></b></td></tr>\n",
+    "            <tr><td><img src=\"{viz_bkd}\" width=\"100%\" /></td></tr>\n",
+    "\n",
+    "            </table>\n",
+    "        \"\"\".format(\n",
+    "                viz_fwd=img_to_data_uri(viz_fwd.astype(np.uint8)),\n",
+    "                viz_bkd=img_to_data_uri(viz_bkd.astype(np.uint8)))\n",
+    "        return html\n",
+    "\n",
+    "        \n",
+    "if SHOW_DEMO_OUTPUT:\n",
+    "    DEMO_RECONS = []\n",
+    "    for p in DEMO_FPS:\n",
+    "        print('fixme html')\n",
+    "        continue\n",
+    "        recon = FlowReconstructedImagePair.create_from(p)\n",
+    "#         show_html(recon.to_html() + \"</br></br></br>\")\n",
+    "        DEMO_RECONS.append(recon)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Analysis: Demo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# Analysis Utils\n",
+    "\n",
+    "def mse(i1, i2, valid):  # differences are taken in float64: uint8 arithmetic would wrap modulo 256\n",
+    "    return np.mean((i1[valid].astype(np.float64) - i2[valid].astype(np.float64)) ** 2)\n",
+    "\n",
+    "def rmse(i1, i2, valid):\n",
+    "    return math.sqrt(mse(i1, i2, valid))\n",
+    "\n",
+    "def psnr(i1, i2, valid):  # peak value fixed at 255; the MSE is floored at 1e-12 to keep log10 finite\n",
+    "    return 20. * math.log10(255.) - 10. * math.log10(max(mse(i1, i2, valid), 1e-12))\n",
+    "\n",
+    "def ssim(i1, i2, valid):\n",
+    "    \"\"\"Mean of the SKImage per-pixel SSIM map between `i1` and `i2`, restricted to `valid`.\"\"\"\n",
+    "    # Some variance among implementations out there; we will just use SKImage for now ...\n",
+    "    # https://github.com/scikit-image/scikit-image/blob/master/skimage/metrics/_structural_similarity.py#L12-L232\n",
+    "    # https://github.com/nianticlabs/monodepth2/blob/13200ab2f29f2f10dec3aa5db29c32a23e29d376/layers.py#L218\n",
+    "    # https://cvnote.ddlee.cn/2019/09/12/psnr-ssim-python\n",
+    "    from skimage.metrics import structural_similarity\n",
+    "    _, ssim_map = structural_similarity(i1, i2, win_size=11, multichannel=True, full=True)\n",
+    "    return np.mean(ssim_map[valid])\n",
+    "\n",
+    "def to_edge_im(img):\n",
+    "    \"\"\"Stack Laplacian, Sobel-x and Sobel-y responses of the grayscale `img` along a new last axis.\"\"\"\n",
+    "    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)  # convert once; shared by all three filters\n",
+    "    sobel_x = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)\n",
+    "    sobel_y = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)\n",
+    "    return np.stack([cv2.Laplacian(gray, cv2.CV_32F, ksize=1), sobel_x, sobel_y], axis=-1)\n",
+    "\n",
+    "def edges_mse(i1, i2, valid):\n",
+    "    return mse(to_edge_im(i1), to_edge_im(i2), valid)\n",
+    "\n",
+    "\n",
+    "def oflow_coverage(valid):\n",
+    "    return valid.sum() / (valid.shape[0] * valid.shape[1])\n",
+    "\n",
+    "def oflow_magnitude_hist(flow, valid, bins=50):  # -> (bin_edges, bin_counts) of flow L2 norms at `valid`\n",
+    "    uv = flow[valid]\n",
+    "    counts, edges = np.histogram(np.sqrt(uv[:, 0] ** 2 + uv[:, 1] ** 2), bins=bins)\n",
+    "    return edges, counts\n",
+    "\n",
+    "\n",
+    "# Analysis Data Model\n",
+    "\n",
+    "class OFlowReconErrors(object):\n",
+    "    \"\"\"Reconstruction-error metrics for one `FlowReconstructedImagePair`, stored as two\n",
+    "    plain dicts (`forward_stats`, `backward_stats`) for easy interop with Spark SQL.\"\"\"\n",
+    "\n",
+    "    RECONSTRUCTION_ERR_METRICS = {\n",
+    "        'SSIM': ssim,\n",
+    "        'MSE': mse,\n",
+    "        'RMSE': rmse,\n",
+    "        'PSNR': psnr,\n",
+    "        'Edges_MSE': edges_mse,\n",
+    "    }\n",
+    "    \n",
+    "    def __init__(self, recon_pair: FlowReconstructedImagePair):\n",
+    "        \"\"\"Evaluate every metric on the forward (img2) and backward (img1) reconstructions.\"\"\"\n",
+    "        metrics = self.RECONSTRUCTION_ERR_METRICS\n",
+    "        # Forward warp: the true img2 against its reconstruction.\n",
+    "        target2 = recon_pair.opair.get_img2()\n",
+    "        self.forward_stats = {\n",
+    "            name: func(target2, recon_pair.img2_recon_fwd, recon_pair.img2_recon_fwd_valid)\n",
+    "            for name, func in metrics.items()}\n",
+    "        \n",
+    "        # Backward warp: the true img1 against its reconstruction.\n",
+    "        target1 = recon_pair.opair.get_img1()\n",
+    "        self.backward_stats = {\n",
+    "            name: func(target1, recon_pair.img1_recon_bkd, recon_pair.img1_recon_bkd_valid)\n",
+    "            for name, func in metrics.items()}\n",
+    "\n",
+    "    def to_html(self):\n",
+    "        \"\"\"Render the forward / backward stats as an HTML table with one row per metric.\"\"\"\n",
+    "        row_template = \"\"\"\n",
+    "            <tr>\n",
+    "              <td style=\"text-align:left\"><b>{name}</b></td>\n",
+    "              <td style=\"text-align:left\">{fwd:.2f}</td>\n",
+    "              <td style=\"text-align:left\">{bkd:.2f}</td>\n",
+    "            </tr>\n",
+    "            \"\"\"\n",
+    "\n",
+    "        # One row per metric, in the declaration order of RECONSTRUCTION_ERR_METRICS.\n",
+    "        table_rows = \"\".join(\n",
+    "            row_template.format(name=name, fwd=self.forward_stats[name], bkd=self.backward_stats[name])\n",
+    "            for name in self.RECONSTRUCTION_ERR_METRICS)\n",
+    "        \n",
+    "        html = \"\"\"\n",
+    "            <table>\n",
+    "              <tr>\n",
+    "                  <th></th> <th><b>Forwards Warp</b></th> <th><b>Backwards Warp</b></th>\n",
+    "              </tr>\n",
+    "\n",
+    "              {table_rows}\n",
+    "\n",
+    "            </table>\n",
+    "        \"\"\".format(table_rows=table_rows)\n",
+    "        \n",
+    "        return html\n",
+    "            \n",
+    "# @attr.s(slots=True, eq=False, weakref_slot=False)\n",
+    "class OFlowStats(object):\n",
+    "    \"\"\"Stats on the optical flow of a `OpticalFlowPair` instance\"\"\"\n",
+    "\n",
+    "    slots = (\n",
+    "        'opair',\n",
+    "        'coverage',\n",
+    "        'magnitude_hist',\n",
+    "    )\n",
+    "    \n",
+    "    def __init__(self, **kwargs):\n",
+    "        for k in self.slots:\n",
+    "            setattr(self, k, kwargs.get(k))\n",
+    "    \n",
+    "#     opair = attr.ib(default=OpticalFlowPair())\n",
+    "#     \"\"\"The original `OpticalFlowPair` with the source of the data for this reconstruction result.\"\"\"\n",
+    "#     coverage = attr.ib(default=0)\n",
+    "#     \"\"\"Fraction of the image with valid flow\"\"\"\n",
+    "#     magnitude_hist = attr.ib(default=[np.array([]), np.array([])])\n",
+    "#     \"\"\"Histogram [bin edges, bin counts] of flow magnitudes\"\"\"\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def create_from(cls, oflow_pair: OpticalFlowPair):\n",
+    "        \"\"\"Compute coverage and the magnitude histogram over the pixels with non-zero flow.\"\"\"\n",
+    "        flow = oflow_pair.get_flow()\n",
+    "        valid = ~zero_flow(flow)\n",
+    "        return cls(  # `cls` so that subclasses get instances of their own type\n",
+    "                 opair=oflow_pair,\n",
+    "                 coverage=oflow_coverage(valid),\n",
+    "                 magnitude_hist=oflow_magnitude_hist(flow, valid))\n",
+    "                 \n",
+    "    def to_html(self):\n",
+    "        \"\"\"Render the flow coverage and the flow-magnitude histogram as an HTML table.\"\"\"\n",
+    "        import matplotlib.pyplot as plt\n",
+    "        fig = plt.figure()\n",
+    "        bin_edges, bin_counts = self.magnitude_hist\n",
+    "        # Draw each bar across its actual bin; the default bar width (0.8) ignores the bin width.\n",
+    "        plt.bar(bin_edges[:-1], bin_counts, width=np.diff(bin_edges), align='edge')\n",
+    "        plt.title(\"Histogram of Flow Magnitudes\")\n",
+    "        plt.xlabel('Flow Magnitude (pixels)')\n",
+    "        plt.ylabel('Count')\n",
+    "        # Render exactly once (the result is already an image), then release the figure.\n",
+    "        hist_img = matplotlib_fig_to_img(fig)\n",
+    "        plt.close(fig)\n",
+    "        return \"\"\"\n",
+    "            <table>           \n",
+    "            <tr><td style=\"text-align:left\"><b>Flow Coverage:</b> {coverage:.2f}% </td></tr>\n",
+    "            <tr><td><img src=\"{flow_hist}\" width=\"100%\" /></td></tr>\n",
+    "            </table>\n",
+    "        \"\"\".format(\n",
+    "                coverage=100. * self.coverage,\n",
+    "                flow_hist=img_to_data_uri(hist_img))\n",
+    "\n",
+    "\n",
+    "# Misc\n",
+    "\n",
+    "def matplotlib_fig_to_img(fig):\n",
+    "    \"\"\"Render a matplotlib figure to PNG in memory and return it decoded by `imageio.imread`.\n",
+    "\n",
+    "    `fig` is saved through its own `savefig`; when `fig` has no such method (some callers pass\n",
+    "    an already-rendered image), pyplot's current figure is saved instead, as before.\n",
+    "    \"\"\"\n",
+    "    import io\n",
+    "    import imageio\n",
+    "    import matplotlib.pyplot as plt\n",
+    "    # NOTE: the former `Image.open(buf).show()` debug call opened an external viewer on every call.\n",
+    "    with io.BytesIO() as buf:\n",
+    "        save = getattr(fig, 'savefig', plt.savefig)\n",
+    "        save(buf, format='png')\n",
+    "        buf.seek(0)\n",
+    "        return imageio.imread(buf)\n",
+    "\n",
+    "\n",
+    "if SHOW_DEMO_OUTPUT:\n",
+    "    %matplotlib agg\n",
+    "    for recon in DEMO_RECONS:\n",
+    "        p = recon.opair\n",
+    "        errors = OFlowReconErrors(recon)\n",
+    "        err_html = errors.to_html()  \n",
+    "            \n",
+    "        fstats = OFlowStats.create_from(p)\n",
+    "        stats_html = fstats.to_html()\n",
+    "            \n",
+    "        title = \"<b>{dataset} {id1} -> {id2}</b>\".format(dataset=p.dataset, id1=p.id1, id2=p.id2)\n",
+    "        \n",
+    "        show_html(title + stats_html + err_html + \"</br></br></br>\")\n",
+    "            "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Scene Flow Analysis (where depth and intrinsics are available)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "\"\"\"\n",
+    " * for psegs, we have uvd and K\n",
+    " * for kitti tracking, we'll be able to have uvd and K\n",
+    " * for deepdeform, the intrinsics are in each seq.  also a mask for maybe the images of interest?\n",
+    " * for kitti sf, we can get K (P) from kitti-like file.  !!! kitti has obj_map colors image!!  \n",
+    "     need to figure out depth meters from disparity ...  derrrp to get the raw velodynes we have to cross-ref\n",
+    "     with odometry dataset. let's talk to yiyi about that...\n",
+    " * !!! do a test where you use nearest neighbor correspondence on raw clouds for OFlow. then can see how bad\n",
+    "     the pairing is sometimes\n",
+    " \n",
+    " * metrics: end-pt-error for NN forward; same for backward; then also do a chamfer distance metric\n",
+    " * (do all this again but first do an ICP on the raw depths-- the rigid background should probably align, right?\n",
+    "     use the ICP's RT to pose raw and \n",
+    " * a common class for all these is background / foreground.  want to break down chamfer dist etc bucket by at least\n",
+    "      background / foreground\n",
+    " * debug image: surface pairs of points with end pt error larger than E and plot on the image\n",
+    " \n",
+    " * another good test: (1) train self-sup SF on raw clouds.  then test on large displacement pair \n",
+    "     (walk a prediction forward many time steps). then can see how well that holds up vs our \"GT\"\n",
+    "\n",
+    "\"\"\"\n",
+    "\n",
+    "\n",
+    "\n",
+    "# def nn_distance(xyz_src, xyz_target):\n",
+    "#     import numpy as np\n",
+    "#     import open3d as o3d\n",
+    "#     pcds = o3d.geometry.PointCloud()\n",
+    "#     pcds.points = o3d.utility.Vector3dVector(xyz_src)\n",
+    "#     pcdt = o3d.geometry.PointCloud()\n",
+    "#     pcdt.points = o3d.utility.Vector3dVector(xyz_target)\n",
+    "#     dists = pcds.compute_point_cloud_distance(pcdt)\n",
+    "#     dists = np.asarray(dists)\n",
+    "#     return dists\n",
+    "\n",
+    "def get_icp_results(xyz_src, xyz_target, threshold=0.01, max_iteration=100):\n",
+    "    \"\"\"Point-to-point Open3D ICP of `xyz_src` onto `xyz_target` from an identity initial transform.\n",
+    "    Returns `(fitness, inlier_rmse)`, or the sentinel `(0.0, -1.)` when either cloud has no rows;\n",
+    "    `threshold` / `max_iteration` default to the values that were previously hard-coded.\"\"\"\n",
+    "    if xyz_target.shape[0] == 0 or xyz_src.shape[0] == 0:\n",
+    "        return 0.0, -1.\n",
+    "    \n",
+    "    import numpy as np\n",
+    "    import open3d as o3d\n",
+    "    pcds = o3d.geometry.PointCloud()\n",
+    "    pcds.points = o3d.utility.Vector3dVector(xyz_src)\n",
+    "    pcdt = o3d.geometry.PointCloud()\n",
+    "    pcdt.points = o3d.utility.Vector3dVector(xyz_target)\n",
+    "    reg = o3d.pipelines.registration.registration_icp(\n",
+    "                    pcds, pcdt, threshold, np.eye(4, 4),\n",
+    "                    o3d.pipelines.registration.TransformationEstimationPointToPoint(),\n",
+    "                    o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=max_iteration))\n",
+    "    return reg.fitness, reg.inlier_rmse\n",
+    "\n",
+    "def get_nearest_neighbors(xyz_src, xyz_target):\n",
+    "    if xyz_target.shape[0] == 0 or xyz_src.shape[0] == 0:\n",
+    "        return np.zeros((0, 3), dtype=np.float32)\n",
+    "    \n",
+    "    # We need to use scipy KDTree because open3d doesn't provide an API \n",
+    "    # for efficiently querying for more than one point\n",
+    "    \n",
+    "    import open3d as o3d\n",
+    "    pcdt = o3d.geometry.PointCloud()\n",
+    "    pcdt.points = o3d.utility.Vector3dVector(xyz_target)\n",
+    "    pcd_tree = o3d.geometry.KDTreeFlann(pcdt)\n",
+    "    \n",
+    "    pcds = o3d.geometry.PointCloud()\n",
+    "    pcds.points = o3d.utility.Vector3dVector(xyz_src)\n",
+    "    \n",
+    "    found = np.zeros(xyz_src.shape[0], dtype=np.int64)\n",
+    "    print(xyz_src.shape[0])\n",
+    "    for i in range(xyz_src.shape[0]):\n",
+    "        k, idx, dist = pcd_tree.search_hybrid_vector_3d(pcds.points[i], float('inf'), 1)\n",
+    "        found[i] = idx[0]\n",
+    "    return xyz_target[found]\n",
+    "    \n",
+    "#     from scipy.spatial import KDTree\n",
+    "#     print('fixme try open3d...')\n",
+    "#     print('tree size', xyz_target.shape[0])\n",
+    "#     tree = KDTree(xyz_target)\n",
+    "#     print('query size', xyz_src.shape[0])\n",
+    "#     dists, idx = tree.query(xyz_src)\n",
+    "#     return xyz_target[idx]\n",
+    "\n",
+    "\n",
+    "class SFlowStats(object):\n",
+    "    \"\"\"Stats on the scene flow of a `OpticalFlowPair` instance (that has scene flow data)\"\"\"\n",
+    "\n",
+    "    slots = (\n",
+    "        'sf_norm_hist',\n",
+    "           # Histogram [bin edges, bin counts] of scene flow vector L2 norms\n",
+    "        'sf_norm_var',\n",
+    "           # Variance of Scene Flow displacements\n",
+    "        \n",
+    "        'fwd_nn_xyz',\n",
+    "        'fwd_nn_hist',\n",
+    "\n",
+    "        'fwd_nn_dist_to_sf_dist',\n",
+    "        \n",
+    "        'fwd_nnepe_mean',\n",
+    "        'fwd_nnepe_sum',\n",
+    "        'fwd_nnepe_50th',\n",
+    "        'fwd_nnepe_75th',\n",
+    "        'fwd_nnepe_95th',\n",
+    "        \n",
+    "        'fwd_nnepe_hist',\n",
+    "            # Histogram [bin edges, bin counts] of forward nearest-neighbor end-point-errors\n",
+    "        \n",
+    "        'bkd_nnepe_mean',\n",
+    "        'bkd_nnepe_sum',\n",
+    "#         'icp_fwd_nn_end_point_error',\n",
+    "#         'icp_bkd_nn_end_point_error',\n",
+    "#         'icp_chamfer_distance',\n",
+    "        \n",
+    "#         'chamfer_distance',\n",
+    "#         'icp_chamfer_distance',\n",
+    "        \n",
+    "\n",
+    "        \n",
+    "        'icp_fitness',\n",
+    "        'icp_inlier_rmse',\n",
+    "        \n",
+    "        'opair',\n",
+    "    )\n",
+    "    \n",
+    "    def get_rowdata(self):\n",
+    "        \"\"\"Return the scalar stats as a flat dict; attributes that are unset (None) become NaN.\"\"\"\n",
+    "        KEYS = (\n",
+    "            'sf_norm_var',\n",
+    "            'fwd_nn_dist_to_sf_dist',\n",
+    "            'fwd_nnepe_mean',\n",
+    "            'fwd_nnepe_50th',\n",
+    "            'fwd_nnepe_75th',\n",
+    "            'fwd_nnepe_95th',\n",
+    "            'bkd_nnepe_mean',\n",
+    "            'icp_fitness',\n",
+    "            'icp_inlier_rmse',\n",
+    "        )\n",
+    "        rowdata = {}\n",
+    "        for k in KEYS:\n",
+    "            value = getattr(self, k, None)\n",
+    "            # Test for None explicitly: `value or nan` would also discard legitimate zeros.\n",
+    "            rowdata[k] = float('nan') if value is None else value\n",
+    "        return rowdata\n",
+    "    \n",
+    "    def __init__(self, **kwargs):\n",
+    "        for k in self.slots:\n",
+    "            setattr(self, k, kwargs.get(k))\n",
+    "    \n",
+    "#     opair = attr.ib(default=OpticalFlowPair())\n",
+    "#     \"\"\"The original `OpticalFlowPair` with the source of the data for this reconstruction result.\"\"\"\n",
+    "    \n",
+    "#     coverage = attr.ib(default=0)\n",
+    "#     \"\"\"Fraction of the image with valid flow\"\"\"\n",
+    "    \n",
+    "#     fwd_nnepes = attr.ib(default=[np.array([]), np.array([])])\n",
+    "#     \"\"\"\"\"\"\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def create_from(cls, oflow_pair: OpticalFlowPair):\n",
+    "        \"\"\"Compute nearest-neighbor, scene-flow-norm and ICP statistics for `oflow_pair`.\"\"\"\n",
+    "        import numpy as np\n",
+    "        fp = oflow_pair\n",
+    "        import time\n",
+    "        start = time.time()\n",
+    "        print('start', fp.uri)\n",
+    "        uvd1 = fp.uvdviz_im1[:, :3]\n",
+    "        uvd2 = fp.uvdviz_im2[:, :3]\n",
+    "        print('uvd1 shape', uvd1.shape[0])\n",
+    "        print('uvd2 shape', uvd2.shape[0])\n",
+    "#         visible_either = ((uvd1[:, -1] == 1) | (uvd2[:, -1] == 1))\n",
+    "        xyz1 = uvd_to_xyzrgb(uvd1, fp.K)[:, :3]\n",
+    "        xyz2 = uvd_to_xyzrgb(uvd2, fp.K)[:, :3]\n",
+    "        fwd_nn = get_nearest_neighbors(xyz1, xyz2)\n",
+    "        print('got fwd nn', fwd_nn.shape)\n",
+    "        fwd_nn_dist = np.linalg.norm(fwd_nn - xyz1, axis=1)\n",
+    "        fwd_sf_dist = np.linalg.norm(xyz2 - xyz1, axis=1)  # row i of xyz1 is paired with row i of xyz2\n",
+    "        fwd_nn_dist_to_sf_dist = fwd_nn_dist.sum() / fwd_sf_dist.sum()\n",
+    "        fwd_nn_end_point_error = np.linalg.norm(fwd_nn - xyz2, axis=1)\n",
+    "        \n",
+    "        bkd_nn = get_nearest_neighbors(xyz2, xyz1)\n",
+    "        bkd_nn_end_point_error = np.linalg.norm(bkd_nn - xyz1, axis=1)\n",
+    "        \n",
+    "        fwd_nnepes = sorted(fwd_nn_end_point_error.tolist())\n",
+    "        def percentile(slst, p):\n",
+    "            # An empty cloud has no percentiles; report NaN instead of raising IndexError.\n",
+    "            if not slst:\n",
+    "                return float('nan')\n",
+    "            return slst[min(int(p * len(slst)), len(slst) - 1)]\n",
+    "        \n",
+    "        bin_counts, bin_edges = np.histogram(fwd_nn_end_point_error, bins=1000)\n",
+    "        fwd_nnepe_hist = bin_edges, bin_counts\n",
+    "        \n",
+    "        bin_counts, bin_edges = np.histogram(fwd_nn_dist, bins=1000)\n",
+    "        fwd_nn_hist = bin_edges, bin_counts\n",
+    "        \n",
+    "        bin_counts, bin_edges = np.histogram(fwd_sf_dist, bins=1000)\n",
+    "        sf_norm_hist = bin_edges, bin_counts\n",
+    "        sf_norm_var = np.var(fwd_sf_dist)\n",
+    "        \n",
+    "        icp_fitness, icp_inlier_rmse = get_icp_results(xyz1, xyz2)\n",
+    "        print('end', fp.uri, time.time() - start)\n",
+    "        return cls(  # `cls` so that subclasses get instances of their own type\n",
+    "                 opair=oflow_pair,\n",
+    "                 sf_norm_hist=sf_norm_hist,\n",
+    "                 sf_norm_var=sf_norm_var,\n",
+    "\n",
+    "                 fwd_nn_xyz=fwd_nn,\n",
+    "                 fwd_nn_hist=fwd_nn_hist,\n",
+    "                 \n",
+    "                 fwd_nn_dist_to_sf_dist=fwd_nn_dist_to_sf_dist,\n",
+    "                 \n",
+    "                 fwd_nnepe_mean=np.mean(fwd_nn_end_point_error),\n",
+    "                 fwd_nnepe_sum=np.sum(fwd_nn_end_point_error),\n",
+    "                 fwd_nnepe_50th=percentile(fwd_nnepes, 0.5),\n",
+    "                 fwd_nnepe_75th=percentile(fwd_nnepes, 0.75),\n",
+    "                 fwd_nnepe_95th=percentile(fwd_nnepes, 0.95),\n",
+    "                 \n",
+    "                 bkd_nnepe_mean=np.mean(bkd_nn_end_point_error),\n",
+    "                 bkd_nnepe_sum=np.sum(bkd_nn_end_point_error),\n",
+    "            \n",
+    "                 fwd_nnepe_hist=fwd_nnepe_hist,\n",
+    "            \n",
+    "                 icp_fitness=icp_fitness,\n",
+    "                 icp_inlier_rmse=icp_inlier_rmse)\n",
+    "                 \n",
+    "    def to_html(self):\n",
+    "        import numpy as np\n",
+    "        \n",
+    "        fwd_nn_xyz = self.fwd_nn_xyz\n",
+    "        fp = self.opair\n",
+    "        uvd2 = fp.uvdviz_im2[:, :3]\n",
+    "        xyzrgb2 = uvd_to_xyzrgb(uvd2, fp.K, imgs=[fp.get_img2()])\n",
+    "        fwd_nn_xyzrgb = np.ones((fwd_nn_xyz.shape[0] + xyzrgb2.shape[0], 3 + 3)) * 110.\n",
+    "        fwd_nn_xyzrgb[:fwd_nn_xyz.shape[0], :3] = fwd_nn_xyz[:, :3]\n",
+    "        fwd_nn_xyzrgb[fwd_nn_xyz.shape[0]:, :6] = xyzrgb2[:, :6]\n",
+    "        \n",
+    "        fwd_nn_html = create_xyzrgb_3d_plot_html(fwd_nn_xyzrgb)\n",
+    "        \n",
+    "        \n",
+    "        import matplotlib.pyplot as plt\n",
+    "        fig = plt.figure()\n",
+    "        bin_edges, bin_counts = self.sf_norm_hist\n",
+    "        \n",
+    "        plt.bar(bin_edges[:-1], bin_counts)\n",
+    "        plt.xlim(left=0)\n",
+    "        plt.title(\"Histogram of Scene Flow Vector L2 Norms\")\n",
+    "        plt.xlabel('L2 Norm (meters)')\n",
+    "        plt.ylabel('Count')\n",
+    "\n",
+    "        sf_norm_hist_img = matplotlib_fig_to_img(fig)\n",
+    "        sf_norm_hist_html = img_to_data_uri(matplotlib_fig_to_img(sf_norm_hist_img))\n",
+    "                \n",
+    "        \n",
+    "        fig = plt.figure()\n",
+    "        bin_edges, bin_counts = self.fwd_nn_hist\n",
+    "        \n",
+    "        plt.bar(bin_edges[:-1], bin_counts)\n",
+    "        plt.xlim(left=0)\n",
+    "        plt.title(\"Histogram of Nearest-Neighbor Distances (L2 Norms)\")\n",
+    "        plt.xlabel('Distance (meters)')\n",
+    "        plt.ylabel('Count')\n",
+    "\n",
+    "        fwd_nn_hist_img = matplotlib_fig_to_img(fig)\n",
+    "        fwd_nn_hist_html = img_to_data_uri(matplotlib_fig_to_img(fwd_nn_hist_img))\n",
+    "        \n",
+    "        \n",
+    "        fig = plt.figure()\n",
+    "        bin_edges, bin_counts = self.fwd_nnepe_hist\n",
+    "        \n",
+    "        plt.bar(bin_edges[:-1], bin_counts)\n",
+    "        plt.xlim(left=0)\n",
+    "        plt.title(\"Histogram of Forward Nearest-Neighbor End-Point-Errors\")\n",
+    "        plt.xlabel('End Point Error (meters)')\n",
+    "        plt.ylabel('Count')\n",
+    "\n",
+    "        fepe_hist_img = matplotlib_fig_to_img(fig)\n",
+    "        fepe_hist_html = img_to_data_uri(matplotlib_fig_to_img(fepe_hist_img))\n",
+    "        \n",
+    "        html = \"\"\"\n",
+    "            <table>\n",
+    "            <tr><td><img src=\"{sf_norm_hist}\" width=\"100%\" /></td></tr>\n",
+    "            \n",
+    "            <tr>\n",
+    "              <td style=\"text-align:left\"><b>Variance of Scene Flow Displacements:</b> {sf_norm_var}</td>\n",
+    "            </tr>\n",
+    "            \n",
+    "            <tr><td style=\"text-align:left\"><b>Forwards Nearest Neighbor End Point Error</b></td></tr>\n",
+    "            <tr>\n",
+    "                <td style=\"text-align:left\">\n",
+    "                Ratio of total nearest-neighbor distance to total scene flow displacement:\n",
+    "                  {fwd_nn_dist_to_sf_dist}\n",
+    "                </td>\n",
+    "            </tr>\n",
+    "            <tr><td>\n",
+    "                <img src=\"{fwd_nn_hist_html}\" width=\"100%\" /><br />\n",
+    "                (Compare with Scene Flow Displacement histogram above)</td>\n",
+    "            </tr>\n",
+    "            <tr><td><img src=\"{fepe_hist}\" width=\"100%\" /></td></tr>\n",
+    "            <tr>\n",
+    "              <td style=\"text-align:left\">\n",
+    "                Mean: {fwd_nnepe_mean}<br/>\n",
+    "                Sum: {fwd_nnepe_sum}<br/>\n",
+    "                50th percentile: {fwd_nnepe_50th}<br/>\n",
+    "                75th percentile: {fwd_nnepe_75th}<br/>\n",
+    "                95th percentile: {fwd_nnepe_95th}<br/>\n",
+    "              </td>\n",
+    "            </tr>\n",
+    "            \n",
+    "            <tr>\n",
+    "                <td style=\"text-align:left\">\n",
+    "                  Nearest Neighbor Cloud (Grey) vs Target Cloud (Colors)<br /> {fwd_nn_html}\n",
+    "                </td>\n",
+    "            </tr> \n",
+    "            \n",
+    "            <tr><td style=\"text-align:left\"><b>Backwards Nearest Neighbor End Point Error</b></td></tr>\n",
+    "            <tr>\n",
+    "              <td style=\"text-align:left\">\n",
+    "                Mean: {bkd_nnepe_mean}<br/>\n",
+    "                Sum: {bkd_nnepe_sum}<br/>\n",
+    "              </td>\n",
+    "            </tr>\n",
+    "            \n",
+    "            <tr><td style=\"text-align:left\"><b>ICP (point-to-point) results:</b></td></tr>\n",
+    "            <tr>\n",
+    "              <td style=\"text-align:left\">\n",
+    "                Fitness: {icp_fitness}<br/>\n",
+    "                Inlier RMSE: {icp_inlier_rmse}<br/>\n",
+    "              </td>\n",
+    "            </tr>\n",
+    "            \n",
+    "            </table>\n",
+    "        \"\"\".format(\n",
+    "                sf_norm_hist=sf_norm_hist_html,\n",
+    "                sf_norm_var=self.sf_norm_var,\n",
+    "\n",
+    "                fwd_nn_hist_html=fwd_nn_hist_html,\n",
+    "                fwd_nn_dist_to_sf_dist=self.fwd_nn_dist_to_sf_dist,\n",
+    "                fepe_hist=fepe_hist_html,\n",
+    "                fwd_nnepe_mean=self.fwd_nnepe_mean, fwd_nnepe_sum=self.fwd_nnepe_sum,\n",
+    "                fwd_nnepe_50th=self.fwd_nnepe_50th, fwd_nnepe_75th=self.fwd_nnepe_75th,\n",
+    "                fwd_nnepe_95th=self.fwd_nnepe_95th,\n",
+    "                fwd_nn_html=fwd_nn_html,\n",
+    "                \n",
+    "                bkd_nnepe_mean=self.bkd_nnepe_mean,\n",
+    "                bkd_nnepe_sum=self.bkd_nnepe_sum,\n",
+    "        \n",
+    "                icp_fitness=self.icp_fitness,\n",
+    "                icp_inlier_rmse=self.icp_inlier_rmse)\n",
+    "        return html\n",
+    "    \n",
+    "if SHOW_DEMO_OUTPUT:\n",
+    "    %matplotlib agg\n",
+    "    for fp in DEMO_FPS:\n",
+    "        print('fixme html')\n",
+    "        continue\n",
+    "        sfstats = SFlowStats.create_from(fp)\n",
+    "        show_html(fp.to_html() + \"<br />\" + sfstats.to_html() + \"</br></br></br>\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Analysis on Full Datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "analysis_uris_full 200\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.8/dist-packages/pyspark-3.0.1-py3.8.egg/pyspark/sql/session.py:401: UserWarning: Using RDD of dict to inferSchema is deprecated. Use pyspark.sql.Row instead\n",
+      "  warnings.warn(\"Using RDD of dict to inferSchema is deprecated. \"\n",
+      "2021-04-28 15:45:43,788\toarph 723307 : Progress for \n",
+      "run_analysis [Pid:723307 Id:140496910224640]\n",
+      "-----------------------  ---------------------------\n",
+      "Thruput\n",
+      "N thru                   200 (of 200)\n",
+      "N chunks                 1\n",
+      "Total time               6 minutes and 12.74 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       1\n",
+      "Progress\n",
+      "Percent Complete         100.000000\n",
+      "Est. Time To Completion  0 seconds\n",
+      "-----------------------  ---------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "# import sys\n",
+    "# sys.path.append('/opt/psegs')\n",
+    "\n",
+    "# from oarphpy.spark import NBSpark\n",
+    "# NBSpark.SRC_ROOT = os.path.join(ALIB_SRC_DIR, 'cheap_optical_flow_eval_analysis')\n",
+    "# NBSpark.CONF_KV.update({\n",
+    "#     'spark.driver.maxResultSize': '2g',\n",
+    "#     'spark.driver.memory': '16g',\n",
+    "#   })\n",
+    "# spark = NBSpark.getOrCreate()\n",
+    "\n",
+    "\n",
+    "from oarphpy.spark import RowAdapter\n",
+    "\n",
+    "from pyspark import Row\n",
+    "\n",
+    "\n",
+    "def flow_pair_to_full_row(fp):\n",
+    "    from threadpoolctl import threadpool_limits\n",
+    "    with threadpool_limits(limits=1, user_api='blas'):\n",
+    "        recon = FlowReconstructedImagePair.create_from(fp)\n",
+    "        fstats = OFlowStats.create_from(fp)\n",
+    "        errors = OFlowReconErrors(recon)\n",
+    "\n",
+    "#         assert False, 'fixme look for nulls in diff_time_sec' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
+    "        rowdata = dict(\n",
+    "                fp_dataset=fp.dataset,\n",
+    "                fp_id1=fp.id1,\n",
+    "                fp_id2=fp.id2,\n",
+    "                fp_uri=str(fp.uri),\n",
+    "                flow_coverage=fstats.coverage,\n",
+    "                diff_time_sec=fp.diff_time_sec,\n",
+    "                translation_meters=fp.translation_meters,\n",
+    "        )\n",
+    "        rowdata.update(\n",
+    "            ('Forwards_' + k, float(v))\n",
+    "            for k, v in errors.forward_stats.items())\n",
+    "        rowdata.update(\n",
+    "            ('Backwards_' + k, float(v))\n",
+    "            for k, v in errors.backward_stats.items())\n",
+    "        \n",
+    "        if fp.has_scene_flow():\n",
+    "            with threadpool_limits(limits=1, user_api='openmp'):\n",
+    "                sflow = SFlowStats.create_from(fp)\n",
+    "        else:\n",
+    "            sflow = SFlowStats()\n",
+    "        rowdata['has_scene_flow'] = fp.has_scene_flow()\n",
+    "        rowdata.update(\n",
+    "            ('SceneFlow_' + k, float(v))\n",
+    "            for k, v in sflow.get_rowdata().items())\n",
+    "        \n",
+    "        return RowAdapter.to_row(rowdata)\n",
+    "\n",
+    "\n",
+    "# analysis_uris_demo = MiddFactory.list_fp_uris(spark) + PSEGS_SYNTHFLOW_DEMO_URIS + KITTI_SF15_DEMO_URIS + DD_DEMO_URIS\n",
+    "\n",
+    "\n",
+    "class UnionFactory(FlowPairUnionFactory):\n",
+    "    #FACTORIES = ALL_FP_FACTORY_CLSS\n",
+    "    FACTORIES = [KITTISF15Factory]\n",
+    "\n",
+    "analysis_uris_full = UnionFactory.list_fp_uris(spark)\n",
+    "# analysis_uris_full = analysis_uris_full[4900:]\n",
+    "print('analysis_uris_full', len(analysis_uris_full))\n",
+    "\n",
+    "ANALYSIS_FIXTURE_PATH = '/outer_root/media/rocket4q/SFTEST_222_flow_pq_eval_test.parquet'\n",
+    "\n",
+    "from oarphpy import util as oputil\n",
+    "thru = oputil.ThruputObserver(name='run_analysis', n_total=len(analysis_uris_full))\n",
+    "for uri_chunk in oputil.ichunked(analysis_uris_full, 500):\n",
+    "    thru.start_block()\n",
+    "    fp_rdd = UnionFactory.get_fp_rdd_for_uris(spark, uri_chunk)\n",
+    "    fp_rdd = fp_rdd.coalesce(len(uri_chunk))\n",
+    "    result_rdd = fp_rdd.map(flow_pair_to_full_row)\n",
+    "    df = spark.createDataFrame(result_rdd)\n",
+    "    df.write.save(\n",
+    "            mode='append',\n",
+    "            path=ANALYSIS_FIXTURE_PATH,\n",
+    "            format='parquet',\n",
+    "            compression='lz4')\n",
+    "    thru.stop_block(n=len(uri_chunk))\n",
+    "    thru.maybe_log_progress(every_n=1)\n",
+    "\n",
+    "\n",
+    "# if True:#RUN_FULL_ANALYSIS:\n",
+    "# #     spark = Spark.getOrCreate()\n",
+    "    \n",
+    "# #     for p in ALL_FPS:\n",
+    "# #         import cloudpickle\n",
+    "# #         try:\n",
+    "# #             cloudpickle.dumps(p)\n",
+    "# #         except Exception:\n",
+    "# #             assert False, p\n",
+    "# #     print('all good')\n",
+    "    \n",
+    "#     import pickle\n",
+    "#     fp_rdd = spark.sparkContext.parallelize(ALL_FPS, numSlices=200)\n",
+    "# #     print(fp_rdd.count())\n",
+    "#     df = spark.createDataFrame(fp_rdd.map(flow_pair_to_full_row)).persist()\n",
+    "\n",
+    "#     print(df.count())\n",
+    "#     df.show(10)\n",
+    "#     df.printSchema()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+-------------------+------------------+------------------+------------------+-------------------+------------------+------------------+------------------+------------------+-------------------+------------------------+--------------------------------+------------------------+------------------------+------------------------+------------------------+---------------------+-------------------------+---------------------+-------------+-------------------+--------------------+--------------------+--------------------+--------------------+--------------+------------------+\n",
+      "|Backwards_Edges_MSE|     Backwards_MSE|    Backwards_PSNR|    Backwards_RMSE|     Backwards_SSIM|Forwards_Edges_MSE|      Forwards_MSE|     Forwards_PSNR|     Forwards_RMSE|      Forwards_SSIM|SceneFlow_bkd_nnepe_mean|SceneFlow_fwd_nn_dist_to_sf_dist|SceneFlow_fwd_nnepe_50th|SceneFlow_fwd_nnepe_75th|SceneFlow_fwd_nnepe_95th|SceneFlow_fwd_nnepe_mean|SceneFlow_icp_fitness|SceneFlow_icp_inlier_rmse|SceneFlow_sf_norm_var|diff_time_sec|      flow_coverage|          fp_dataset|              fp_id1|              fp_id2|              fp_uri|has_scene_flow|translation_meters|\n",
+      "+-------------------+------------------+------------------+------------------+-------------------+------------------+------------------+------------------+------------------+-------------------+------------------------+--------------------------------+------------------------+------------------------+------------------------+------------------------+---------------------+-------------------------+---------------------+-------------+-------------------+--------------------+--------------------+--------------------+--------------------+--------------+------------------+\n",
+      "|  811.1008911132812| 9.284229254872242| 38.45334504946084| 3.047003323738299| 0.8823270396357886| 515.6905517578125| 9.222926339982596| 38.48211620761724| 3.036927121282728| 0.8722998622459163|    0.023502385567686242|              0.4516372489822675|    0.014841745615961136|    0.024736306419134813|     0.05026109576087432|    0.028856742108864318|   0.8067577140409549|     0.004805716307626801| 0.010959513758672133|         -1.0|0.19759440104166667|DeepDeform Semi-S...|train/seq068/colo...|train/seq068/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|     2964.068359375| 39.99987510927938| 32.11021725525835|6.3245454468506574| 0.7057834568107098| 820.1018676757812| 46.10823565958159| 31.49301856691616|  6.79030453364071| 0.7448881323281962|     0.16218178381959694|             0.31058247480887224|     0.16187639188822683|      0.2163551324582083|     0.37717035943338906|     0.18744157253820032|  0.18164106406893968|      0.00529975903075521| 0.018885630223170274|         -1.0|     0.130322265625|DeepDeform Semi-S...|train/seq068/colo...|train/seq068/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|   841.026123046875|12.563004591241588| 37.13986842378698| 3.544432901218697| 0.8649651113808204| 301.0058898925781| 13.87650966183575| 36.70800118468567| 3.725118744662477| 0.8638773591682978|      0.0416027806325623|              0.3427769611977106|     0.03977029917456017|     0.05434765927010286|     0.07864340424807686|       0.048838723266916|   0.7085797031495112|     0.005095339620014852| 0.010854373824340754|         -1.0|0.18883138020833334|DeepDeform Semi-S...|train/seq068/colo...|train/seq068/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|  1218.396240234375| 20.62229952301895|34.987432705083414| 4.541178208683177| 0.7975913597542976| 424.5088195800781|18.048768225238813| 35.56632792921845| 4.248384189929015| 0.8384507638352278|     0.16479757296645556|              0.3563456133831688|     0.15643705211157988|     0.19412013346111456|      1.1270000000000002|     0.20365815534450135|  0.30726539647402956|     0.005464643926824096|  0.05375319144929807|         -1.0|0.20587565104166666|DeepDeform Semi-S...|train/seq068/colo...|train/seq068/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|  1160.928955078125|15.126672039495359| 36.33336969781971|  3.88930225612453| 0.8488052477374051|373.55743408203125|16.481696752920136|35.960784415833615|4.0597656031992955| 0.8549989854192298|     0.06307232934447705|             0.35707238217651704|     0.05673035821949333|     0.08148884110857045|      0.1374408793957658|     0.07083283699413889|  0.48496875142557366|     0.005062560968521223| 0.010254700555098145|         -1.0|     0.214072265625|DeepDeform Semi-S...|train/seq068/colo...|train/seq068/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "| 1232.4525146484375| 17.28339321171024| 35.75451350245138| 4.157330058067346| 0.8213380732564264|407.11614990234375|16.061909416748126|36.072831885513104| 4.007731205650914| 0.8379804186744038|     0.13428528596140207|             0.31016282011084756|     0.13859244222132086|      0.1688770567699868|                   1.124|     0.19660213566418516|  0.23232810239980092|     0.005520291301752163|  0.06399654538920176|         -1.0|0.20930013020833332|DeepDeform Semi-S...|train/seq068/colo...|train/seq068/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|   2938.28369140625| 38.97537996964865| 32.22290003064316|  6.24302650720375| 0.7136208460719717| 539.3043212890625|  45.2356259267324|31.575997568276566| 6.725743522223577| 0.7455977243336881|      0.1607040935800729|             0.31927145888526687|     0.17201262714590435|     0.21648267078678415|     0.36462003935020865|     0.19686560577746462|  0.13873032747105452|     0.005664573355736924| 0.026858167818412396|         -1.0|0.14085611979166668|DeepDeform Semi-S...|train/seq068/colo...|train/seq068/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|  795.0248413085938|   8.0046786389414|39.097364597326354|2.8292540781876414| 0.8875598688394287| 493.8609924316406|  8.89937916734674| 38.63720650133104|2.9831827244315323| 0.8865101394120161|    0.047291449265690004|             0.28268190621942585|    0.046239654106448834|     0.05383414539624179|     0.06329266511259705|     0.06432518773485871|   0.9445179584120983|     0.004211624207165231| 0.023595913685847434|         -1.0|        0.206640625|DeepDeform Semi-S...|train/seq068/colo...|train/seq068/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "| 1131.6527099609375| 39.44406089105714| 32.17098740303875|   6.2804506917145| 0.7675057374982521|   727.43115234375| 32.44271829652387| 33.01963125325038| 5.695850972113287| 0.7718167834219257|      0.1455562253904747|             0.24662527761763414|      0.1661670005873874|     0.20609033672308022|                   1.133|     0.22566200209285456|  0.13541978548406983|     0.005351537087237936|  0.06683862344720685|         -1.0|0.18482747395833332|DeepDeform Semi-S...|train/seq068/colo...|train/seq068/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|   3216.49951171875| 67.74295022431104| 29.82216254502043| 8.230610562061058|  0.685408356189637|1905.8311767578125| 67.27773191074982| 29.85209018911129| 8.202300403591044| 0.6966224374466261|      0.1688901095044044|              0.2719197900886001|     0.16141998256836712|       0.173826047890295|     0.21791458536344246|     0.17920375398565777|   0.2561952574236274|     0.005668034228763772| 0.034557186135361885|         -1.0|0.12190104166666667|DeepDeform Semi-S...|train/seq069/colo...|train/seq069/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|    6519.4736328125| 74.08821127411912|  29.4333125121382|   8.6074509161609|0.48458539947756063|    3023.216796875|  73.5401947957552| 29.46555584959631| 8.575557987428876| 0.5136199172522997|      0.1910381140827987|              0.2710407557278911|     0.12251644728962513|     0.16203463758251307|      0.7300688520687051|     0.19273014934306956|  0.30956105912566045|      0.00584906694422502|  0.06448818590919994|         -1.0|0.11027669270833333|DeepDeform Semi-S...|train/seq069/colo...|train/seq069/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|   7788.15771484375| 88.44043186895011| 28.66429506225775| 9.404277317739526|0.19300380281828838|  4099.98095703125| 91.44351723072168|28.519274387068776| 9.562610377439922|0.09829187815518842|     0.13658909124346802|              0.5391316954564738|     0.36354717915720386|      0.4566319732861115|      0.5944895163735789|     0.38178959271812757|                  NaN|                      NaN|  0.11459052623464928|         -1.0|      0.02623046875|DeepDeform Semi-S...|train/seq069/colo...|train/seq069/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|    9436.3056640625| 91.74412416851442|28.505021018174233| 9.578315309516304|0.30051534452024053|  3246.80126953125| 91.09141025641026|28.536029350927706| 9.544182010859299| 0.3147208946682977|     0.20335464775531636|             0.49674938686159087|     0.24995580062352463|      0.3092917718256536|      0.5407073359690687|      0.3009189066546911| 0.019733924611973392|     0.006005780245235893|  0.08598015133721634|         -1.0|      0.04404296875|DeepDeform Semi-S...|train/seq069/colo...|train/seq069/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|    3839.8916015625| 79.55337361530715| 29.12421759393853| 8.919269791597692|0.49169733508235275|  2455.31396484375|  80.1065876549823| 29.09412128621797| 8.950228357700283|0.46999112793603814|     0.12212842181062031|              0.6909344527962076|     0.04993886356411133|     0.08510125378226202|      0.2765094936303424|      0.0884927925560589|  0.40334515521695347|     0.005107325498483401|  0.03553895376990518|         -1.0|      0.14869140625|DeepDeform Semi-S...|train/seq069/colo...|train/seq069/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|      4418.88671875| 88.88582903040735|28.642478334687173| 9.427928140922976|0.38070384152906617|     2897.48828125|  90.9848260016275| 28.54111391989162| 9.538596647391454| 0.3402721466383742|     0.24684961944770897|              0.5655846371817075|     0.14840460308037087|     0.24910934050285977|      0.5145269709949031|      0.2147309809650449|  0.11087420042643924|     0.005920711904998024|  0.07170448630484601|         -1.0|0.12671549479166666|DeepDeform Semi-S...|train/seq069/colo...|train/seq069/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|  3796.372314453125| 82.39300515158745| 28.97190017532608| 9.077059278840666| 0.5105871529312278|  2179.34619140625| 82.60027051613521|  28.9609889122946| 9.088469096395455| 0.5166696991870318|      0.1042429189892928|             0.45681108907227674|     0.08879959268648942|     0.12880484054410712|      0.2793154777670644|     0.14152953557241993|   0.3157755215650929|     0.005482685391001725| 0.057774115408553976|         -1.0|0.12279622395833334|DeepDeform Semi-S...|train/seq069/colo...|train/seq069/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|  3857.495849609375|  72.2214663643235| 29.54414058894673| 8.498321385092677| 0.5897218015370079|  2612.37060546875| 70.68163544895117|29.637737711050896| 8.407237087709087| 0.5828058539238902|     0.19763100518199742|              0.2726893416854092|     0.15324284221907497|      0.2936212418189004|      0.6647001189281929|      0.2468005671292193|  0.22448979591836735|      0.00588505825366651|  0.08314662919092253|         -1.0|      0.12345703125|DeepDeform Semi-S...|train/seq070/colo...|train/seq070/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|    4106.9931640625| 75.43478699472405|  29.3550869250729| 8.685320201047515| 0.5555172081135071| 3242.009521484375| 74.27530868956302|29.422358954525215| 8.618312403804065| 0.5811231667996389|     0.09316757027690378|              0.3289237338531239|     0.06748676030639669|     0.10039278300272479|      0.5777851362480886|      0.1551991860686198|  0.38800956329631087|     0.005674513951334...|  0.10457658125305022|         -1.0|0.10756184895833333|DeepDeform Semi-S...|train/seq070/colo...|train/seq070/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|       5265.4140625|  77.7496444091306|29.223819494263413| 8.817575880542826| 0.5451489078405483| 3410.913818359375| 76.29038751233702| 29.30610539994793| 8.734436874369006| 0.5593972845410912|     0.21756224158711382|             0.29214377003642267|     0.14948533856885693|      0.2821403276061806|      0.8665706260469409|     0.26773200653237783|   0.2988728995615197|     0.005845719541346...|   0.0969860672916381|         -1.0|     0.110615234375|DeepDeform Semi-S...|train/seq070/colo...|train/seq070/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "|   4437.24169921875|   81.190022453797|29.035776993546147| 9.010550618791118| 0.5852403691867897|        2196.78125| 79.28582702020202|29.138848002468457| 8.904258925941114| 0.6019345513446139|      0.1698272598806962|              0.3029748979407533|     0.12957951603405538|     0.19861782079604023|      0.3357093955491163|     0.17143672923380832|   0.2523461339167482|       0.0059681274930809| 0.045513160452832706|         -1.0|0.11307942708333334|DeepDeform Semi-S...|train/seq070/colo...|train/seq070/colo...|psegs://dataset=d...|          true|              -1.0|\n",
+      "+-------------------+------------------+------------------+------------------+-------------------+------------------+------------------+------------------+------------------+-------------------+------------------------+--------------------------------+------------------------+------------------------+------------------------+------------------------+---------------------+-------------------------+---------------------+-------------+-------------------+--------------------+--------------------+--------------------+--------------------+--------------+------------------+\n",
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'Backwards_Edges_MSE': 811.1008911132812,\n",
+      " 'Backwards_MSE': 9.284229254872242,\n",
+      " 'Backwards_PSNR': 38.45334504946084,\n",
+      " 'Backwards_RMSE': 3.047003323738299,\n",
+      " 'Backwards_SSIM': 0.8823270396357886,\n",
+      " 'Forwards_Edges_MSE': 515.6905517578125,\n",
+      " 'Forwards_MSE': 9.222926339982596,\n",
+      " 'Forwards_PSNR': 38.48211620761724,\n",
+      " 'Forwards_RMSE': 3.036927121282728,\n",
+      " 'Forwards_SSIM': 0.8722998622459163,\n",
+      " 'SceneFlow_bkd_nnepe_mean': 0.023502385567686242,\n",
+      " 'SceneFlow_fwd_nn_dist_to_sf_dist': 0.4516372489822675,\n",
+      " 'SceneFlow_fwd_nnepe_50th': 0.014841745615961136,\n",
+      " 'SceneFlow_fwd_nnepe_75th': 0.024736306419134813,\n",
+      " 'SceneFlow_fwd_nnepe_95th': 0.05026109576087432,\n",
+      " 'SceneFlow_fwd_nnepe_mean': 0.028856742108864318,\n",
+      " 'SceneFlow_icp_fitness': 0.8067577140409549,\n",
+      " 'SceneFlow_icp_inlier_rmse': 0.004805716307626801,\n",
+      " 'SceneFlow_sf_norm_var': 0.010959513758672133,\n",
+      " 'diff_time_sec': -1.0,\n",
+      " 'flow_coverage': 0.19759440104166667,\n",
+      " 'fp_dataset': 'DeepDeform Semi-Synthetic Optical Flow',\n",
+      " 'fp_id1': 'train/seq068/color/000200.jpg',\n",
+      " 'fp_id2': 'train/seq068/color/000400.jpg',\n",
+      " 'fp_uri': 'psegs://dataset=deep_deform&extra.dd.K=train/seq068/scene_flow/../intrinsics.txt&extra.dd.expected_out=train/seq068/color/000400.jpg&extra.dd.flow_gt=train/seq068/optical_flow/Jacket2_000200_000400.oflow&extra.dd.input=train/seq068/color/000200.jpg&extra.dd.sf_gt=train/seq068/scene_flow/Jacket2_000200_000400.sflow',\n",
+      " 'has_scene_flow': True,\n",
+      " 'translation_meters': -1.0}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "5474"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# results_df = spark.read.parquet(ANALYSIS_FIXTURE_PATH)\n",
+    "\n",
+    "# from oarphpy import util as oputil\n",
+    "# ANALYSIS_FIXTURE_PATH = '/outer_root/media/rocket4q/SFTEST_222_flow_pq_eval_test.parquet'\n",
+    "results_dfs = []\n",
+    "for path in oputil.all_files_recursive(ANALYSIS_FIXTURE_PATH, pattern='*.lz4.parquet'):\n",
+    "    df = spark.read.parquet(path)\n",
+    "    df = df.withColumn('diff_time_sec', df.diff_time_sec.cast('float'))\n",
+    "    results_dfs.append(df)\n",
+    "\n",
+    "from oarphpy import spark as S\n",
+    "results_df = S.union_dfs(*results_dfs)\n",
+    "\n",
+    "# def add_dataset(row):\n",
+    "#     from psegs import datum\n",
+    "#     row = row.asDict()\n",
+    "#     uri = datum.URI.from_str(row['fp_uri'])\n",
+    "#     row['fp_dataset'] = uri.dataset\n",
+    "#     return row\n",
+    "\n",
+    "# results_df = spark.createDataFrame(results_df.rdd.map(add_dataset))\n",
+    "results_df = results_df.persist()\n",
+    "\n",
+    "results_df.show()\n",
+    "import pprint\n",
+    "pprint.pprint(results_df.take(1)[0].asDict())\n",
+    "results_df.count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Rendering 22 histograms\n",
+      "Working on Backwards_Edges_MSE\n",
+      "total chosen_fp_uris, added 512 512\n",
+      "Working on Backwards_MSE\n",
+      "total chosen_fp_uris, added 708 631\n",
+      "Working on Backwards_PSNR\n",
+      "total chosen_fp_uris, added 738 565\n",
+      "Working on Backwards_RMSE\n",
+      "total chosen_fp_uris, added 752 622\n",
+      "Working on Backwards_SSIM\n",
+      "total chosen_fp_uris, added 760 636\n",
+      "Working on Forwards_Edges_MSE\n",
+      "total chosen_fp_uris, added 762 543\n",
+      "Working on Forwards_MSE\n",
+      "total chosen_fp_uris, added 762 630\n",
+      "Working on Forwards_PSNR\n",
+      "total chosen_fp_uris, added 762 570\n",
+      "Working on Forwards_RMSE\n",
+      "total chosen_fp_uris, added 762 620\n",
+      "Working on Forwards_SSIM\n",
+      "total chosen_fp_uris, added 762 649\n",
+      "Working on SceneFlow_bkd_nnepe_mean\n",
+      "total chosen_fp_uris, added 762 451\n",
+      "Working on SceneFlow_fwd_nn_dist_to_sf_dist\n",
+      "total chosen_fp_uris, added 762 491\n",
+      "Working on SceneFlow_fwd_nnepe_50th\n",
+      "total chosen_fp_uris, added 762 407\n",
+      "Working on SceneFlow_fwd_nnepe_75th\n",
+      "total chosen_fp_uris, added 762 386\n",
+      "Working on SceneFlow_fwd_nnepe_95th\n",
+      "total chosen_fp_uris, added 762 394\n",
+      "Working on SceneFlow_fwd_nnepe_mean\n",
+      "total chosen_fp_uris, added 762 433\n",
+      "Working on SceneFlow_icp_fitness\n",
+      "total chosen_fp_uris, added 762 353\n",
+      "Working on SceneFlow_icp_inlier_rmse\n",
+      "total chosen_fp_uris, added 762 319\n",
+      "Working on SceneFlow_sf_norm_var\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 21:17:03,177\tps   1807391 : FlowRecTable: Reading parquet from /outer_root/media/rocket4q/psegs_flow_records_FULL_fixed \n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total chosen_fp_uris, added 762 136\n",
+      "Working on diff_time_sec\n",
+      "total chosen_fp_uris, added 764 188\n",
+      "Working on flow_coverage\n",
+      "total chosen_fp_uris, added 764 624\n",
+      "Working on translation_meters\n",
+      "total chosen_fp_uris, added 764 523\n",
+      "Rendering 764 histogram bucket pages\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 21:17:05,517\tps   1807391 : FlowRecTable: Have 3 StampedDatumTables\n",
+      "2021-04-28 21:24:57,221\tps   1807391 : Building union DF for 42 segments ...\n",
+      "2021-04-28 21:24:57,334\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0283\n",
+      "2021-04-28 21:24:58,330\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:25:03,059\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:25:03,063\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:25:07,867\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:25:07,867\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:25:07,868\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:25:12,344\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0283\n",
+      "2021-04-28 21:25:12,354\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  ----------------------------\n",
+      "Thruput\n",
+      "N thru                   1 (of 42)\n",
+      "N chunks                 1\n",
+      "Total time               15.12 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         2.380952\n",
+      "Est. Time To Completion  10 minutes and 20.01 seconds\n",
+      "-----------------------  ----------------------------\n",
+      "2021-04-28 21:25:12,420\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-1013\n",
+      "2021-04-28 21:25:13,329\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:25:17,818\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:25:17,822\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:25:32,082\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:25:32,083\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:25:32,083\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:25:36,240\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-1013\n",
+      "2021-04-28 21:25:36,258\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  ---------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   2 (of 42)\n",
+      "N chunks                 2\n",
+      "Total time               39.01 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         4.761905\n",
+      "Est. Time To Completion  13 minutes and 0.16 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      19 seconds, 503 milliseconds, 955 microseconds and 364.23 nanoseconds\n",
+      "p50                      19 seconds, 503 milliseconds, 955 microseconds and 364.23 nanoseconds\n",
+      "p95                      23 seconds, 447 milliseconds, 626 microseconds and 161.58 nanoseconds\n",
+      "p99                      23 seconds, 798 milliseconds, 174 microseconds and 676.9 nanoseconds\n",
+      "-----------------------  ---------------------------------------------------------------------\n",
+      "2021-04-28 21:25:36,324\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0901\n",
+      "2021-04-28 21:25:37,171\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:25:41,319\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:25:41,322\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:25:58,093\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:25:58,093\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:25:58,094\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:26:02,335\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0901\n",
+      "2021-04-28 21:26:02,341\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  ---------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   3 (of 42)\n",
+      "N chunks                 3\n",
+      "Total time               1 minute and 5.08 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         7.142857\n",
+      "Est. Time To Completion  14 minutes and 6.1 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      21 seconds, 694 milliseconds, 882 microseconds and 710.77 nanoseconds\n",
+      "p50                      23 seconds, 885 milliseconds, 811 microseconds and 805.73 nanoseconds\n",
+      "p95                      25 seconds, 857 milliseconds, 644 microseconds and 844.06 nanoseconds\n",
+      "p99                      26 seconds, 32 milliseconds, 918 microseconds and 891.91 nanoseconds\n",
+      "-----------------------  ---------------------------------------------------------------------\n",
+      "2021-04-28 21:26:02,411\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=val&segment_id=scene-1069\n",
+      "2021-04-28 21:26:03,256\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:26:07,405\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:26:07,408\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:26:30,429\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:26:30,429\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:26:30,430\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:26:34,811\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=val&segment_id=scene-1069\n",
+      "2021-04-28 21:26:34,816\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  ---------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   4 (of 42)\n",
+      "N chunks                 4\n",
+      "Total time               1 minute and 37.55 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         9.523810\n",
+      "Est. Time To Completion  15 minutes and 26.77 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      24 seconds, 388 milliseconds, 715 microseconds and 386.39 nanoseconds\n",
+      "p50                      24 seconds, 981 milliseconds, 274 microseconds and 604.8 nanoseconds\n",
+      "p95                      31 seconds, 511 milliseconds, 192 microseconds and 11.83 nanoseconds\n",
+      "p99                      32 seconds, 278 milliseconds, 409 microseconds and 132.96 nanoseconds\n",
+      "-----------------------  ---------------------------------------------------------------------\n",
+      "2021-04-28 21:26:34,865\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0683\n",
+      "2021-04-28 21:26:35,690\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:26:39,896\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:26:39,899\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:27:08,593\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:27:08,594\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:27:08,594\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:27:13,081\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0683\n",
+      "2021-04-28 21:27:13,087\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   5 (of 42)\n",
+      "N chunks                 5\n",
+      "Total time               2 minutes and 15.82 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         11.904762\n",
+      "Est. Time To Completion  16 minutes and 45.07 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      27 seconds, 164 milliseconds, 24 microseconds and 162.29 nanoseconds\n",
+      "p50                      26 seconds, 76 milliseconds, 737 microseconds and 403.87 nanoseconds\n",
+      "p95                      37 seconds, 106 milliseconds, 250 microseconds and 95.37 nanoseconds\n",
+      "p99                      38 seconds, 33 milliseconds, 457 microseconds and 431.79 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 21:27:13,141\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0174\n",
+      "2021-04-28 21:27:14,012\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:27:18,281\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:27:18,285\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:28:00,110\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:28:00,110\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:28:00,111\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:28:04,564\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0174\n",
+      "2021-04-28 21:28:04,568\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  ---------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   6 (of 42)\n",
+      "N chunks                 6\n",
+      "Total time               3 minutes and 7.3 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         14.285714\n",
+      "Est. Time To Completion  18 minutes and 43.78 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      31 seconds, 216 milliseconds, 205 microseconds and 477.71 nanoseconds\n",
+      "p50                      29 seconds, 273 milliseconds, 475 microseconds and 408.55 nanoseconds\n",
+      "p95                      48 seconds, 174 milliseconds, 148 microseconds and 857.59 nanoseconds\n",
+      "p99                      50 seconds, 816 milliseconds, 519 microseconds and 415.38 nanoseconds\n",
+      "-----------------------  ---------------------------------------------------------------------\n",
+      "2021-04-28 21:28:04,614\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0582\n",
+      "2021-04-28 21:28:05,483\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:28:09,587\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:28:09,591\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:29:00,658\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:29:00,659\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:29:00,659\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:29:05,054\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0582\n",
+      "2021-04-28 21:29:05,058\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  ---------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   7 (of 42)\n",
+      "N chunks                 7\n",
+      "Total time               4 minutes and 7.78 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         16.666667\n",
+      "Est. Time To Completion  20 minutes and 38.91 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      35 seconds, 397 milliseconds, 565 microseconds and 398.9 nanoseconds\n",
+      "p50                      32 seconds, 470 milliseconds, 213 microseconds and 413.24 nanoseconds\n",
+      "p95                      57 seconds, 783 milliseconds, 141 microseconds and 64.64 nanoseconds\n",
+      "p99                      59 seconds, 945 milliseconds, 208 microseconds and 153.72 nanoseconds\n",
+      "-----------------------  ---------------------------------------------------------------------\n",
+      "2021-04-28 21:29:05,059\tps   1807391 : Building DF for psegs://dataset=kitti-360-fused&split=train&segment_id=2013_05_28_drive_0005_sync\n",
+      "2021-04-28 21:29:05,902\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:29:10,093\tps   1807391 : Creating datums for KITTI-360 ...\n",
+      "2021-04-28 21:29:10,094\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:29:14,391\tps   1807391 : ... seq 2013_05_28_drive_0005_sync has 71770 URIs spanning 705 sec, creating 1121 slices ...\n",
+      "2021-04-28 21:29:14,785\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:31:40,748\tps   1807391 : ... partitioned datums into 0 RDDs.\n",
+      "2021-04-28 21:31:40,749\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:31:40,749\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:31:45,170\tps   1807391 : Added DF for psegs://dataset=kitti-360-fused&split=train&segment_id=2013_05_28_drive_0005_sync\n",
+      "2021-04-28 21:31:45,175\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   8 (of 42)\n",
+      "N chunks                 8\n",
+      "Total time               6 minutes and 47.89 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         19.047619\n",
+      "Est. Time To Completion  28 minutes and 53.55 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      50 seconds, 986 milliseconds, 830 microseconds and 711.36 nanoseconds\n",
+      "p50                      35 seconds, 367 milliseconds, 736 microseconds and 339.57 nanoseconds\n",
+      "p95                      2 minutes, 5 seconds, 242 milliseconds, 600 microseconds and 858.21 nanoseconds\n",
+      "p99                      2 minutes, 33 seconds, 137 milliseconds, 870 microseconds and 490.55 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:31:45,226\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0997\n",
+      "2021-04-28 21:31:46,099\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:31:50,288\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:31:50,291\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:32:58,737\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:32:58,738\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:32:58,738\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:33:02,962\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0997\n",
+      "2021-04-28 21:33:02,967\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   9 (of 42)\n",
+      "N chunks                 9\n",
+      "Total time               8 minutes and 5.68 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         21.428571\n",
+      "Est. Time To Completion  29 minutes and 40.83 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      53 seconds, 964 milliseconds, 628 microseconds and 828.9 nanoseconds\n",
+      "p50                      38 seconds, 265 milliseconds, 259 microseconds and 265.9 nanoseconds\n",
+      "p95                      2 minutes, 7 seconds, 181 milliseconds, 818 microseconds and 246.84 nanoseconds\n",
+      "p99                      2 minutes, 33 seconds, 525 milliseconds, 713 microseconds and 968.28 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:33:03,015\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0704\n",
+      "2021-04-28 21:33:03,859\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:33:07,966\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:33:07,970\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:34:26,467\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:34:26,468\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:34:26,468\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:34:30,924\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0704\n",
+      "2021-04-28 21:34:30,929\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   10 (of 42)\n",
+      "N chunks                 10\n",
+      "Total time               9 minutes and 33.64 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         23.809524\n",
+      "Est. Time To Completion  30 minutes and 35.65 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      57 seconds, 363 milliseconds, 916 microseconds and 683.2 nanoseconds\n",
+      "p50                      44 seconds, 871 milliseconds, 185 microseconds and 660.36 nanoseconds\n",
+      "p95                      2 minutes, 7 seconds, 642 milliseconds, 306 microseconds and 661.61 nanoseconds\n",
+      "p99                      2 minutes, 33 seconds, 617 milliseconds, 811 microseconds and 651.23 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 21:34:30,974\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0716\n",
+      "2021-04-28 21:34:31,778\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:34:36,035\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:34:36,038\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:34:57,956\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:34:57,957\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:34:57,957\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:35:02,093\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0716\n",
+      "2021-04-28 21:35:02,098\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   11 (of 42)\n",
+      "N chunks                 11\n",
+      "Total time               10 minutes and 4.8 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         26.190476\n",
+      "Est. Time To Completion  28 minutes and 24.45 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      54 seconds, 982 milliseconds, 100 microseconds and 876.89 nanoseconds\n",
+      "p50                      38 seconds, 265 milliseconds, 259 microseconds and 265.9 nanoseconds\n",
+      "p95                      2 minutes, 4 seconds, 34 milliseconds, 597 microseconds and 635.27 nanoseconds\n",
+      "p99                      2 minutes, 32 seconds, 896 milliseconds, 269 microseconds and 845.96 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:35:02,143\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=val&segment_id=scene-0270\n",
+      "2021-04-28 21:35:02,982\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:35:07,217\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:35:07,221\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:35:11,265\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:35:11,265\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:35:11,266\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:35:15,507\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=val&segment_id=scene-0270\n",
+      "2021-04-28 21:35:15,514\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  ------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   12 (of 42)\n",
+      "N chunks                 12\n",
+      "Total time               10 minutes and 18.21 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         28.571429\n",
+      "Est. Time To Completion  25 minutes and 45.53 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      51 seconds, 517 milliseconds, 712 microseconds and 255.32 nanoseconds\n",
+      "p50                      35 seconds, 367 milliseconds, 736 microseconds and 339.57 nanoseconds\n",
+      "p95                      2 minutes, 426 milliseconds, 888 microseconds and 608.93 nanoseconds\n",
+      "p99                      2 minutes, 32 seconds, 174 milliseconds, 728 microseconds and 40.7 nanoseconds\n",
+      "-----------------------  ------------------------------------------------------------------------------\n",
+      "2021-04-28 21:35:15,562\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0129\n",
+      "2021-04-28 21:35:16,365\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:35:20,618\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:35:20,622\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:35:24,749\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:35:24,750\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:35:24,750\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:35:28,838\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0129\n",
+      "2021-04-28 21:35:28,842\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   13 (of 42)\n",
+      "N chunks                 13\n",
+      "Total time               10 minutes and 31.54 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         30.952381\n",
+      "Est. Time To Completion  23 minutes and 28.81 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      48 seconds, 579 milliseconds, 754 microseconds and 242.53 nanoseconds\n",
+      "p50                      32 seconds, 470 milliseconds, 213 microseconds and 413.24 nanoseconds\n",
+      "p95                      1 minute, 56 seconds, 819 milliseconds, 179 microseconds and 582.6 nanoseconds\n",
+      "p99                      2 minutes, 31 seconds, 453 milliseconds, 186 microseconds and 235.43 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:35:28,888\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0514\n",
+      "2021-04-28 21:35:29,723\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:35:33,736\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:35:33,739\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:35:37,918\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:35:37,919\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:35:37,919\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:35:42,156\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0514\n",
+      "2021-04-28 21:35:42,161\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   14 (of 42)\n",
+      "N chunks                 14\n",
+      "Total time               10 minutes and 44.85 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         33.333333\n",
+      "Est. Time To Completion  21 minutes and 29.7 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      46 seconds, 60 milliseconds, 741 microseconds and 748.13 nanoseconds\n",
+      "p50                      31 seconds, 817 milliseconds, 78 microseconds and 113.56 nanoseconds\n",
+      "p95                      1 minute, 53 seconds, 211 milliseconds, 470 microseconds and 556.26 nanoseconds\n",
+      "p99                      2 minutes, 30 seconds, 731 milliseconds, 644 microseconds and 430.16 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:35:42,205\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0998\n",
+      "2021-04-28 21:35:43,008\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:35:47,109\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:35:47,113\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:35:51,434\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:35:51,435\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:35:51,435\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:35:55,579\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0998\n",
+      "2021-04-28 21:35:55,584\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  -------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   15 (of 42)\n",
+      "N chunks                 15\n",
+      "Total time               10 minutes and 58.27 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         35.714286\n",
+      "Est. Time To Completion  19 minutes and 44.88 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      43 seconds, 884 milliseconds, 610 microseconds and 748.29 nanoseconds\n",
+      "p50                      31 seconds, 163 milliseconds, 942 microseconds and 813.87 nanoseconds\n",
+      "p95                      1 minute, 49 seconds, 603 milliseconds, 761 microseconds and 529.92 nanoseconds\n",
+      "p99                      2 minutes, 30 seconds, 10 milliseconds, 102 microseconds and 624.89 nanoseconds\n",
+      "-----------------------  -------------------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 21:35:55,679\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0393\n",
+      "2021-04-28 21:35:56,461\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:36:00,444\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:36:00,447\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:36:04,484\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:36:04,485\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:36:04,485\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:36:08,445\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0393\n",
+      "2021-04-28 21:36:08,450\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   16 (of 42)\n",
+      "N chunks                 16\n",
+      "Total time               11 minutes and 11.13 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         38.095238\n",
+      "Est. Time To Completion  18 minutes and 10.59 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      41 seconds, 945 milliseconds, 633 microseconds and 515.72 nanoseconds\n",
+      "p50                      28 seconds, 620 milliseconds, 340 microseconds and 108.87 nanoseconds\n",
+      "p95                      1 minute, 45 seconds, 996 milliseconds, 52 microseconds and 503.59 nanoseconds\n",
+      "p99                      2 minutes, 29 seconds, 288 milliseconds, 560 microseconds and 819.63 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:36:08,495\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0248\n",
+      "2021-04-28 21:36:09,254\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:36:13,281\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:36:13,285\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:36:17,075\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:36:17,075\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:36:17,075\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:36:21,123\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0248\n",
+      "2021-04-28 21:36:21,129\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  -------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   17 (of 42)\n",
+      "N chunks                 17\n",
+      "Total time               11 minutes and 23.8 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         40.476190\n",
+      "Est. Time To Completion  16 minutes and 45.59 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      40 seconds, 223 milliseconds, 721 microseconds and 588.36 nanoseconds\n",
+      "p50                      26 seconds, 76 milliseconds, 737 microseconds and 403.87 nanoseconds\n",
+      "p95                      1 minute, 42 seconds, 388 milliseconds, 343 microseconds and 477.25 nanoseconds\n",
+      "p99                      2 minutes, 28 seconds, 567 milliseconds, 19 microseconds and 14.36 nanoseconds\n",
+      "-----------------------  -------------------------------------------------------------------------------\n",
+      "2021-04-28 21:36:21,130\tps   1807391 : Building DF for psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0007_sync\n",
+      "2021-04-28 21:36:22,003\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:36:25,954\tps   1807391 : Creating datums for KITTI-360 ...\n",
+      "2021-04-28 21:36:25,955\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:36:30,257\tps   1807391 : ... seq 2013_05_28_drive_0007_sync has 29299 URIs spanning 355 sec, creating 114 slices ...\n",
+      "2021-04-28 21:36:30,406\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:36:38,133\tps   1807391 : ... partitioned datums into 0 RDDs.\n",
+      "2021-04-28 21:36:38,134\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:36:38,134\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:36:42,330\tps   1807391 : Added DF for psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0007_sync\n",
+      "2021-04-28 21:36:42,334\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   18 (of 42)\n",
+      "N chunks                 18\n",
+      "Total time               11 minutes and 45 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         42.857143\n",
+      "Est. Time To Completion  15 minutes and 40.01 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      39 seconds, 166 milliseconds, 892 microseconds and 329.85 nanoseconds\n",
+      "p50                      24 seconds, 981 milliseconds, 274 microseconds and 604.8 nanoseconds\n",
+      "p95                      1 minute, 38 seconds, 780 milliseconds, 634 microseconds and 450.91 nanoseconds\n",
+      "p99                      2 minutes, 27 seconds, 845 milliseconds, 477 microseconds and 209.09 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:36:42,380\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0073\n",
+      "2021-04-28 21:36:43,160\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:36:47,387\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:36:47,390\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:36:51,165\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:36:51,165\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:36:51,165\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:36:55,154\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0073\n",
+      "2021-04-28 21:36:55,159\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   19 (of 42)\n",
+      "N chunks                 19\n",
+      "Total time               11 minutes and 57.82 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         45.238095\n",
+      "Est. Time To Completion  14 minutes and 28.94 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      37 seconds, 780 milliseconds, 195 microseconds and 863.62 nanoseconds\n",
+      "p50                      23 seconds, 885 milliseconds, 811 microseconds and 805.73 nanoseconds\n",
+      "p95                      1 minute, 35 seconds, 172 milliseconds, 925 microseconds and 424.58 nanoseconds\n",
+      "p99                      2 minutes, 27 seconds, 123 milliseconds, 935 microseconds and 403.82 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:36:55,159\tps   1807391 : Building DF for psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0002_sync\n",
+      "2021-04-28 21:36:55,951\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:36:59,963\tps   1807391 : Creating datums for KITTI-360 ...\n",
+      "2021-04-28 21:36:59,963\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:37:06,930\tps   1807391 : ... seq 2013_05_28_drive_0002_sync has 153214 URIs spanning 2013 sec, creating 598 slices ...\n",
+      "2021-04-28 21:37:07,712\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:37:42,287\tps   1807391 : ... partitioned datums into 0 RDDs.\n",
+      "2021-04-28 21:37:42,287\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:37:42,288\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 21:37:46,828\tps   1807391 : Added DF for psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0002_sync\n",
+      "2021-04-28 21:37:46,833\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   20 (of 42)\n",
+      "N chunks                 20\n",
+      "Total time               12 minutes and 49.49 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         47.619048\n",
+      "Est. Time To Completion  14 minutes and 6.44 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      38 seconds, 474 milliseconds, 642 microseconds and 348.29 nanoseconds\n",
+      "p50                      24 seconds, 981 milliseconds, 274 microseconds and 604.8 nanoseconds\n",
+      "p95                      1 minute, 31 seconds, 565 milliseconds, 216 microseconds and 398.24 nanoseconds\n",
+      "p99                      2 minutes, 26 seconds, 402 milliseconds, 393 microseconds and 598.56 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:37:46,879\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0029\n",
+      "2021-04-28 21:37:47,715\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:37:51,699\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:37:51,702\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:37:55,703\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:37:55,704\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:37:55,704\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:37:59,894\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0029\n",
+      "2021-04-28 21:37:59,898\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   21 (of 42)\n",
+      "N chunks                 21\n",
+      "Total time               13 minutes and 2.55 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         50.000000\n",
+      "Est. Time To Completion  13 minutes and 2.55 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      37 seconds, 264 milliseconds, 479 microseconds and 671.21 nanoseconds\n",
+      "p50                      23 seconds, 885 milliseconds, 811 microseconds and 805.73 nanoseconds\n",
+      "p95                      1 minute, 27 seconds, 957 milliseconds, 507 microseconds and 371.9 nanoseconds\n",
+      "p99                      2 minutes, 25 seconds, 680 milliseconds, 851 microseconds and 793.29 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:37:59,944\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0583\n",
+      "2021-04-28 21:38:00,761\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:38:04,851\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:38:04,854\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:38:08,783\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:38:08,783\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:38:08,784\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:38:12,920\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0583\n",
+      "2021-04-28 21:38:12,926\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   22 (of 42)\n",
+      "N chunks                 22\n",
+      "Total time               13 minutes and 15.58 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         52.380952\n",
+      "Est. Time To Completion  12 minutes and 3.25 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      36 seconds, 162 milliseconds, 542 microseconds and 386.49 nanoseconds\n",
+      "p50                      22 seconds, 543 milliseconds, 303 microseconds and 370.48 nanoseconds\n",
+      "p95                      1 minute, 27 seconds, 448 milliseconds, 982 microseconds and 691.76 nanoseconds\n",
+      "p99                      2 minutes, 24 seconds, 959 milliseconds, 309 microseconds and 988.02 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:38:12,927\tps   1807391 : Building DF for psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0003_sync\n",
+      "2021-04-28 21:38:13,708\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:38:17,796\tps   1807391 : Creating datums for KITTI-360 ...\n",
+      "2021-04-28 21:38:17,797\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:38:18,509\tps   1807391 : ... seq 2013_05_28_drive_0003_sync has 9175 URIs spanning 108 sec, creating 35 slices ...\n",
+      "2021-04-28 21:38:18,556\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:38:21,157\tps   1807391 : ... partitioned datums into 0 RDDs.\n",
+      "2021-04-28 21:38:21,157\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:38:21,158\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:38:25,059\tps   1807391 : Added DF for psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0003_sync\n",
+      "2021-04-28 21:38:25,064\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   23 (of 42)\n",
+      "N chunks                 23\n",
+      "Total time               13 minutes and 27.71 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         54.761905\n",
+      "Est. Time To Completion  11 minutes and 7.24 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      35 seconds, 117 milliseconds, 760 microseconds and 347.28 nanoseconds\n",
+      "p50                      21 seconds, 200 milliseconds, 794 microseconds and 935.23 nanoseconds\n",
+      "p95                      1 minute, 26 seconds, 940 milliseconds, 458 microseconds and 11.63 nanoseconds\n",
+      "p99                      2 minutes, 24 seconds, 237 milliseconds, 768 microseconds and 182.75 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:38:25,108\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0259\n",
+      "2021-04-28 21:38:25,902\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:38:29,812\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:38:29,815\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:38:33,697\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:38:33,698\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:38:33,698\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:38:37,786\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0259\n",
+      "2021-04-28 21:38:37,791\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   24 (of 42)\n",
+      "N chunks                 24\n",
+      "Total time               13 minutes and 40.43 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         57.142857\n",
+      "Est. Time To Completion  10 minutes and 15.32 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      34 seconds, 184 milliseconds, 624 microseconds and 532.86 nanoseconds\n",
+      "p50                      18 seconds, 161 milliseconds, 446 microseconds and 928.98 nanoseconds\n",
+      "p95                      1 minute, 26 seconds, 431 milliseconds, 933 microseconds and 331.49 nanoseconds\n",
+      "p99                      2 minutes, 23 seconds, 516 milliseconds, 226 microseconds and 377.49 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 21:38:37,835\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=val&segment_id=scene-0105\n",
+      "2021-04-28 21:38:38,597\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:38:42,635\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:38:42,638\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:38:46,860\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:38:46,861\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:38:46,861\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:38:50,798\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=val&segment_id=scene-0105\n",
+      "2021-04-28 21:38:50,802\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   25 (of 42)\n",
+      "N chunks                 25\n",
+      "Total time               13 minutes and 53.44 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         59.523810\n",
+      "Est. Time To Completion  9 minutes and 26.74 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      33 seconds, 337 milliseconds, 527 microseconds and 74.81 nanoseconds\n",
+      "p50                      15 seconds, 122 milliseconds, 98 microseconds and 922.73 nanoseconds\n",
+      "p95                      1 minute, 25 seconds, 923 milliseconds, 408 microseconds and 651.35 nanoseconds\n",
+      "p99                      2 minutes, 22 seconds, 794 milliseconds, 684 microseconds and 572.22 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:38:50,803\tps   1807391 : Building DF for psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0004_sync\n",
+      "2021-04-28 21:38:51,560\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:38:55,452\tps   1807391 : Creating datums for KITTI-360 ...\n",
+      "2021-04-28 21:38:55,452\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:39:00,203\tps   1807391 : ... seq 2013_05_28_drive_0004_sync has 99660 URIs spanning 1211 sec, creating 389 slices ...\n",
+      "2021-04-28 21:39:00,714\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:39:23,976\tps   1807391 : ... partitioned datums into 0 RDDs.\n",
+      "2021-04-28 21:39:23,977\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:39:23,977\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:39:28,251\tps   1807391 : Added DF for psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0004_sync\n",
+      "2021-04-28 21:39:28,256\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  -------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   26 (of 42)\n",
+      "N chunks                 26\n",
+      "Total time               14 minutes and 30.89 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         61.904762\n",
+      "Est. Time To Completion  8 minutes and 55.93 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      33 seconds, 495 milliseconds, 650 microseconds and 181.4 nanoseconds\n",
+      "p50                      18 seconds, 161 milliseconds, 446 microseconds and 928.98 nanoseconds\n",
+      "p95                      1 minute, 25 seconds, 414 milliseconds, 883 microseconds and 971.21 nanoseconds\n",
+      "p99                      2 minutes, 22 seconds, 73 milliseconds, 142 microseconds and 766.95 nanoseconds\n",
+      "-----------------------  -------------------------------------------------------------------------------\n",
+      "2021-04-28 21:39:28,302\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0002\n",
+      "2021-04-28 21:39:29,119\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:39:33,299\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:39:33,302\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:39:37,299\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:39:37,300\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:39:37,300\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:39:41,433\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0002\n",
+      "2021-04-28 21:39:41,438\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   27 (of 42)\n",
+      "N chunks                 27\n",
+      "Total time               14 minutes and 44.06 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         64.285714\n",
+      "Est. Time To Completion  8 minutes and 11.15 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      32 seconds, 743 milliseconds, 113 microseconds and 685.54 nanoseconds\n",
+      "p50                      15 seconds, 122 milliseconds, 98 microseconds and 922.73 nanoseconds\n",
+      "p95                      1 minute, 24 seconds, 906 milliseconds, 359 microseconds and 291.08 nanoseconds\n",
+      "p99                      2 minutes, 21 seconds, 351 milliseconds, 600 microseconds and 961.69 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:39:41,483\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=val&segment_id=scene-0276\n",
+      "2021-04-28 21:39:42,253\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:39:46,276\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:39:46,279\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:39:50,503\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:39:50,503\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:39:50,504\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:39:54,498\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=val&segment_id=scene-0276\n",
+      "2021-04-28 21:39:54,503\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  -------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   28 (of 42)\n",
+      "N chunks                 28\n",
+      "Total time               14 minutes and 57.12 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         66.666667\n",
+      "Est. Time To Completion  7 minutes and 28.56 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      32 seconds, 40 milliseconds, 156 microseconds and 628.4 nanoseconds\n",
+      "p50                      14 seconds, 270 milliseconds, 437 microseconds and 836.65 nanoseconds\n",
+      "p95                      1 minute, 24 seconds, 397 milliseconds, 834 microseconds and 610.94 nanoseconds\n",
+      "p99                      2 minutes, 20 seconds, 630 milliseconds, 59 microseconds and 156.42 nanoseconds\n",
+      "-----------------------  -------------------------------------------------------------------------------\n",
+      "2021-04-28 21:39:54,549\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=val&segment_id=scene-0273\n",
+      "2021-04-28 21:39:55,327\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:39:59,214\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:39:59,217\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:40:03,073\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:40:03,074\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:40:03,074\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:40:07,083\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=val&segment_id=scene-0273\n",
+      "2021-04-28 21:40:07,088\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   29 (of 42)\n",
+      "N chunks                 29\n",
+      "Total time               15 minutes and 9.7 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         69.047619\n",
+      "Est. Time To Completion  6 minutes and 47.8 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      31 seconds, 369 milliseconds, 105 microseconds and 347.27 nanoseconds\n",
+      "p50                      13 seconds, 418 milliseconds, 776 microseconds and 750.56 nanoseconds\n",
+      "p95                      1 minute, 23 seconds, 889 milliseconds, 309 microseconds and 930.8 nanoseconds\n",
+      "p99                      2 minutes, 19 seconds, 908 milliseconds, 517 microseconds and 351.15 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 21:40:07,133\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=val&segment_id=scene-0094\n",
+      "2021-04-28 21:40:07,936\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:40:11,864\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:40:11,867\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:40:15,815\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:40:15,816\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:40:15,817\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:40:19,850\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=val&segment_id=scene-0094\n",
+      "2021-04-28 21:40:19,855\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   30 (of 42)\n",
+      "N chunks                 30\n",
+      "Total time               15 minutes and 22.47 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         71.428571\n",
+      "Est. Time To Completion  6 minutes and 8.99 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      30 seconds, 748 milliseconds, 883 microseconds and 716.27 nanoseconds\n",
+      "p50                      13 seconds, 414 milliseconds, 107 microseconds and 84.27 nanoseconds\n",
+      "p95                      1 minute, 23 seconds, 380 milliseconds, 785 microseconds and 250.66 nanoseconds\n",
+      "p99                      2 minutes, 19 seconds, 186 milliseconds, 975 microseconds and 545.88 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:40:19,900\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0067\n",
+      "2021-04-28 21:40:20,728\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:40:24,742\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:40:24,745\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:40:28,226\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:40:28,227\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:40:28,227\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:40:32,354\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0067\n",
+      "2021-04-28 21:40:32,359\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   31 (of 42)\n",
+      "N chunks                 31\n",
+      "Total time               15 minutes and 34.97 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         73.809524\n",
+      "Est. Time To Completion  5 minutes and 31.76 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      30 seconds, 160 milliseconds, 165 microseconds and 102.25 nanoseconds\n",
+      "p50                      13 seconds, 409 milliseconds, 437 microseconds and 417.98 nanoseconds\n",
+      "p95                      1 minute, 22 seconds, 872 milliseconds, 260 microseconds and 570.53 nanoseconds\n",
+      "p99                      2 minutes, 18 seconds, 465 milliseconds, 433 microseconds and 740.62 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:40:32,404\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0397\n",
+      "2021-04-28 21:40:33,197\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:40:37,203\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:40:37,211\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:40:41,026\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:40:41,027\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:40:41,027\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:40:45,084\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0397\n",
+      "2021-04-28 21:40:45,089\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   32 (of 42)\n",
+      "N chunks                 32\n",
+      "Total time               15 minutes and 47.69 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         76.190476\n",
+      "Est. Time To Completion  4 minutes and 56.15 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      29 seconds, 615 milliseconds, 337 microseconds and 878.47 nanoseconds\n",
+      "p50                      13 seconds, 366 milliseconds, 847 microseconds and 753.52 nanoseconds\n",
+      "p95                      1 minute, 22 seconds, 363 milliseconds, 735 microseconds and 890.39 nanoseconds\n",
+      "p99                      2 minutes, 17 seconds, 743 milliseconds, 891 microseconds and 935.35 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:40:45,134\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0373\n",
+      "2021-04-28 21:40:45,889\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:40:50,046\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:40:50,050\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:40:54,159\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:40:54,159\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:40:54,160\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:40:58,023\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0373\n",
+      "2021-04-28 21:40:58,028\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  -------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   33 (of 42)\n",
+      "N chunks                 33\n",
+      "Total time               16 minutes and 0.63 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         78.571429\n",
+      "Est. Time To Completion  4 minutes and 21.99 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      29 seconds, 109 milliseconds, 855 microseconds and 536.26 nanoseconds\n",
+      "p50                      13 seconds, 324 milliseconds, 258 microseconds and 89.07 nanoseconds\n",
+      "p95                      1 minute, 21 seconds, 855 milliseconds, 211 microseconds and 210.25 nanoseconds\n",
+      "p99                      2 minutes, 17 seconds, 22 milliseconds, 350 microseconds and 130.08 nanoseconds\n",
+      "-----------------------  -------------------------------------------------------------------------------\n",
+      "2021-04-28 21:40:58,029\tps   1807391 : Building DF for psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync\n",
+      "2021-04-28 21:40:58,777\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:41:02,756\tps   1807391 : Creating datums for KITTI-360 ...\n",
+      "2021-04-28 21:41:02,756\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:41:09,409\tps   1807391 : ... seq 2013_05_28_drive_0000_sync has 101669 URIs spanning 1204 sec, creating 397 slices ...\n",
+      "2021-04-28 21:41:09,906\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:41:35,528\tps   1807391 : ... partitioned datums into 0 RDDs.\n",
+      "2021-04-28 21:41:35,530\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:41:35,531\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:41:39,747\tps   1807391 : Added DF for psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync\n",
+      "2021-04-28 21:41:39,752\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   34 (of 42)\n",
+      "N chunks                 34\n",
+      "Total time               16 minutes and 42.34 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         80.952381\n",
+      "Est. Time To Completion  3 minutes and 55.85 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      29 seconds, 480 milliseconds, 720 microseconds and 884.66 nanoseconds\n",
+      "p50                      13 seconds, 366 milliseconds, 847 microseconds and 753.52 nanoseconds\n",
+      "p95                      1 minute, 21 seconds, 346 milliseconds, 686 microseconds and 530.11 nanoseconds\n",
+      "p99                      2 minutes, 16 seconds, 300 milliseconds, 808 microseconds and 324.81 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 21:41:39,752\tps   1807391 : Building DF for psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0006_sync\n",
+      "2021-04-28 21:41:40,563\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:41:44,618\tps   1807391 : Creating datums for KITTI-360 ...\n",
+      "2021-04-28 21:41:44,619\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:41:50,030\tps   1807391 : ... seq 2013_05_28_drive_0006_sync has 85875 URIs spanning 1014 sec, creating 335 slices ...\n",
+      "2021-04-28 21:41:50,489\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:42:10,376\tps   1807391 : ... partitioned datums into 0 RDDs.\n",
+      "2021-04-28 21:42:10,376\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:42:10,377\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:42:14,789\tps   1807391 : Added DF for psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0006_sync\n",
+      "2021-04-28 21:42:14,794\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   35 (of 42)\n",
+      "N chunks                 35\n",
+      "Total time               17 minutes and 17.38 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         83.333333\n",
+      "Est. Time To Completion  3 minutes and 27.48 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      29 seconds, 639 milliseconds, 469 microseconds and 930.1 nanoseconds\n",
+      "p50                      13 seconds, 409 milliseconds, 437 microseconds and 417.98 nanoseconds\n",
+      "p95                      1 minute, 20 seconds, 838 milliseconds, 161 microseconds and 849.98 nanoseconds\n",
+      "p99                      2 minutes, 15 seconds, 579 milliseconds, 266 microseconds and 519.55 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:42:14,795\tps   1807391 : Building DF for psegs://dataset=kitti-360-fused&split=train&segment_id=2013_05_28_drive_0000_sync\n",
+      "2021-04-28 21:42:15,601\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:42:19,646\tps   1807391 : Creating datums for KITTI-360 ...\n",
+      "2021-04-28 21:42:19,647\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:42:27,116\tps   1807391 : ... seq 2013_05_28_drive_0000_sync has 122635 URIs spanning 1204 sec, creating 1916 slices ...\n",
+      "2021-04-28 21:42:27,753\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:44:10,573\tps   1807391 : ... partitioned datums into 0 RDDs.\n",
+      "2021-04-28 21:44:10,574\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:44:10,574\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:44:14,858\tps   1807391 : Added DF for psegs://dataset=kitti-360-fused&split=train&segment_id=2013_05_28_drive_0000_sync\n",
+      "2021-04-28 21:44:14,863\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  ------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   36 (of 42)\n",
+      "N chunks                 36\n",
+      "Total time               19 minutes and 17.45 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         85.714286\n",
+      "Est. Time To Completion  3 minutes and 12.91 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      32 seconds, 151 milliseconds, 266 microseconds and 819.9 nanoseconds\n",
+      "p50                      13 seconds, 414 milliseconds, 107 microseconds and 84.27 nanoseconds\n",
+      "p95                      1 minute, 35 seconds, 984 milliseconds, 170 microseconds and 19.63 nanoseconds\n",
+      "p99                      2 minutes, 26 seconds, 95 milliseconds, 52 microseconds and 421.09 nanoseconds\n",
+      "-----------------------  ------------------------------------------------------------------------------\n",
+      "2021-04-28 21:44:14,864\tps   1807391 : Building DF for psegs://dataset=kitti-360-fused&split=train&segment_id=2013_05_28_drive_0004_sync\n",
+      "2021-04-28 21:44:15,716\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:44:19,953\tps   1807391 : Creating datums for KITTI-360 ...\n",
+      "2021-04-28 21:44:19,953\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:44:25,535\tps   1807391 : ... seq 2013_05_28_drive_0004_sync has 115934 URIs spanning 1211 sec, creating 1811 slices ...\n",
+      "2021-04-28 21:44:26,152\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:46:06,936\tps   1807391 : ... partitioned datums into 0 RDDs.\n",
+      "2021-04-28 21:46:06,937\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:46:06,937\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:46:11,326\tps   1807391 : Added DF for psegs://dataset=kitti-360-fused&split=train&segment_id=2013_05_28_drive_0004_sync\n",
+      "2021-04-28 21:46:11,331\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   37 (of 42)\n",
+      "N chunks                 37\n",
+      "Total time               21 minutes and 13.91 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         88.095238\n",
+      "Est. Time To Completion  2 minutes and 52.15 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      34 seconds, 429 milliseconds, 972 microseconds and 725.95 nanoseconds\n",
+      "p50                      13 seconds, 418 milliseconds, 776 microseconds and 750.56 nanoseconds\n",
+      "p95                      1 minute, 57 seconds, 183 milliseconds, 539 microseconds and 867.4 nanoseconds\n",
+      "p99                      2 minutes, 25 seconds, 694 milliseconds, 577 microseconds and 121.73 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:46:11,376\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0669\n",
+      "2021-04-28 21:46:12,190\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:46:16,273\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:46:16,276\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:46:20,619\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:46:20,619\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:46:20,620\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:46:24,766\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0669\n",
+      "2021-04-28 21:46:24,771\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   38 (of 42)\n",
+      "N chunks                 38\n",
+      "Total time               21 minutes and 27.34 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         90.476190\n",
+      "Est. Time To Completion  2 minutes and 15.51 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      33 seconds, 877 milliseconds, 464 microseconds and 181.5 nanoseconds\n",
+      "p50                      13 seconds, 426 milliseconds, 712 microseconds and 393.76 nanoseconds\n",
+      "p95                      1 minute, 57 seconds, 3 milliseconds, 501 microseconds and 236.44 nanoseconds\n",
+      "p99                      2 minutes, 25 seconds, 294 milliseconds, 101 microseconds and 822.38 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:46:24,815\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0255\n",
+      "2021-04-28 21:46:25,642\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:46:29,719\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:46:29,723\tps   1807391 : ... checking existing URIs ...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-04-28 21:46:33,465\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:46:33,466\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:46:33,466\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:46:37,587\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0255\n",
+      "2021-04-28 21:46:37,592\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   39 (of 42)\n",
+      "N chunks                 39\n",
+      "Total time               21 minutes and 40.16 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         92.857143\n",
+      "Est. Time To Completion  1 minute and 40.01 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      33 seconds, 337 milliseconds, 440 microseconds and 87.25 nanoseconds\n",
+      "p50                      13 seconds, 418 milliseconds, 776 microseconds and 750.56 nanoseconds\n",
+      "p95                      1 minute, 56 seconds, 823 milliseconds, 462 microseconds and 605.48 nanoseconds\n",
+      "p99                      2 minutes, 24 seconds, 893 milliseconds, 626 microseconds and 523.02 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:46:37,593\tps   1807391 : Building DF for psegs://dataset=kitti-360-fused&split=train&segment_id=2013_05_28_drive_0002_sync\n",
+      "2021-04-28 21:46:38,364\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:46:42,411\tps   1807391 : Creating datums for KITTI-360 ...\n",
+      "2021-04-28 21:46:42,411\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:46:50,075\tps   1807391 : ... seq 2013_05_28_drive_0002_sync has 174992 URIs spanning 2013 sec, creating 2734 slices ...\n",
+      "2021-04-28 21:46:50,957\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:49:22,243\tps   1807391 : ... partitioned datums into 0 RDDs.\n",
+      "2021-04-28 21:49:22,243\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:49:22,244\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:49:26,515\tps   1807391 : Added DF for psegs://dataset=kitti-360-fused&split=train&segment_id=2013_05_28_drive_0002_sync\n",
+      "2021-04-28 21:49:26,520\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   40 (of 42)\n",
+      "N chunks                 40\n",
+      "Total time               24 minutes and 29.08 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         95.238095\n",
+      "Est. Time To Completion  1 minute and 13.45 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      36 seconds, 727 milliseconds, 68 microseconds and 823.58 nanoseconds\n",
+      "p50                      13 seconds, 426 milliseconds, 712 microseconds and 393.76 nanoseconds\n",
+      "p95                      2 minutes, 2 seconds, 66 milliseconds, 534 microseconds and 459.59 nanoseconds\n",
+      "p99                      2 minutes, 45 seconds, 486 milliseconds, 337 microseconds and 900.16 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:49:26,566\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0501\n",
+      "2021-04-28 21:49:27,407\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:49:31,647\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:49:31,651\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:49:41,056\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:49:41,056\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:49:41,057\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:49:45,352\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_track&segment_id=scene-0501\n",
+      "2021-04-28 21:49:45,357\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   41 (of 42)\n",
+      "N chunks                 41\n",
+      "Total time               24 minutes and 47.92 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         97.619048\n",
+      "Est. Time To Completion  36.29 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      36 seconds, 290 milliseconds, 615 microseconds and 302.76 nanoseconds\n",
+      "p50                      13 seconds, 434 milliseconds, 648 microseconds and 36.96 nanoseconds\n",
+      "p95                      2 minutes, 64 milliseconds, 157 microseconds and 962.8 nanoseconds\n",
+      "p99                      2 minutes, 45 seconds, 398 milliseconds, 228 microseconds and 883.74 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:49:45,402\tps   1807391 : Building DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0767\n",
+      "2021-04-28 21:49:46,222\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:49:50,401\tps   1807391 : Filtering to only 1 segments\n",
+      "2021-04-28 21:49:50,405\tps   1807391 : ... checking existing URIs ...\n",
+      "2021-04-28 21:49:54,402\tps   1807391 : ... all datums already exist, skipping this chunk ...\n",
+      "2021-04-28 21:49:54,402\tps   1807391 : Going to write in 0 chunks ...\n",
+      "2021-04-28 21:49:54,402\tps   1807391 : Loading /opt/psegs/dataroot/stamped_datum/stamped_datums ...\n",
+      "2021-04-28 21:49:58,594\tps   1807391 : Added DF for psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0767\n",
+      "2021-04-28 21:49:58,599\toarph 1807391 : Progress for \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   42 (of 42)\n",
+      "N chunks                 42\n",
+      "Total time               25 minutes and 1.15 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         100.000000\n",
+      "Est. Time To Completion  0 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      35 seconds, 741 milliseconds, 724 microseconds and 752.24 nanoseconds\n",
+      "p50                      13 seconds, 426 milliseconds, 712 microseconds and 393.76 nanoseconds\n",
+      "p95                      1 minute, 59 seconds, 884 milliseconds, 119 microseconds and 331.84 nanoseconds\n",
+      "p99                      2 minutes, 45 seconds, 310 milliseconds, 119 microseconds and 867.32 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "2021-04-28 21:49:58,599\tps   1807391 : ... done building union DF for 42 segments\n",
+      "2021-04-28 21:50:04,255\toarph 1807391 : \n",
+      "get_or_build_datum_dfs [Pid:1807391 Id:140553816379056]\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "Thruput\n",
+      "N thru                   42 (of 42)\n",
+      "N chunks                 42\n",
+      "Total time               25 minutes and 1.15 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       0\n",
+      "Progress\n",
+      "Percent Complete         100.000000\n",
+      "Est. Time To Completion  0 seconds\n",
+      "Latency (per chunk)\n",
+      "Avg                      35 seconds, 741 milliseconds, 724 microseconds and 752.24 nanoseconds\n",
+      "p50                      13 seconds, 426 milliseconds, 712 microseconds and 393.76 nanoseconds\n",
+      "p95                      1 minute, 59 seconds, 884 milliseconds, 119 microseconds and 331.84 nanoseconds\n",
+      "p99                      2 minutes, 45 seconds, 310 milliseconds, 119 microseconds and 867.32 nanoseconds\n",
+      "-----------------------  --------------------------------------------------------------------------------\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "\n",
+    "def fp_uri_to_fname(fp_uri):\n",
+    "    fp_uri = str(fp_uri)\n",
+    "    import urllib.parse\n",
+    "    fname = urllib.parse.quote(fp_uri)\n",
+    "    from slugify import slugify\n",
+    "    fname = slugify(fname)\n",
+    "    \n",
+    "    # Ubuntu seems to limit filename length to 255 or so ...\n",
+    "    if len(fname) > 150:\n",
+    "        from oarphpy.util import stable_hash\n",
+    "        fname_hash = stable_hash(fname)\n",
+    "        fname = fname[:150] + str(fname_hash)\n",
+    "    \n",
+    "    return fname\n",
+    "\n",
+    "def extract_fp_uris_from_html(html):\n",
+    "    import re\n",
+    "    matches = list(set(re.findall(r'alt=\\\\\"(.*?)\\\\\"', html)))\n",
+    "    import html\n",
+    "    return set(html.unescape(s) for s in matches)\n",
+    "\n",
+    "FLOW_EVAL_REPORT_BASEDIR = '/opt/psegs/flow_eval_hists_222/'\n",
+    "from oarphpy import util as oputil\n",
+    "oputil.mkdir(FLOW_EVAL_REPORT_BASEDIR)\n",
+    "\n",
+    "from oarphpy import plotting as pl\n",
+    "class Plotter(pl.HistogramWithExamplesPlotter):\n",
+    "    NUM_BINS = 50\n",
+    "    ROWS_TO_DISPLAY_PER_BUCKET = 5\n",
+    "    SUB_PIVOT_COL = 'fp_dataset'\n",
+    "\n",
+    "    def display_bucket(self, sub_pivot, bucket_id, irows):\n",
+    "        from oarphpy.spark import RowAdapter\n",
+    "        from psegs import datum\n",
+    "        \n",
+    "        # Sample from irows using reservoir sampling\n",
+    "        import random\n",
+    "        rows = []\n",
+    "        for i, row in enumerate(irows):\n",
+    "            r = random.randint(0, i)\n",
+    "            if r < self.ROWS_TO_DISPLAY_PER_BUCKET:\n",
+    "                if i < self.ROWS_TO_DISPLAY_PER_BUCKET:\n",
+    "                    rows.insert(r, row)\n",
+    "                else:\n",
+    "                    rows[r] = row\n",
+    "        \n",
+    "        # Now render each row to HTML\n",
+    "        row_htmls = []\n",
+    "        for row in rows:\n",
+    "            rowdata = RowAdapter.from_row(row)\n",
+    "            \n",
+    "            fp_datset = rowdata['fp_dataset']\n",
+    "            fp_uri_str = rowdata['fp_uri']\n",
+    "            fp_uri = datum.URI.from_str(fp_uri_str)\n",
+    "            fp_page_uri = fp_uri_to_fname(fp_uri_str) + '.html'\n",
+    "            id1 = rowdata['fp_id1']\n",
+    "            id2 = rowdata['fp_id2']\n",
+    "            \n",
+    "            row_html = f\"\"\"\n",
+    "                <a href=\"{fp_page_uri}\" alt=\"{fp_uri_str}\">\n",
+    "                    {fp_datset} {fp_uri.split} {fp_uri.segment_id} {id1} -> {id2}\n",
+    "                </a><br />\"\"\"\n",
+    "            row_htmls.append(row_html)\n",
+    "        \n",
+    "        HTML = \"\"\"\n",
+    "        <b>Pivot: {spv} Bucket: {bucket_id} </b> <br/>\n",
+    "        \n",
+    "        {row_bodies}\n",
+    "        \"\"\".format(\n",
+    "              spv=sub_pivot,\n",
+    "              bucket_id=bucket_id,\n",
+    "              row_bodies=\"<br/><br/><br/>\".join(row_htmls))\n",
+    "        \n",
+    "        return bucket_id, HTML\n",
+    "\n",
+    "plotter = Plotter()\n",
+    "\n",
+    "chosen_fp_uris = set()\n",
+    "histogram_htmls = []\n",
+    "\n",
+    "SKIP_COLS = (\n",
+    "    'fp_uri',\n",
+    "    'fp_dataset',\n",
+    "    'fp_id1',\n",
+    "    'fp_id2',\n",
+    "    'has_scene_flow',\n",
+    ")\n",
+    "\n",
+    "cols = [col for col in results_df.columns if col not in SKIP_COLS]\n",
+    "print(\"Rendering %s histograms\" % len(cols))\n",
+    "for col in cols:\n",
+    "    print(\"Working on %s\" % col)\n",
+    "    cur_df = results_df\n",
+    "#     if col == 'diff_time_sec':\n",
+    "#         # fixme hacks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
+    "#         cur_df = cur_df.filter(cur_df.diff_time_sec.isNotNull())\n",
+    "    if 'SceneFlow' in col:\n",
+    "        cur_df = cur_df.filter(cur_df.has_scene_flow == True)\n",
+    "#     fig = plotter.run(cur_df, col)\n",
+    "    dest = os.path.join(FLOW_EVAL_REPORT_BASEDIR, '%s.html' % col)\n",
+    "#     pl.save_bokeh_fig(fig, dest)\n",
+    "    \n",
+    "    with open(dest, 'r') as f:\n",
+    "        cur_chosen_fp_uris = extract_fp_uris_from_html(f.read())\n",
+    "#     cur_chosen_fp_uris = set(u for u in cur_chosen_fp_uris if ('deep_deform' not in u and 'kitti_sf15' not in u))\n",
+    "    \n",
+    "    chosen_fp_uris |= cur_chosen_fp_uris\n",
+    "    print('total chosen_fp_uris, added', len(chosen_fp_uris), len(cur_chosen_fp_uris))\n",
+    "\n",
+    "print(\"Rendering %s histogram bucket pages\" % len(chosen_fp_uris))\n",
+    "class UnionFactory(FlowPairUnionFactory):\n",
+    "    FACTORIES = ALL_FP_FACTORY_CLSS\n",
+    "\n",
+    "# analysis_uris_full = UnionFactory.list_fp_uris(spark)\n",
+    "\n",
+    "fp_rdd = UnionFactory.get_fp_rdd_for_uris(spark, chosen_fp_uris)\n",
+    "def render_and_save(fp):\n",
+    "    from threadpoolctl import threadpool_limits\n",
+    "    with threadpool_limits(limits=1, user_api='blas'):\n",
+    "        import os\n",
+    "        recon = FlowReconstructedImagePair.create_from(fp)\n",
+    "        fstats = OFlowStats.create_from(fp)\n",
+    "        errors = OFlowReconErrors(recon)\n",
+    "        \n",
+    "        if fp.has_scene_flow():\n",
+    "            with threadpool_limits(limits=1, user_api='openmp'):\n",
+    "                sflow = SFlowStats.create_from(fp)\n",
+    "            sflow_html = sflow.to_html()\n",
+    "        else:\n",
+    "            sflow_html = '(no SceneFlow data)'\n",
+    "        \n",
+    "        page_html = \"<br/>\".join(\n",
+    "            (PLOTLY_INIT_HTML, fp.to_html(), recon.to_html(), fstats.to_html(), errors.to_html(), sflow_html))\n",
+    "\n",
+    "        dest = os.path.join(FLOW_EVAL_REPORT_BASEDIR, fp_uri_to_fname(fp.uri) + '.html')\n",
+    "        with open(dest, 'w') as f:\n",
+    "            f.write(page_html)\n",
+    "\n",
+    "fp_rdd.foreach(render_and_save)\n",
+    "    \n",
+    "    \n",
+    "# from bokeh.io import output_notebook\n",
+    "# output_notebook()\n",
+    "# from bokeh.plotting import show\n",
+    "# show(fig)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from oarphpy import util as oputil\n",
+    "# paths = oputil.all_files_recursive('/opt/psegs/flow_eval_hists_222/')\n",
+    "# for path in paths:\n",
+    "#     if 'psegs-3a' in path:\n",
+    "#         with open(path, 'r') as f:\n",
+    "#             data = f.read()\n",
+    "#         data = \"\"\"<script src=\"https://cdn.plot.ly/plotly-latest.min.js\"></script>\"\"\" + data\n",
+    "#         with open(path, 'w') as f:\n",
+    "#             f.write(data)\n",
+    "#         print(path)\n",
+    "        \n",
+    "\n",
+    "# PLOTLY_INIT_HTML"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/cheap_optical_flow_from_fused_lidar.ipynb b/notebooks/cheap_optical_flow_from_fused_lidar.ipynb
new file mode 100644
index 0000000..56d5837
--- /dev/null
+++ b/notebooks/cheap_optical_flow_from_fused_lidar.ipynb
@@ -0,0 +1,797 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Synthetic Optical Flow from Fused Lidar\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "sys.path.append('/opt/psegs')\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
+    "from psegs.exp.fused_lidar_flow import FusedLidarCloudTableBase\n",
+    "from psegs.exp.fused_lidar_flow import TaskLidarCuboidCameraDFFactory\n",
+    "from psegs.exp.fused_lidar_flow import OpticalFlowRenderBase\n",
+    "\n",
+    "import IPython.display\n",
+    "import PIL.Image\n",
+    "\n",
+    "\n",
+    "## General Notebook Utilities\n",
+    "    \n",
+    "def imshow(x):\n",
+    "    IPython.display.display(PIL.Image.fromarray(x))\n",
+    "\n",
+    "def show_html(x):\n",
+    "    from IPython.display import display, HTML\n",
+    "    display(HTML(x))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## SemanticKITTI"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from psegs.exp.semantic_kitti import SemanticKITTISDTable\n",
+    "\n",
+    "class SemanticKITTILCCDFFactory(TaskLidarCuboidCameraDFFactory):\n",
+    "    \n",
+    "    SRC_SD_TABLE = SemanticKITTISDTable\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def build_df_for_segment(cls, spark, segment_uri):\n",
+    "        seg_rdd = cls.SRC_SD_TABLE.get_segment_datum_rdd(spark, segment_uri)\n",
+    "        \n",
+    "        def to_task_row(scan_id_iter_sds):\n",
+    "            scan_id, iter_sds = scan_id_iter_sds\n",
+    "            camera_images = []\n",
+    "            point_clouds = []\n",
+    "            for sd in iter_sds:\n",
+    "                if sd.camera_image is not None:\n",
+    "                    camera_images.append(sd)\n",
+    "                elif sd.point_cloud is not None:\n",
+    "                    point_clouds.append(sd)\n",
+    "            \n",
+    "            from pyspark import Row\n",
+    "            r = Row(\n",
+    "                    task_id=int(scan_id),\n",
+    "                    pc_sds=point_clouds,\n",
+    "                    cuboids_sds=[], # SemanticKITTI has no cuboids\n",
+    "                    ci_sds=camera_images) \n",
+    "            from oarphpy.spark import RowAdapter\n",
+    "            return RowAdapter.to_row(r)\n",
+    "            \n",
+    "        grouped = seg_rdd.groupBy(lambda sd: sd.uri.extra['semantic_kitti.scan_id'])\n",
+    "        row_rdd = grouped.map(to_task_row)\n",
+    "\n",
+    "        df = spark.createDataFrame(row_rdd, schema=cls.table_schema())\n",
+    "        df = df.persist()\n",
+    "        return df\n",
+    "\n",
+    "class SemanticKITTIFusedWorldCloudTable(FusedLidarCloudTableBase):\n",
+    "    TASK_DF_FACTORY = SemanticKITTILCCDFFactory\n",
+    "\n",
+    "    # SemanticKITTI has no cuboids, so there are no per-object clouds to build.\n",
+    "    HAS_OBJ_CLOUDS = False\n",
+    "\n",
+    "class SemanticKITTIOFlowRenderer(OpticalFlowRenderBase):\n",
+    "    FUSED_LIDAR_SD_TABLE = SemanticKITTIFusedWorldCloudTable"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## KITTI-360"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from psegs.datasets.kitti_360 import KITTI360SDTable\n",
+    "class KITTI360OurFusedClouds(KITTI360SDTable):\n",
+    "    INCLUDE_FISHEYES = False\n",
+    "    INCLUDE_FUSED_CLOUDS = False  # Use our own fused clouds\n",
+    "\n",
+    "class KITTI360LCCDFFactory(TaskLidarCuboidCameraDFFactory):\n",
+    "    \n",
+    "    SRC_SD_TABLE = KITTI360OurFusedClouds\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def build_df_for_segment(cls, spark, segment_uri):\n",
+    "        datum_df = cls.SRC_SD_TABLE.get_segment_datum_df(spark, segment_uri)\n",
+    "        datum_df.registerTempTable('datums')\n",
+    "        print('Building tasks table for %s ...' % segment_uri.segment_id)\n",
+    "        \n",
+    "        spark.catalog.dropTempView('kitti360_tasks_df')\n",
+    "        task_data_df = spark.sql(\"\"\"\n",
+    "            CACHE TABLE kitti360_tasks_df OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS\n",
+    "            SELECT \n",
+    "              INT(uri.extra.`kitti-360.frame_id`) AS task_id,\n",
+    "              COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) \n",
+    "                  FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,\n",
+    "              COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) \n",
+    "                  FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,\n",
+    "              COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) \n",
+    "                  FILTER (WHERE uri.topic LIKE '%camera%') AS ci_sds\n",
+    "            FROM datums\n",
+    "            WHERE (\n",
+    "              uri.topic LIKE '%cuboid%' OR\n",
+    "              uri.topic LIKE '%lidar%' OR\n",
+    "              uri.topic LIKE '%camera%'\n",
+    "            ) AND (\n",
+    "              camera_image is NULL OR (camera_image.extra.`kitti-360.has-valid-ego-pose` = 'True')\n",
+    "            ) AND (\n",
+    "              point_cloud is NULL OR (point_cloud.extra.`kitti-360.has-valid-ego-pose` = 'True')\n",
+    "            )\n",
+    "            GROUP BY task_id\n",
+    "        \"\"\")\n",
+    "        \n",
+    "        tasks_df = spark.sql('SELECT * FROM kitti360_tasks_df')\n",
+    "        print('... done.')\n",
+    "        return tasks_df\n",
+    "        \n",
+    "    \n",
+    "class KITTI360WorldCloudTableBase(FusedLidarCloudTableBase):\n",
+    "    TASK_DF_FACTORY = KITTI360LCCDFFactory\n",
+    "        \n",
+    "class KITTI360OFlowRenderer(OpticalFlowRenderBase):\n",
+    "    FUSED_LIDAR_SD_TABLE = KITTI360WorldCloudTableBase\n",
+    "\n",
+    "    \n",
+    "    \n",
+    "# class KITTI360OurFusedWorldCloudTable(FusedLidarCloudTableBase):\n",
+    "#     SRC_SD_TABLE = KITTI360OurFusedClouds\n",
+    "    \n",
+    "#     @classmethod\n",
+    "#     def _get_task_lidar_cuboid_rdd(cls, spark, segment_uri):\n",
+    "#         datum_df = cls.SRC_SD_TABLE.get_segment_datum_df(spark, segment_uri)\n",
+    "#         datum_df.registerTempTable('datums')\n",
+    "#         spark.catalog.dropTempView('culi_tasks_df')\n",
+    "#         print('Building tasks table for %s ...' % segment_uri.segment_id)\n",
+    "#         spark.sql(\"\"\"\n",
+    "#           CACHE TABLE culi_tasks_df OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS\n",
+    "#           SELECT \n",
+    "#               CONCAT(uri.segment_id, '.', uri.extra.`kitti-360.frame_id`) AS task_id,\n",
+    "#               FLATTEN(COLLECT_LIST(cuboids)) AS cuboids, \n",
+    "#               COLLECT_LIST(point_cloud) AS point_clouds\n",
+    "#           FROM datums\n",
+    "#           WHERE \n",
+    "#               uri.topic LIKE '%cuboid%' OR uri.topic LIKE '%lidar%'\n",
+    "#           GROUP BY task_id\n",
+    "#         \"\"\")\n",
+    "        \n",
+    "        \n",
+    "#         # TODO! for lidar and camera image!\n",
+    "#         #         both_have_ego_pose = (\n",
+    "#         #             ci1.extra.get('kitti-360.has-valid-ego-pose') and\n",
+    "#         #             ci2.extra.get('kitti-360.has-valid-ego-pose'))\n",
+    "        \n",
+    "#         tasks_df = spark.sql('SELECT * FROM culi_tasks_df')\n",
+    "#         print('... done.')\n",
+    "#         return tasks_df.rdd\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## NuScenes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# !pip3 install nuscenes-devkit==1.1.2\n",
+    "from psegs.datasets.nuscenes import NuscStampedDatumTableBase\n",
+    "from psegs.datasets.nuscenes import NuscStampedDatumTableLabelsAllFrames\n",
+    "\n",
+    "\n",
+    "class NuscKFOnlyLCCDFFactory(TaskLidarCuboidCameraDFFactory):\n",
+    "    \n",
+    "    SRC_SD_TABLE = NuscStampedDatumTableBase\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def build_df_for_segment(cls, spark, segment_uri):\n",
+    "        datum_df = cls.SRC_SD_TABLE.get_segment_datum_df(spark, segment_uri)\n",
+    "        datum_df.registerTempTable('datums')\n",
+    "        print('Building tasks table for %s ...' % segment_uri.segment_id)\n",
+    "        \n",
+    "        # Nusc doesn't have numerical task_ids so we'll have to induce\n",
+    "        # one via lidar timestamp.\n",
+    "        # NB: for Nusc: can group by nuscenes-sample-token FOR KEYFRAMES-ONLY DATA\n",
+    "        task_data_df = spark.sql(\"\"\"\n",
+    "            SELECT \n",
+    "              COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) \n",
+    "                  FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,\n",
+    "              COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) \n",
+    "                  FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,\n",
+    "              COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) \n",
+    "                  FILTER (WHERE uri.topic LIKE '%camera%') AS ci_sds,\n",
+    "              MIN(uri.timestamp) FILTER (WHERE uri.topic LIKE '%lidar%') AS lidar_time,\n",
+    "              FIRST(uri.extra.`nuscenes-sample-token`) AS sample_token\n",
+    "            FROM datums\n",
+    "            WHERE \n",
+    "            uri.extra.`nuscenes-is-keyframe` = 'True' AND (\n",
+    "              uri.extra['nuscenes-label-channel'] is NULL OR \n",
+    "              uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'\n",
+    "            ) AND (\n",
+    "              uri.topic LIKE '%cuboid%' OR\n",
+    "              uri.topic LIKE '%lidar%' OR\n",
+    "              uri.topic LIKE '%camera%'\n",
+    "            )\n",
+    "            GROUP BY uri.extra.`nuscenes-sample-token`\n",
+    "            ORDER BY lidar_time\n",
+    "        \"\"\")\n",
+    "        sample_tokens_ordered = [r.sample_token for r in task_data_df.select('sample_token').collect()]\n",
+    "        task_to_stoken = [\n",
+    "            {'task_id': task_id, 'sample_token': sample_token}\n",
+    "            for task_id, sample_token in enumerate(sample_tokens_ordered)\n",
+    "        ]\n",
+    "        task_id_rdd = spark.sparkContext.parallelize(task_to_stoken)\n",
+    "        task_id_df = spark.createDataFrame(task_id_rdd)\n",
+    "        tasks_df = task_data_df.join(task_id_df, on=['sample_token'], how='inner')\n",
+    "        tasks_df = tasks_df.persist()\n",
+    "        print('... done.')\n",
+    "        return tasks_df\n",
+    "\n",
+    "\n",
+    "class NuscAllFramesLCCDFFactory(TaskLidarCuboidCameraDFFactory):\n",
+    "    \n",
+    "    SRC_SD_TABLE = NuscStampedDatumTableLabelsAllFrames\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def build_df_for_segment(cls, spark, segment_uri):\n",
+    "        datum_df = cls.SRC_SD_TABLE.get_segment_datum_df(spark, segment_uri)\n",
+    "        datum_df.registerTempTable('datums')\n",
+    "        print('Building tasks table for %s ...' % segment_uri.segment_id)\n",
+    "        \n",
+    "        task_data_df = spark.sql(\"\"\"\n",
+    "            SELECT \n",
+    "              COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) \n",
+    "                  FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,\n",
+    "              COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) \n",
+    "                  FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,\n",
+    "              COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) \n",
+    "                  FILTER (WHERE uri.topic LIKE '%camera%') AS ci_sds,\n",
+    "              MIN(uri.timestamp) FILTER (WHERE uri.topic LIKE '%lidar%') AS lidar_time,\n",
+    "              FIRST(uri.extra.`nuscenes-sample-token`) AS sample_token\n",
+    "            FROM datums\n",
+    "            WHERE \n",
+    "            (\n",
+    "              uri.extra['nuscenes-label-channel'] is NULL OR \n",
+    "              uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'\n",
+    "            ) AND (\n",
+    "              uri.topic LIKE '%cuboid%' OR\n",
+    "              uri.topic LIKE '%lidar%' OR\n",
+    "              uri.topic LIKE '%camera%'\n",
+    "            )\n",
+    "            GROUP BY uri.extra.`nuscenes-sample-token`\n",
+    "            ORDER BY lidar_time\n",
+    "        \"\"\")\n",
+    "        sample_tokens_ordered = [r.sample_token for r in task_data_df.select('sample_token').collect()]\n",
+    "        task_to_stoken = [\n",
+    "            {'task_id': task_id, 'sample_token': sample_token}\n",
+    "            for task_id, sample_token in enumerate(sample_tokens_ordered)\n",
+    "        ]\n",
+    "        task_id_rdd = spark.sparkContext.parallelize(task_to_stoken)\n",
+    "        task_id_df = spark.createDataFrame(task_id_rdd)\n",
+    "        tasks_df = task_data_df.join(task_id_df, on=['sample_token'], how='inner')\n",
+    "        tasks_df = tasks_df.persist()\n",
+    "        print('... done.')\n",
+    "        return tasks_df\n",
+    "        \n",
+    "        \n",
+    "        \n",
+    "        \n",
+    "        \n",
+    "        \n",
+    "        \n",
+    "        \n",
+    "        \n",
+    "        \n",
+    "        \n",
+    "        \n",
+    "#         # Nusc doesn't have numerical task_ids so we'll have to induce\n",
+    "#         # one via lidar timestamp.\n",
+    "#         task_data_df = spark.sql(\"\"\"\n",
+    "#             SELECT \n",
+    "#               COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) \n",
+    "#                   FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,\n",
+    "#               COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) \n",
+    "#                   FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,\n",
+    "#               COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) \n",
+    "#                   FILTER (WHERE uri.topic LIKE '%camera%') AS ci_sds,\n",
+    "#               MIN(uri.timestamp) FILTER (WHERE uri.topic LIKE '%lidar%') AS lidar_time,\n",
+    "#               FIRST(uri.extra.`nuscenes-sample-token`) AS sample_token\n",
+    "#             FROM datums\n",
+    "#             WHERE \n",
+    "#              (\n",
+    "#                uri.extra['nuscenes-label-channel'] is NULL OR \n",
+    "#                uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'\n",
+    "#              ) AND (\n",
+    "#                uri.topic LIKE '%cuboid%' OR\n",
+    "#                uri.topic LIKE '%lidar%' OR\n",
+    "#                uri.topic LIKE '%camera%'\n",
+    "#              )\n",
+    "            \n",
+    "#            SELECT \n",
+    "#                CONCAT(uri.segment_id, '.', uri.timestamp) AS task_id,\n",
+    "#                FLATTEN(COLLECT_LIST(cuboids)) AS cuboids, \n",
+    "#                COLLECT_LIST(point_cloud) AS point_clouds\n",
+    "#            FROM datums\n",
+    "           \n",
+    "#            WHERE \n",
+    "#              (\n",
+    "#                uri.extra['nuscenes-label-channel'] is NULL OR \n",
+    "#                uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'\n",
+    "#              ) AND (\n",
+    "#                uri.topic LIKE '%cuboid%' OR\n",
+    "#                uri.topic LIKE '%lidar%'\n",
+    "#              )\n",
+    "#            GROUP BY task_id\n",
+    "#            HAVING SIZE(cuboids) > 0 AND SIZE(point_clouds) > 0\n",
+    "#          \"\"\")\n",
+    "#         # #             spark.sql(\"\"\"\n",
+    "#         # #               CACHE TABLE culi_tasks_df OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS\n",
+    "#         # #               SELECT \n",
+    "#         # #                   CONCAT(uri.segment_id, '.', uri.timestamp) AS task_id,\n",
+    "#         # #                   FLATTEN(COLLECT_LIST(cuboids)) AS cuboids, \n",
+    "#         # #                   COLLECT_LIST(point_cloud) AS point_clouds\n",
+    "#         # #               FROM datums\n",
+    "#         # #               WHERE \n",
+    "#         # #                 (\n",
+    "#         # #                   uri.extra['nuscenes-label-channel'] is NULL OR \n",
+    "#         # #                   uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'\n",
+    "#         # #                 ) AND (\n",
+    "#         # #                   uri.topic LIKE '%cuboid%' OR\n",
+    "#         # #                   uri.topic LIKE '%lidar%'\n",
+    "#         # #                 )\n",
+    "#         # #               GROUP BY task_id\n",
+    "#         # #               HAVING SIZE(cuboids) > 0 AND SIZE(point_clouds) > 0\n",
+    "#         # #             \"\"\")\n",
+    "        \n",
+    "        \n",
+    "        \n",
+    "#         task_data_df = spark.sql(\"\"\"\n",
+    "#             SELECT \n",
+    "#               COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) \n",
+    "#                   FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,\n",
+    "#               COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) \n",
+    "#                   FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,\n",
+    "#               COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) \n",
+    "#                   FILTER (WHERE uri.topic LIKE '%camera%') AS ci_sds,\n",
+    "#               MIN(uri.timestamp) FILTER (WHERE uri.topic LIKE '%lidar%') AS lidar_time,\n",
+    "#               FIRST(uri.extra.`nuscenes-sample-token`) AS sample_token\n",
+    "#             FROM datums\n",
+    "#             WHERE \n",
+    "#             uri.extra.`nuscenes-is-keyframe` = 'True' AND (\n",
+    "#               uri.extra['nuscenes-label-channel'] is NULL OR \n",
+    "#               uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'\n",
+    "#             ) AND (\n",
+    "#               uri.topic LIKE '%cuboid%' OR\n",
+    "#               uri.topic LIKE '%lidar%' OR\n",
+    "#               uri.topic LIKE '%camera%'\n",
+    "#             )\n",
+    "#             GROUP BY uri.extra.`nuscenes-sample-token`\n",
+    "#             ORDER BY lidar_time\n",
+    "#         \"\"\")\n",
+    "#         sample_tokens_ordered = [r.sample_token for r in task_data_df.select('sample_token').collect()]\n",
+    "#         task_to_stoken = [\n",
+    "#             {'task_id': task_id, 'sample_token': sample_token}\n",
+    "#             for task_id, sample_token in enumerate(sample_tokens_ordered)\n",
+    "#         ]\n",
+    "#         task_id_rdd = spark.sparkContext.parallelize(task_to_stoken)\n",
+    "#         task_id_df = spark.createDataFrame(task_id_rdd)\n",
+    "#         tasks_df = task_data_df.join(task_id_df, on=['sample_token'], how='inner')\n",
+    "#         tasks_df = tasks_df.persist()\n",
+    "#         print('... done.')\n",
+    "#         return tasks_df\n",
+    "    \n",
+    "class NuscWorldCloudTableBase(FusedLidarCloudTableBase):\n",
+    "    SPLITS = ['train_detect', 'train_track']\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def _filter_ego_vehicle(cls, cloud_ego):\n",
+    "        # Note: NuScenes authors have already corrected clouds for ego motion:\n",
+    "        # https://github.com/nutonomy/nuscenes-devkit/issues/481#issuecomment-716250423\n",
+    "        # But have not filtered out ego self-returns\n",
+    "        cloud_ego = cloud_ego[np.where(  ~(\n",
+    "                        (cloud_ego[:, 0] <= 1.5) & (cloud_ego[:, 0] >= -1.5) &  # Nusc lidar +x is +right\n",
+    "                        (cloud_ego[:, 1] <= 2.5) & (cloud_ego[:, 1] >= -2.5) &  # Nusc lidar +y is +forward\n",
+    "                        (cloud_ego[:, 2] <= 1.5) & (cloud_ego[:, 2] >= -1.5)    # Nusc lidar +z is +up\n",
+    "        ))]\n",
+    "        return cloud_ego\n",
+    "    \n",
+    "class NuscKFOnlyFusedWorldCloudTable(NuscWorldCloudTableBase):\n",
+    "    TASK_DF_FACTORY = NuscKFOnlyLCCDFFactory\n",
+    "\n",
+    "class NuscAllFramesFusedWorldCloudTable(NuscWorldCloudTableBase):\n",
+    "    TASK_DF_FACTORY = NuscAllFramesLCCDFFactory\n",
+    "    \n",
+    "    \n",
+    "#     task_id=int(scan_id),\n",
+    "#                     pc_sds=point_clouds,\n",
+    "#                     cuboids_sds=[], # SemanticKITTI has no cuboids\n",
+    "#                     ci_sds=camera_images\n",
+    "#     @classmethod\n",
+    "#     def _get_task_lidar_cuboid_rdd(cls, spark, segment_uri):\n",
+    "#         datum_df = cls.SRC_SD_TABLE.get_segment_datum_df(spark, segment_uri)\n",
+    "#         datum_df.registerTempTable('datums')\n",
+    "#         spark.catalog.dropTempView('nusc_task_df')\n",
+    "#         print('Building tasks table for %s ...' % segment_uri.segment_id)\n",
+    "        \n",
+    "#         # Nusc doesn't have numerical task_ids so we'll have to induce\n",
+    "#         # one via lidar timestamp.\n",
+    "#         if cls.SRC_SD_TABLE.LABELS_KEYFRAMES_ONLY:\n",
+    "#             # For Nusc: group by nuscenes-sample-token WITH KEYFRAMES\n",
+    "#             spark.sql(\"\"\"\n",
+    "#               CACHE TABLE nusc_task_df OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS\n",
+    "#               SELECT \n",
+    "#                   MIN(uri.timestamp) FILTER (WHERE uri.topic LIKE '%lidar%') AS task_id,\n",
+    "#                   COLLECT_LIST(*) FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,\n",
+    "#                   COLLECT_LIST(*) FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,\n",
+    "#                   COLLECT_LIST(*) FILTER (WHERE uri.topic LIKE '%cam%') AS ci_sds\n",
+    "#               FROM datums\n",
+    "#               WHERE \n",
+    "#                 uri.extra.`nuscenes-is-keyframe` = 'True' AND (\n",
+    "#                   uri.extra['nuscenes-label-channel'] is NULL OR \n",
+    "#                   uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%' OR\n",
+    "#                   uri.extra['nuscenes-label-channel'] LIKE '%CAM%'\n",
+    "#                 ) AND (\n",
+    "#                   uri.topic LIKE '%cuboid%' OR\n",
+    "#                   uri.topic LIKE '%lidar%' OR\n",
+    "#                   uri.topic LIKE '%cam%'\n",
+    "#                 )\n",
+    "#               GROUP BY task_id\n",
+    "#             \"\"\")\n",
+    "#         else:\n",
+    "#             # For Nusc: group by nuscenes-sample-token WITH ALL FRAMES\n",
+    "#             spark.sql(\"\"\"\n",
+    "#               CACHE TABLE nusc_task_df OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS\n",
+    "#               SELECT \n",
+    "#                   CONCAT(uri.segment_id, '.', uri.timestamp) AS task_id,\n",
+    "#                   FLATTEN(COLLECT_LIST(cuboids)) AS cuboids, \n",
+    "#                   COLLECT_LIST(point_cloud) AS point_clouds\n",
+    "#               FROM datums\n",
+    "#               WHERE \n",
+    "#                 (\n",
+    "#                   uri.extra['nuscenes-label-channel'] is NULL OR \n",
+    "#                   uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'\n",
+    "#                 ) AND (\n",
+    "#                   uri.topic LIKE '%cuboid%' OR\n",
+    "#                   uri.topic LIKE '%lidar%'\n",
+    "#                 )\n",
+    "#               GROUP BY task_id\n",
+    "#               HAVING SIZE(cuboids) > 0 AND SIZE(point_clouds) > 0\n",
+    "#             \"\"\")\n",
+    "# #             spark.sql(\"\"\"\n",
+    "# #               CACHE TABLE culi_tasks_df OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS\n",
+    "# #               SELECT \n",
+    "# #                   CONCAT(uri.segment_id, '.', uri.timestamp) AS task_id,\n",
+    "# #                   FLATTEN(COLLECT_LIST(cuboids)) AS cuboids, \n",
+    "# #                   COLLECT_LIST(point_cloud) AS point_clouds\n",
+    "# #               FROM datums\n",
+    "# #               WHERE \n",
+    "# #                 (\n",
+    "# #                   uri.extra['nuscenes-label-channel'] is NULL OR \n",
+    "# #                   uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'\n",
+    "# #                 ) AND (\n",
+    "# #                   uri.topic LIKE '%cuboid%' OR\n",
+    "# #                   uri.topic LIKE '%lidar%'\n",
+    "# #                 )\n",
+    "# #               GROUP BY task_id\n",
+    "# #               HAVING SIZE(cuboids) > 0 AND SIZE(point_clouds) > 0\n",
+    "# #             \"\"\")\n",
+    "        \n",
+    "#         tasks_df = spark.sql('SELECT * FROM nusc_task_df')\n",
+    "#         print('... done.')\n",
+    "#         return tasks_df.rdd\n",
+    "\n",
+    "# class NuscFusedWorldCloudKeyframesOnlyTable(NuscFusedWorldCloudTableBase):\n",
+    "#     SRC_SD_TABLE = NuscStampedDatumTableBase\n",
+    "\n",
+    "# class NuscFusedWorldCloudAllFramesTable(NuscFusedWorldCloudTableBase):\n",
+    "#     SRC_SD_TABLE = NuscStampedDatumTableLabelsAllFrames\n",
+    "    \n",
+    "class NuscKeyframesOFlowRenderer(OpticalFlowRenderBase):\n",
+    "    FUSED_LIDAR_SD_TABLE = NuscKFOnlyFusedWorldCloudTable\n",
+    "\n",
+    "class NuscAllFramesOFlowRenderer(OpticalFlowRenderBase):\n",
+    "    FUSED_LIDAR_SD_TABLE = NuscAllFramesFusedWorldCloudTable"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Start Spark"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-02-23 07:03:01,363\toarph 328266 : Using source root /opt/psegs/psegs \n",
+      "2021-02-23 07:03:01,363\toarph 328266 : Using source root /opt/psegs \n",
+      "2021-02-23 07:03:01,446\toarph 328266 : Generating egg to /tmp/tmp7bai_og5_oarphpy_eggbuild ...\n",
+      "2021-02-23 07:03:01,554\toarph 328266 : ... done.  Egg at /tmp/tmp7bai_og5_oarphpy_eggbuild/psegs-0.0.0-py3.8.egg\n"
+     ]
+    }
+   ],
+   "source": [
+    "from psegs.spark import NBSpark\n",
+    "spark = NBSpark.getOrCreate()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Build Fused Lidar Assets\n",
+    "\n",
+    "```\n",
+    "docker --context default run -it --name=potree_viewer --rm --net=host -v `pwd`:/shared  jonazpiazu/potree\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# T = KITTI360OurFusedWorldCloudTable\n",
+    "# rdds = T._create_datum_rdds(spark)\n",
+    "# print([r.count() for r in rdds])\n",
+    "\n",
+    "# seg_uris = T.get_all_segment_uris()\n",
+    "# samp = T.get_sample(seg_uris[0], spark=spark)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# print([lc.sensor_name for lc in samp.lidar_clouds][:10])\n",
+    "# c = samp.lidar_clouds[0]#[lc for lc in samp.lidar_clouds if lc.sensor_name == '11002'][0]\n",
+    "# print(c.get_cloud().shape)\n",
+    "# imshow(c.get_bev_debug_image(x_bounds_meters=None, y_bounds_meters=None))\n",
+    "# imshow(c.get_front_rv_debug_image(y_bounds_meters=None, z_bounds_meters=None))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Compute Candidate Optical Flow Pairs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Render Optical Flow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-02-23 10:46:44,720\tps   328266 : Creating datums for KITTI-360 ...\n",
+      "2021-02-23 10:46:44,722\tps   328266 : Filtering to only 1 segments\n",
+      "2021-02-23 10:46:52,801\tps   328266 : ... seq 2013_05_28_drive_0000_sync has 101669 URIs spanning 1204 sec, creating 397 slices ...\n",
+      "2021-02-23 10:46:53,323\tps   328266 : ... partitioned datums into 1 RDDs.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Building tasks table for 2013_05_28_drive_0000_sync ...\n",
+      "... done.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-02-23 10:49:55,647\tps   328266 : Filtering to only 1 segments\n",
+      "2021-02-23 10:49:55,648\tps   328266 : KITTI360WorldCloudTableBase building fused clouds ...\n",
+      "2021-02-23 10:49:55,648\tps   328266 : ... have 1 segments to fuse ...\n",
+      "2021-02-23 10:49:55,648\tps   328266 : ... working on 2013_05_28_drive_0000_sync ...\n",
+      "2021-02-23 10:49:55,667\tps   328266 : ... skipping 2013_05_28_drive_0000_sync; world and obj clouds done\n",
+      "2021-02-23 10:49:55,668\tps   328266 : World Cloud: /opt/psegs/dataroot/fused_world_clouds/naive_cuboid_scrubber/kitti-360/train/2013_05_28_drive_0000_sync/fused_world.ply\n",
+      "2021-02-23 10:49:55,668\tps   328266 : Obj Clouds: /opt/psegs/dataroot/fused_obj_clouds/naive_cuboid_scrubber/kitti-360/train/2013_05_28_drive_0000_sync\n",
+      "2021-02-23 10:49:55,670\toarph 328266 : Progress for \n",
+      "FuseEachSegment [Pid:328266 Id:139868735179264]\n",
+      "-----------------------  ---------------\n",
+      "Thruput\n",
+      "N thru                   1 (of 1)\n",
+      "N chunks                 1\n",
+      "Total time               0.02 seconds\n",
+      "Total thru               0 bytes\n",
+      "Rate                     0.0 bytes / sec\n",
+      "Hz                       50\n",
+      "Progress\n",
+      "Percent Complete         100.000000\n",
+      "Est. Time To Completion  0 seconds\n",
+      "-----------------------  ---------------\n",
+      "2021-02-23 10:49:55,671\tps   328266 : ... KITTI360WorldCloudTableBase done fusing clouds.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "num tasks 11497\n",
+      "restrict to 2\n",
+      "oflow_task_df 2\n",
+      "coalesc to  1\n",
+      "saved to /tmp/oflow_out/oflow_0.pkl\n",
+      "saved to /tmp/oflow_out/oflow_1.pkl\n",
+      "saved to /tmp/oflow_out/oflow_2.pkl\n",
+      "saved to /tmp/oflow_out/oflow_3.pkl\n"
+     ]
+    }
+   ],
+   "source": [
+    "# R = NuscKeyframesOFlowRenderer\n",
+    "# R.MAX_TASKS_PER_SEGMENT = 5\n",
+    "# R = SemanticKITTIOFlowRenderer\n",
+    "# R.MAX_TASKS_PER_SEGMENT = 5\n",
+    "R = KITTI360OFlowRenderer\n",
+    "# R.MAX_TASKS_PER_SEGMENT = 2\n",
+    "seg_uris = R.FUSED_LIDAR_SD_TABLE.get_all_segment_uris()\n",
+    "R.build(spark=spark, only_segments=[seg_uris[0]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# assert False, spark.sql(\"\"\"select uri.topic t from datums group by t\"\"\").show(truncate=False)\n",
+    "\n",
+    "# spark.sql(\"\"\"\n",
+    "#           CACHE TABLE nusc_task_df OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS\n",
+    "#           SELECT \n",
+    "#               MIN(uri.timestamp) FILTER (WHERE uri.topic LIKE '%lidar%') AS task_id,\n",
+    "#               COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,\n",
+    "#               COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,\n",
+    "#               COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) FILTER (WHERE uri.topic LIKE '%cam%') AS ci_sds\n",
+    "#           FROM datums\n",
+    "#           WHERE \n",
+    "#             uri.extra.`nuscenes-is-keyframe` = 'True' AND (\n",
+    "#               uri.extra['nuscenes-label-channel'] is NULL OR \n",
+    "#               uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'\n",
+    "#             ) AND (\n",
+    "#               uri.topic LIKE '%cuboid%' OR\n",
+    "#               uri.topic LIKE '%lidar%' OR\n",
+    "#               uri.topic LIKE '%camera%'\n",
+    "#             )\n",
+    "#           GROUP BY uri.extra.`nuscenes-sample-token`\n",
+    "#         \"\"\").show()\n",
+    "# df = spark.sql(\"\"\"\n",
+    "#           SELECT \n",
+    "#               COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) \n",
+    "#                   FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,\n",
+    "#               COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) \n",
+    "#                   FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,\n",
+    "#               COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) \n",
+    "#                   FILTER (WHERE uri.topic LIKE '%camera%') AS ci_sds,\n",
+    "#               MIN(uri.timestamp) FILTER (WHERE uri.topic LIKE '%lidar%') AS lidar_time,\n",
+    "#               FIRST(uri.extra.`nuscenes-sample-token`) AS sample_token\n",
+    "#           FROM datums\n",
+    "#           WHERE \n",
+    "#             uri.extra.`nuscenes-is-keyframe` = 'True' AND (\n",
+    "#               uri.extra['nuscenes-label-channel'] is NULL OR \n",
+    "#               uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'\n",
+    "#             ) AND (\n",
+    "#               uri.topic LIKE '%cuboid%' OR\n",
+    "#               uri.topic LIKE '%lidar%' OR\n",
+    "#               uri.topic LIKE '%camera%'\n",
+    "#             )\n",
+    "#           GROUP BY uri.extra.`nuscenes-sample-token`\n",
+    "#           ORDER BY lidar_time\n",
+    "#         \"\"\")\n",
+    "# sample_tokens_ordered = [r.sample_token for r in df.select('sample_token').collect()]\n",
+    "# task_to_stoken = [\n",
+    "#     {'task_id': task_id, 'sample_token': sample_token}\n",
+    "#     for task_id, sample_token in enumerate(sample_tokens_ordered)\n",
+    "# ]\n",
+    "# task_id_rdd = spark.sparkContext.parallelize(task_to_stoken)\n",
+    "# task_id_df = spark.createDataFrame(task_id_rdd)\n",
+    "# df.join(task_id_df, on=['sample_token'], how='inner').orderBy('task_id').show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/ios_lidar_explory.ipynb b/notebooks/ios_lidar_explory.ipynb
new file mode 100644
index 0000000..415788d
--- /dev/null
+++ b/notebooks/ios_lidar_explory.ipynb
@@ -0,0 +1,5973 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "149\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "CameraImage(sensor_name='camera_front', image_jpeg=bytearray(b''), image_png=bytearray(b''), image_factory=CloudpickeledCallable(_func_pyclass=psegs.datasets.ios_lidar.<lambda>), width=1920, height=1440, timestamp=853720759521708, ego_pose=Transform(rotation=array([[-0.16402921,  0.36367464, -0.91697067],\n",
+       "       [-0.04168281,  0.92617565,  0.37478161],\n",
+       "       [ 0.98557436,  0.09969707, -0.13676088]]), translation=array([[-2.92258954],\n",
+       "       [ 0.93101001],\n",
+       "       [ 0.19710636]]), src_frame='ego', dest_frame='world'), ego_to_sensor=Transform(rotation=array([[ 1,  0,  0],\n",
+       "       [ 0, -1,  0],\n",
+       "       [ 0,  0, -1]]), translation=array([[0.],\n",
+       "       [0.],\n",
+       "       [0.]]), src_frame='camera_front', dest_frame='ego'), K=array([[1.44565613e+03, 0.00000000e+00, 9.69324890e+02],\n",
+       "       [0.00000000e+00, 1.44565613e+03, 6.90660400e+02],\n",
+       "       [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]]), extra={'threeDScannerApp.cameraGrain': '0', 'threeDScannerApp.time': '853720.7595217085', 'threeDScannerApp.frame_index': '0', 'threeDScannerApp.motionQuality': '1', 'threeDScannerApp.projectionMatrix': '[1.5058917999267578, 0, -0.010234236717224121, 0, 0, 2.0078556537628174, -0.040055036544799805, 0, 0, 0, -0.9999997615814209, -0.0009999998146668077, 0, 0, -1, 0]', 'threeDScannerApp.intrinsics': '[1445.6561279296875, 0, 969.3248901367188, 0, 1445.6561279296875, 690.660400390625, 0, 0, 1]', 'threeDScannerApp.averageVelocity': '0', 'threeDScannerApp.averageAngularVelocity': '0', 'threeDScannerApp.exposureDuration': '0.016393441706895828'})"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "import os\n",
+    "import sys\n",
+    "sys.path.append('/opt/psegs')\n",
+    "\n",
+    "from psegs.datasets import ios_lidar\n",
+    "\n",
+    "base_dir = '/outer_root/home/au/lidarphone_scans/2021_06_27_12_37_38'\n",
+    "# base_dir = '/outer_root/home/au/lidarphone_scans/landscape_home_button_right_07_09_49'\n",
+    "\n",
+    "from oarphpy import util as oputil\n",
+    "json_paths = oputil.all_files_recursive(base_dir, pattern='frame*.json')\n",
+    "json_paths = sorted(json_paths)\n",
+    "cis = [ios_lidar.threeDScannerApp_create_camera_image(p) for p in json_paths]\n",
+    "\n",
+    "print(len(cis))\n",
+    "cis[0]\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from psegs.mesh2rgb import pytorch3d_camera_images_to_rgbd_debug\n",
+    "\n",
+    "# mesh_path = os.path.join(base_dir, 'export_refined.obj')\n",
+    "# outpath = os.path.join(base_dir, 'pytorch3d_rgbd_debug.mp4')\n",
+    "\n",
+    "# pytorch3d_camera_images_to_rgbd_debug(cis, )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from plotly.offline import init_notebook_mode, iplot\n",
+    "from plotly.graph_objs import *\n",
+    "\n",
+    "init_notebook_mode(connected=False)         # initiate notebook for offline plot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import numpy as np\n",
+    "import open3d as o3d\n",
+    "mesh = o3d.io.read_triangle_mesh(os.path.join(base_dir, 'export_refined.obj'))\n",
+    "vertices = np.asarray(mesh.vertices)\n",
+    "print(vertices.shape)\n",
+    "\n",
+    "import numpy as np\n",
+    "v_sub = vertices[np.random.choice(vertices.shape[0], 40000, replace=False)]\n",
+    "print(v_sub.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import plotly\n",
+    "import plotly.graph_objects as go\n",
+    "\n",
+    "plots = [ci.to_plotly_world_frame_3d() for ci in cis[:1]]\n",
+    "\n",
+    "import pandas as pd\n",
+    "cloud_df = pd.DataFrame(v_sub, columns=['x', 'y', 'z'])\n",
+    "from psegs.util.plotting import rgb_for_distance\n",
+    "cloud_df['color'] = [\n",
+    "  rgb_for_distance(np.linalg.norm(pt), period_meters=1.)\n",
+    "  for pt in cloud_df[['x', 'y', 'z']].values\n",
+    "]\n",
+    "scatter = go.Scatter3d(\n",
+    "                x=cloud_df['x'], y=cloud_df['y'], z=cloud_df['z'],\n",
+    "                mode='markers',\n",
+    "                marker=dict(size=2, color=cloud_df['color'], opacity=0.5),)\n",
+    "\n",
+    "plots.append(scatter)\n",
+    "\n",
+    "\n",
+    "fig = go.Figure(data=plots)\n",
+    "\n",
+    "fig.update_layout(\n",
+    "  width=1000, height=700,\n",
+    "  scene_aspectmode='data')\n",
+    "  # scene_camera=dict(\n",
+    "  #   up=dict(x=0, y=0, z=1),\n",
+    "  #   eye=dict(x=0, y=0, z=0),\n",
+    "  #   center=dict(x=1, y=0, z=0),\n",
+    "  # ))\n",
+    "    \n",
+    "iplot(fig)\n",
+    "    \n",
+    "    \n",
+    "# plot_str = plotly.offline.plot(fig, output_type='div')\n",
+    "\n",
+    "# html += '<br/><br/>' + plot_str\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert False\n",
+    "# # https://chart-studio.plotly.com/~empet/15040/plotly-mesh3d-from-a-wavefront-obj-f/#/\n",
+    "\n",
+    "\n",
+    "# def obj_data_to_mesh3d(odata):\n",
+    "#     # odata is the string read from an obj file\n",
+    "#     vertices = []\n",
+    "#     faces = []\n",
+    "#     lines = odata.splitlines()   \n",
+    "   \n",
+    "#     for line in lines:\n",
+    "#         slist = line.split()\n",
+    "#         if slist:\n",
+    "#             if slist[0] == 'v':\n",
+    "#                 vertex = np.array(slist[1:], dtype=float)\n",
+    "#                 vertices.append(vertex)\n",
+    "#             elif slist[0] == 'f':\n",
+    "#                 face = []\n",
+    "#                 for k in range(1, len(slist)):\n",
+    "#                     face.append([int(s) for s in slist[k].replace('//','/').split('/')])\n",
+    "#                 if len(face) > 3: # triangulate the n-polygonal face, n>3\n",
+    "#                     faces.extend([[face[0][0]-1, face[k][0]-1, face[k+1][0]-1] for k in range(1, len(face)-1)])\n",
+    "#                 else:    \n",
+    "#                     faces.append([face[j][0]-1 for j in range(len(face))])\n",
+    "#             else: pass\n",
+    "    \n",
+    "    \n",
+    "#     return np.array(vertices), np.array(faces)\n",
+    "\n",
+    "# import os\n",
+    "# with open(os.path.join(base_dir, 'export_refined.obj'), 'rb') as f:\n",
+    "#     obj_data = f.read().decode('utf-8')\n",
+    "# vertices, faces = obj_data_to_mesh3d(obj_data)\n",
+    "# print('vertices.shape', vertices.shape)\n",
+    "# print('faces.shape', faces.shape)\n",
+    "\n",
+    "\n",
+    "# x, y, z = vertices[:,:3].T\n",
+    "# I, J, K = faces.T\n",
+    "\n",
+    "# mesh = go.Mesh3d(\n",
+    "#             x=-x,\n",
+    "#             y=-y,\n",
+    "#             z=z,\n",
+    "# #             vertexcolor=vertices[:, 3:], #the color codes must be triplets of floats  in [0,1]!!                      \n",
+    "#             i=I,\n",
+    "#             j=J,\n",
+    "#             k=K,\n",
+    "#             name='',\n",
+    "#             showscale=False)\n",
+    "\n",
+    "# layout = go.Layout(width=900,\n",
+    "#                    height=800,\n",
+    "#                    scene=dict(xaxis=dict(visible=False),\n",
+    "#                               yaxis=dict(visible=False),  \n",
+    "#                               zaxis=dict(visible=False), \n",
+    "#                               aspectratio=dict(x=1.5,\n",
+    "#                                                y=0.9,\n",
+    "#                                                z=0.5\n",
+    "#                                          ),\n",
+    "#                               camera=dict(eye=dict(x=1., y=1., z=0.5)),\n",
+    "#                         ),\n",
+    "#                   ) \n",
+    "\n",
+    "# fig = go.Figure(data=[mesh], layout=layout)\n",
+    "# iplot(fig)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import open3d as o3d\n",
+    "# mesh = o3d.io.read_triangle_mesh(os.path.join(base_dir, 'export_refined.obj'))\n",
+    "# vertices = np.asarray(mesh.vertices)\n",
+    "# print(vertices.shape)\n",
+    "\n",
+    "# import numpy as np\n",
+    "# v_sub = vertices[np.random.choice(vertices.shape[0], 10000, replace=False)]\n",
+    "# print(v_sub.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "cloud_raw = vertices\n",
+    "\n",
+    "import imageio\n",
+    "writer = imageio.get_writer('/outer_root/home/au/lidarphone_scans/test_video.mp4', fps=5)\n",
+    "\n",
+    "for i in range(len(cis)):\n",
+    "    cloud_ego = cis[i].ego_pose.get_inverse().apply(cloud_raw).T\n",
+    "\n",
+    "    from psegs import datum\n",
+    "    pc = datum.PointCloud(cloud=cloud_ego)\n",
+    "\n",
+    "    debug = cis[i].get_debug_image(clouds=[pc], period_meters=0.1)\n",
+    "    writer.append_data(debug)\n",
+    "    print(i)\n",
+    "writer.close()\n",
+    "\n",
+    "#     from io import BytesIO\n",
+    "#     import IPython.display\n",
+    "#     import numpy as np\n",
+    "#     import PIL.Image\n",
+    "#     def showarray(a, fmt='png'):\n",
+    "#         a = np.uint8(a)\n",
+    "#         f = BytesIO()\n",
+    "#         PIL.Image.fromarray(a).save(f, fmt)\n",
+    "#         IPython.display.display(IPython.display.Image(data=f.getvalue()))\n",
+    "\n",
+    "#     showarray(debug)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "sys.path.append('/opt/psegs')\n",
+    "\n",
+    "import os\n",
+    "\n",
+    "ROOT = '/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/'\n",
+    "\n",
+    "for d in os.listdir(ROOT):\n",
+    "    if '.DS_Store' in d:\n",
+    "        continue\n",
+    "    base_dir = os.path.join(ROOT, d)\n",
+    "    print(base_dir)\n",
+    "    \n",
+    "\n",
+    "    from psegs.datasets import ios_lidar\n",
+    "\n",
+    "\n",
+    "    from oarphpy import util as oputil\n",
+    "    json_paths = oputil.all_files_recursive(base_dir, pattern='frame*.json')\n",
+    "    json_paths = sorted(json_paths)\n",
+    "    \n",
+    "    try:\n",
+    "        cis = [ios_lidar.threeDScannerApp_create_camera_image(p) for p in json_paths]\n",
+    "    except AssertionError as e:\n",
+    "        continue\n",
+    "\n",
+    "    print(len(cis))\n",
+    "    \n",
+    "    import os\n",
+    "    import numpy as np\n",
+    "    import open3d as o3d\n",
+    "    mesh = o3d.io.read_triangle_mesh(os.path.join(base_dir, 'export.obj'))\n",
+    "    vertices = np.asarray(mesh.vertices)\n",
+    "    print(vertices.shape)\n",
+    "\n",
+    "    cloud_raw = vertices\n",
+    "\n",
+    "    outpath = os.path.join(ROOT, d + '.mp4')\n",
+    "    \n",
+    "    import imageio\n",
+    "    writer = imageio.get_writer(outpath, fps=5)\n",
+    "\n",
+    "    for i in range(len(cis)):\n",
+    "        cloud_ego = cis[i].ego_pose.get_inverse().apply(cloud_raw).T\n",
+    "        cloud_ego[:, 0] *= -1\n",
+    "\n",
+    "        from psegs import datum\n",
+    "        pc = datum.PointCloud(cloud=cloud_ego)\n",
+    "\n",
+    "        debug = cis[i].get_debug_image(clouds=[pc], period_meters=0.1)\n",
+    "        writer.append_data(debug)\n",
+    "        print(i)\n",
+    "    writer.close()\n",
+    "    \n",
+    "    print('done', outpath)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # !apt-get install -y python3-pybind11\n",
+    "# # !cd /opt && git clone https://github.com/NVIDIA/cub\n",
+    "# # !CUB_HOME=/opt/cub pip3 install \"git+https://github.com/facebookresearch/pytorch3d.git@stable\"\n",
+    "\n",
+    "# # import os\n",
+    "# # !curl -LO https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz\n",
+    "# # !tar xzf 1.10.0.tar.gz\n",
+    "# # os.environ[\"CUB_HOME\"] = os.getcwd() + \"/cub-1.10.0\"\n",
+    "# # !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'\n",
+    "\n",
+    "\n",
+    "# # !pip3 install torch torchvision torchaudio\n",
+    "# # !pip3 install pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu102_pyt190/download.html\n",
+    "\n",
+    "# import os\n",
+    "# import sys\n",
+    "# import torch\n",
+    "\n",
+    "# import pytorch3d\n",
+    "\n",
+    "# import os\n",
+    "# import torch\n",
+    "# import matplotlib.pyplot as plt\n",
+    "\n",
+    "# from pytorch3d.utils import ico_sphere\n",
+    "# import numpy as np\n",
+    "# from tqdm.notebook import tqdm\n",
+    "\n",
+    "# # Util function for loading meshes\n",
+    "# from pytorch3d.io import load_objs_as_meshes, save_obj\n",
+    "\n",
+    "# from pytorch3d.loss import (\n",
+    "#     chamfer_distance, \n",
+    "#     mesh_edge_loss, \n",
+    "#     mesh_laplacian_smoothing, \n",
+    "#     mesh_normal_consistency,\n",
+    "# )\n",
+    "\n",
+    "# # Data structures and functions for rendering\n",
+    "# from pytorch3d.structures import Meshes\n",
+    "# from pytorch3d.renderer import (\n",
+    "#     look_at_view_transform,\n",
+    "#     OpenGLPerspectiveCameras, \n",
+    "#     PointLights, \n",
+    "#     DirectionalLights, \n",
+    "#     Materials, \n",
+    "#     RasterizationSettings, \n",
+    "#     MeshRenderer, \n",
+    "#     MeshRasterizer,  \n",
+    "#     SoftPhongShader,\n",
+    "#     SoftSilhouetteShader,\n",
+    "#     SoftPhongShader,\n",
+    "#     TexturesVertex\n",
+    "# )\n",
+    "\n",
+    "# # add path for demo utils functions \n",
+    "# import sys\n",
+    "# import os\n",
+    "# sys.path.append(os.path.abspath(''))\n",
+    "\n",
+    "# !wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/master/docs/tutorials/utils/plot_image_grid.py\n",
+    "# from plot_image_grid import image_grid\n",
+    "\n",
+    "# !mkdir -p data/cow_mesh\n",
+    "# !wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.obj\n",
+    "# !wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.mtl\n",
+    "# !wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow_texture.png\n",
+    "    \n",
+    "\n",
+    "    \n",
+    "# if torch.cuda.is_available():\n",
+    "#     device = torch.device(\"cuda:0\")\n",
+    "#     torch.cuda.set_device(device)\n",
+    "# else:\n",
+    "#     device = torch.device(\"cpu\")\n",
+    "\n",
+    "# # Set paths\n",
+    "# DATA_DIR = \"./data\"\n",
+    "# obj_filename = os.path.join(DATA_DIR, \"cow_mesh/cow.obj\")\n",
+    "\n",
+    "# # Load obj file\n",
+    "# mesh = load_objs_as_meshes([obj_filename], device=device)\n",
+    "\n",
+    "# # We scale normalize and center the target mesh to fit in a sphere of radius 1 \n",
+    "# # centered at (0,0,0). (scale, center) will be used to bring the predicted mesh \n",
+    "# # to its original center and scale.  Note that normalizing the target mesh, \n",
+    "# # speeds up the optimization but is not necessary!\n",
+    "# verts = mesh.verts_packed()\n",
+    "# N = verts.shape[0]\n",
+    "# center = verts.mean(0)\n",
+    "# scale = max((verts - center).abs().max(0)[0])\n",
+    "# mesh.offset_verts_(-center)\n",
+    "# mesh.scale_verts_((1.0 / float(scale)))\n",
+    "\n",
+    "\n",
+    "# # the number of different viewpoints from which we want to render the mesh.\n",
+    "# num_views = 20\n",
+    "\n",
+    "# # Get a batch of viewing angles. \n",
+    "# elev = torch.linspace(0, 360, num_views)\n",
+    "# azim = torch.linspace(-180, 180, num_views)\n",
+    "\n",
+    "# # Place a point light in front of the object. As mentioned above, the front of \n",
+    "# # the cow is facing the -z direction. \n",
+    "# lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])\n",
+    "\n",
+    "# # Initialize an OpenGL perspective camera that represents a batch of different \n",
+    "# # viewing angles. All the cameras helper methods support mixed type inputs and \n",
+    "# # broadcasting. So we can view the camera from a distance of dist=2.7, and \n",
+    "# # then specify elevation and azimuth angles for each viewpoint as tensors. \n",
+    "# R, T = look_at_view_transform(dist=2.7, elev=elev, azim=azim)\n",
+    "# cameras = OpenGLPerspectiveCameras(device=device, R=R, T=T)\n",
+    "\n",
+    "# # We arbitrarily choose one particular view that will be used to visualize \n",
+    "# # results\n",
+    "# camera = OpenGLPerspectiveCameras(device=device, R=R[None, 1, ...], \n",
+    "#                                   T=T[None, 1, ...]) \n",
+    "\n",
+    "# # Define the settings for rasterization and shading. Here we set the output \n",
+    "# # image to be of size 128X128. As we are rendering images for visualization \n",
+    "# # purposes only we will set faces_per_pixel=1 and blur_radius=0.0. Refer to \n",
+    "# # rasterize_meshes.py for explanations of these parameters.  We also leave \n",
+    "# # bin_size and max_faces_per_bin to their default values of None, which sets \n",
+    "# # their values using heuristics and ensures that the faster coarse-to-fine \n",
+    "# # rasterization method is used.  Refer to docs/notes/renderer.md for an \n",
+    "# # explanation of the difference between naive and coarse-to-fine rasterization. \n",
+    "# raster_settings = RasterizationSettings(\n",
+    "#     image_size=128, \n",
+    "#     blur_radius=0.0, \n",
+    "#     faces_per_pixel=1, \n",
+    "# )\n",
+    "\n",
+    "# # Create a Phong renderer by composing a rasterizer and a shader. The textured \n",
+    "# # Phong shader will interpolate the texture uv coordinates for each vertex, \n",
+    "# # sample from a texture image and apply the Phong lighting model\n",
+    "# renderer = MeshRenderer(\n",
+    "#     rasterizer=MeshRasterizer(\n",
+    "#         cameras=camera, \n",
+    "#         raster_settings=raster_settings\n",
+    "#     ),\n",
+    "#     shader=SoftPhongShader(\n",
+    "#         device=device, \n",
+    "#         cameras=camera,\n",
+    "#         lights=lights\n",
+    "#     )\n",
+    "# )\n",
+    "\n",
+    "# # Create a batch of meshes by repeating the cow mesh and associated textures. \n",
+    "# # Meshes has a useful `extend` method which allows us to do this very easily. \n",
+    "# # This also extends the textures. \n",
+    "# meshes = mesh.extend(num_views)\n",
+    "\n",
+    "# # Render the cow mesh from each viewing angle\n",
+    "# target_images = renderer(meshes, cameras=cameras, lights=lights)\n",
+    "\n",
+    "# # Our multi-view cow dataset will be represented by these 2 lists of tensors,\n",
+    "# # each of length num_views.\n",
+    "# target_rgb = [target_images[i, ..., :3] for i in range(num_views)]\n",
+    "# target_cameras = [OpenGLPerspectiveCameras(device=device, R=R[None, i, ...], \n",
+    "#                                            T=T[None, i, ...]) for i in range(num_views)]\n",
+    "\n",
+    "# image_grid(target_images.cpu().numpy(), rows=4, cols=5, rgb=True)\n",
+    "# plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from pytorch3d.renderer.mesh.rasterizer import MeshRasterizer\n",
+    "\n",
+    "# from torchvision.transforms import ToPILImage\n",
+    "\n",
+    "# to_img = ToPILImage()\n",
+    "\n",
+    "# rasterizer = MeshRasterizer(\n",
+    "#     cameras=cameras, \n",
+    "#     raster_settings=raster_settings\n",
+    "# )\n",
+    "\n",
+    "# fragments = rasterizer(meshes)\n",
+    "\n",
+    "# to_img(fragments.zbuf.cpu().squeeze())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import numpy as np\n",
+    "# IOS_CAM_TO_PYTORCH = np.array([\n",
+    "#     [ 0, -1,  0,  0],\n",
+    "#     [ 0,  0,  1,  0],\n",
+    "#     [-1,  0,  0,  0],\n",
+    "#     [ 0,  0,  0,  1],\n",
+    "#   ], dtype=np.float32)\n",
+    "\n",
+    "from pytorch3d.io import load_obj\n",
+    "obj_path = os.path.join(base_dir, 'export_refined.obj')\n",
+    "verts, faces_idx, _ = load_obj(obj_path)\n",
+    "faces = faces_idx.verts_idx\n",
+    "print('verts', verts.shape)\n",
+    "print('faces', faces.shape)\n",
+    "\n",
+    "nverts = verts.numpy()\n",
+    "nfaces = faces.numpy()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "CI = cis[130]\n",
+    "\n",
+    "import numpy as np\n",
+    "# obj_path = os.path.join(base_dir, 'export_refined.obj')\n",
+    "fov_x, fov_y = CI.get_fov()\n",
+    "K = CI.K\n",
+    "height, width = CI.height, CI.width\n",
+    "pose = CI.ego_pose['ego', 'world'].get_inverse().get_transformation_matrix(homogeneous=True)\n",
+    "pose = pose.astype(np.float32)\n",
+    "K = K.astype(np.float32)\n",
+    "\n",
+    "\n",
+    "\n",
+    "import numpy as np\n",
+    "world2pytorch = np.array([\n",
+    "    [1, 0, 0, 0],\n",
+    "    [0, -1, 0, 0],\n",
+    "    [0, 0, -1, 0],\n",
+    "    [0, 0, 0, 1],\n",
+    "], dtype=np.float32)\n",
+    "\n",
+    "pose = world2pytorch @ pose\n",
+    "\n",
+    "# pose[0, 0] *= -1\n",
+    "# pose[1, 1] *= -1\n",
+    "# pose[2, 2] *= -1\n",
+    "\n",
+    "import os\n",
+    "import sys\n",
+    "import torch\n",
+    "\n",
+    "import pytorch3d\n",
+    "\n",
+    "import os\n",
+    "import torch\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "from pytorch3d.utils import ico_sphere\n",
+    "import numpy as np\n",
+    "from tqdm.notebook import tqdm\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "# Util function for loading meshes\n",
+    "from pytorch3d.io import load_objs_as_meshes, save_obj, load_obj\n",
+    "\n",
+    "from pytorch3d.loss import (\n",
+    "    chamfer_distance, \n",
+    "    mesh_edge_loss, \n",
+    "    mesh_laplacian_smoothing, \n",
+    "    mesh_normal_consistency,\n",
+    ")\n",
+    "\n",
+    "# Data structures and functions for rendering\n",
+    "from pytorch3d.structures import Meshes\n",
+    "from pytorch3d.renderer import (\n",
+    "    look_at_view_transform,\n",
+    "    OpenGLPerspectiveCameras, \n",
+    "    PointLights, \n",
+    "    DirectionalLights, \n",
+    "    Materials, \n",
+    "    RasterizationSettings, \n",
+    "    MeshRenderer, \n",
+    "    MeshRasterizer,  \n",
+    "    SoftPhongShader,\n",
+    "    SoftSilhouetteShader,\n",
+    "    SoftPhongShader,\n",
+    "    TexturesVertex,\n",
+    "    AmbientLights\n",
+    ")\n",
+    "\n",
+    "# add path for demo utils functions \n",
+    "import sys\n",
+    "import os\n",
+    "sys.path.append(os.path.abspath(''))\n",
+    "\n",
+    "\n",
+    "# io utils\n",
+    "from pytorch3d.io import load_obj\n",
+    "\n",
+    "# datastructures\n",
+    "from pytorch3d.structures import Meshes\n",
+    "\n",
+    "# 3D transformations functions\n",
+    "from pytorch3d.transforms import Rotate, Translate\n",
+    "\n",
+    "# rendering components\n",
+    "from pytorch3d.renderer import (\n",
+    "    FoVPerspectiveCameras, look_at_view_transform, look_at_rotation, \n",
+    "    RasterizationSettings, MeshRenderer, MeshRasterizer, BlendParams,\n",
+    "    SoftSilhouetteShader, HardPhongShader, PointLights, TexturesVertex,\n",
+    "    HardGouraudShader, SoftGouraudShader,HardFlatShader,PerspectiveCameras,FoVOrthographicCameras,\n",
+    ")\n",
+    "\n",
+    "\n",
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda:0\")\n",
+    "    torch.cuda.set_device(device)\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "# device = torch.device(\"cpu\")\n",
+    "\n",
+    "# # Set paths\n",
+    "# DATA_DIR = \"./data\"\n",
+    "# obj_filename = os.path.join(DATA_DIR, \"cow_mesh/cow.obj\")\n",
+    "\n",
+    "# Load obj file\n",
+    "# mesh = load_objs_as_meshes([obj_path], device=device)\n",
+    "\n",
+    "# Load the obj and ignore the textures and materials.\n",
+    "# verts, faces_idx, _ = load_obj(obj_path)\n",
+    "# faces = faces_idx.verts_idx\n",
+    "# print('verts', verts.shape)\n",
+    "# print('faces', faces.shape)\n",
+    "import torch\n",
+    "verts = torch.from_numpy(nverts)\n",
+    "faces = torch.from_numpy(nfaces)\n",
+    "\n",
+    "# Initialize each vertex to a uniform light grey (0.9 per channel).\n",
+    "verts_rgb = .9 * torch.ones_like(verts)[None]  # (1, V, 3)\n",
+    "textures = TexturesVertex(verts_features=verts_rgb.to(device))\n",
+    "\n",
+    "# Create a Meshes object for the loaded mesh. Here we have only one mesh in the batch.\n",
+    "teapot_mesh = Meshes(\n",
+    "    verts=[verts.to(device)],   \n",
+    "    faces=[faces.to(device)], \n",
+    "    textures=textures,\n",
+    ")\n",
+    "print('teapot_mesh', teapot_mesh)\n",
+    "# teapot_mesh = load_objs_as_meshes([obj_path], device=device)\n",
+    "\n",
+    "\n",
+    "import torch\n",
+    "import numpy as np\n",
+    "R = torch.from_numpy(pose[:3, :3].reshape([1, 3, 3])).to(device)\n",
+    "T = torch.from_numpy(pose[:3, 3].reshape([1, 3])).to(device)\n",
+    "\n",
+    "\n",
+    "# Select the viewpoint using spherical angles  \n",
+    "distance = 5   # distance from camera to the object\n",
+    "elevation = 50.0   # angle of elevation in degrees\n",
+    "azimuth = 0.0  # No rotation so the camera is positioned on the +Z axis. \n",
+    "\n",
+    "# Get the position of the camera based on the spherical angles\n",
+    "# R, T = look_at_view_transform(distance, elevation, azimuth, device=device)\n",
+    "print('R', R)\n",
+    "print('T', T)\n",
+    "\n",
+    "tK = np.eye(4).astype(np.float32)\n",
+    "tK[:3, :3] = K\n",
+    "\n",
+    "# # Great job pytorch3d!! \n",
+    "# # https://github.com/facebookresearch/pytorch3d/blob/103da63393d6bbb697835ddbfc86b07572ea4d0c/tests/test_camera_conversions.py#L116\n",
+    "# tK[0, 0] = 1.1 * K[0, 0]\n",
+    "# tK[1, 1] = 1.1 * K[1, 1]\n",
+    "# tK[2, 0] = 1.1 * K[2, 0]\n",
+    "# tK[2, 1] = 1.1 * K[2, 1]\n",
+    "\n",
+    "\n",
+    "tK = torch.from_numpy(tK.reshape([1, 4, 4])).to(device)\n",
+    "print('K', tK)\n",
+    "\n",
+    "image_size = torch.from_numpy(np.array([height, width]).reshape([1, 2])).to(device)\n",
+    "print('image_size', image_size)\n",
+    "\n",
+    "# https://github.com/facebookresearch/pytorch3d/issues/522\n",
+    "from pytorch3d.utils.camera_conversions import cameras_from_opencv_projection\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "cameras = cameras_from_opencv_projection(R, T, tK, image_size, device=device).cuda()\n",
+    "\n",
+    "# assert False, (cameras.R, cameras.T, cameras.get_world_to_view_transform().device)\n",
+    "# assert False, cameras.get_world_to_view_transform().device\n",
+    "# print('get_world_to_view_transform', xform.device, cameras.R, cameras.T)\n",
+    "\n",
+    "# cameras = FoVPerspectiveCameras(device=device, fov=fov_x, degrees=False)#, K=K)\n",
+    "\n",
+    "# # hack up cameras_from_opencv_projection\n",
+    "# camera_matrix = tK\n",
+    "tvec = T\n",
+    "# focal_length = torch.stack([camera_matrix[:, 0, 0], camera_matrix[:, 1, 1]], dim=-1)\n",
+    "# principal_point = camera_matrix[:, :2, 2]\n",
+    "\n",
+    "# # Retype the image_size correctly and flip to width, height.\n",
+    "# image_size_wh = image_size.to(R).flip(dims=(1,))\n",
+    "\n",
+    "# # Get the PyTorch3D focal length and principal point.\n",
+    "# focal_pytorch3d = focal_length / (0.5 * image_size_wh)\n",
+    "# p0_pytorch3d = -(principal_point / (0.5 * image_size_wh) - 1)\n",
+    "\n",
+    "# For R, T we flip x, y axes (opencv screen space has an opposite\n",
+    "# orientation of screen axes).\n",
+    "# We also transpose R (opencv multiplies points from the opposite=left side).\n",
+    "R_pytorch3d = R.clone().permute(0, 2, 1)\n",
+    "T_pytorch3d = tvec.clone()\n",
+    "R_pytorch3d[:, :, :2] *= -1\n",
+    "T_pytorch3d[:, :2] *= -1\n",
+    "# cameras = PerspectiveCameras(\n",
+    "#             device=device, R=R_pytorch3d,\n",
+    "#             T=T_pytorch3d,\n",
+    "#             focal_length=focal_pytorch3d,\n",
+    "#             principal_point=p0_pytorch3d, image_size=image_size, in_ndc=True)\n",
+    "\n",
+    "fov_x, fov_y = CI.get_fov()\n",
+    "cameras = FoVPerspectiveCameras(\n",
+    "    device=device, fov=fov_y, degrees=False, R=R_pytorch3d, T=T_pytorch3d, aspect_ratio=1.0)\n",
+    "\n",
+    "\n",
+    "# cameras = PerspectiveCameras(device=device, K=K, R=R, T=T, in_ndc=False, image_size=image_size)\n",
+    "\n",
+    "\n",
+    "raster_settings = RasterizationSettings(\n",
+    "    image_size=(height, width), \n",
+    "    faces_per_pixel=1, \n",
+    ")\n",
+    "lights = PointLights(\n",
+    "    device=device, \n",
+    "    location=cameras.get_world_to_view_transform().transform_points(torch.tensor([[0., 0., -1.]]).cuda()),\n",
+    ")\n",
+    "# lights = AmbientLights(device=device)\n",
+    "blend_params = BlendParams(sigma=1e-4, gamma=1e-4, background_color=(0.1, 0.1, 0.1))\n",
+    "rasterizer = MeshRasterizer(\n",
+    "        cameras=cameras, \n",
+    "        raster_settings=raster_settings\n",
+    "    )\n",
+    "phong_renderer = MeshRenderer(\n",
+    "    rasterizer=rasterizer,\n",
+    "    shader=HardPhongShader(device=device, cameras=cameras, lights=lights, blend_params=blend_params)\n",
+    ")\n",
+    "\n",
+    "\n",
+    "image_ref = phong_renderer(meshes_world=teapot_mesh)\n",
+    "\n",
+    "\n",
+    "import torchvision.transforms.functional as F\n",
+    "import numpy as np\n",
+    "pil_img = F.to_pil_image((255.0*image_ref.cpu().numpy()[0]).astype(np.uint8))\n",
+    "\n",
+    "from IPython.display import display\n",
+    "display(pil_img)\n",
+    "\n",
+    "\n",
+    "# from pytorch3d.vis.plotly_vis import plot_batch_individually\n",
+    "# fig = plot_batch_individually([teapot_mesh, cameras])\n",
+    "\n",
+    "# from plotly.graph_objs import *\n",
+    "# fig.layout = Layout(showlegend=True)\n",
+    "# fig.show()\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "# # Place a point light in front of the object. As mentioned above, the front of \n",
+    "# # the cow is facing the -z direction. \n",
+    "# lights = DirectionalLights()\n",
+    "\n",
+    "# # Initialize an OpenGL perspective camera that represents a batch of different \n",
+    "# # viewing angles. All the cameras helper methods support mixed type inputs and \n",
+    "# # broadcasting. So we can view the camera from a distance of dist=2.7, and \n",
+    "# # then specify elevation and azimuth angles for each viewpoint as tensors. \n",
+    "\n",
+    "# # cameras = OpenGLPerspectiveCameras(device=device, R=pose[:3, :3], T=pose[:3, 3])\n",
+    "\n",
+    "# # We arbitrarily choose one particular view that will be used to visualize \n",
+    "# # results\n",
+    "# camera = OpenGLPerspectiveCameras(device=device, R=pose[:3, :3], T=pose[:3, 3])\n",
+    "\n",
+    "# # Define the settings for rasterization and shading. Here we set the output \n",
+    "# # image to be of size 512x512. As we are rendering images for visualization \n",
+    "# # purposes only we will set faces_per_pixel=1 and blur_radius=0.0. Refer to \n",
+    "# # rasterize_meshes.py for explanations of these parameters.  We also leave \n",
+    "# # bin_size and max_faces_per_bin to their default values of None, which sets \n",
+    "# # their values using heuristics and ensures that the faster coarse-to-fine \n",
+    "# # rasterization method is used.  Refer to docs/notes/renderer.md for an \n",
+    "# # explanation of the difference between naive and coarse-to-fine rasterization. \n",
+    "# raster_settings = RasterizationSettings(\n",
+    "#     image_size=512, \n",
+    "#     blur_radius=0.0, \n",
+    "#     faces_per_pixel=1, \n",
+    "# )\n",
+    "\n",
+    "# # Create a Phong renderer by composing a rasterizer and a shader. The textured \n",
+    "# # Phong shader will interpolate the texture uv coordinates for each vertex, \n",
+    "# # sample from a texture image and apply the Phong lighting model\n",
+    "# renderer = MeshRenderer(\n",
+    "#     rasterizer=MeshRasterizer(\n",
+    "#         cameras=camera, \n",
+    "#         raster_settings=raster_settings\n",
+    "#     ),\n",
+    "#     shader=SoftPhongShader(\n",
+    "#         device=device, \n",
+    "#         cameras=camera,\n",
+    "#         lights=lights\n",
+    "#     )\n",
+    "# )\n",
+    "\n",
+    "# # # Create a batch of meshes by repeating the cow mesh and associated textures. \n",
+    "# # # Meshes has a useful `extend` method which allows us to do this very easily. \n",
+    "# # # This also extends the textures. \n",
+    "# # meshes = mesh.extend(num_views)\n",
+    "\n",
+    "# # Render the cow mesh from each viewing angle\n",
+    "# target_images = renderer([mesh], cameras=[camera], lights=lights)\n",
+    "\n",
+    "\n",
+    "# target_images\n",
+    "# # # Our multi-view cow dataset will be represented by these 2 lists of tensors,\n",
+    "# # # each of length num_views.\n",
+    "# # target_rgb = [target_images[i, ..., :3] for i in range(num_views)]\n",
+    "# # target_cameras = [OpenGLPerspectiveCameras(device=device, R=R[None, i, ...], \n",
+    "# #                                            T=T[None, i, ...]) for i in range(num_views)]\n",
+    "\n",
+    "# # image_grid(target_images.cpu().numpy(), rows=4, cols=5, rgb=True)\n",
+    "# # plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pose"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "fragments = rasterizer(meshes_world=teapot_mesh)\n",
+    "\n",
+    "zbuf = fragments.zbuf\n",
+    "plt.imshow(zbuf[0, ..., 0].cpu().numpy())\n",
+    "plt.show()\n",
+    "print('zbuf', zbuf.min(), zbuf.max(), zbuf[zbuf > 0].min())\n",
+    "# display(F.to_pil_image(image_ref.cpu().numpy()[0].astype(np.uint8)[:, :, -1]))\n",
+    "# image_ref.cpu().numpy()[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "zbuf[0, ..., 0].cpu().numpy().shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "debug = CI.image\n",
+    "depth = zbuf[0, ..., 0].cpu().numpy()\n",
+    "h, w = debug.shape[:2]\n",
+    "px_y = np.tile(np.arange(h)[:, np.newaxis], [1, w])\n",
+    "px_x = np.tile(np.arange(w)[np.newaxis, :], [h, 1])\n",
+    "pyx = np.concatenate([px_y[:,:,np.newaxis], px_x[:, :, np.newaxis]], axis=-1)\n",
+    "pyx = pyx.astype(np.float32)\n",
+    "\n",
+    "vud1 = np.dstack([pyx, depth]).reshape([-1, 3])\n",
+    "\n",
+    "vud1 = vud1[vud1[:, 2] > 0]\n",
+    "uvd = vud1[:, (1, 0, 2)]\n",
+    "\n",
+    "\n",
+    "from psegs.util.plotting import draw_xy_depth_in_image\n",
+    "draw_xy_depth_in_image(debug, uvd, period_meters=0.1)\n",
+    "\n",
+    "\n",
+    "import torchvision.transforms.functional as F\n",
+    "import numpy as np\n",
+    "pil_img = F.to_pil_image(debug.astype(np.uint8))\n",
+    "\n",
+    "from IPython.display import display\n",
+    "display(pil_img)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # Select the viewpoint using spherical angles  \n",
+    "# distance = 1.5   # distance from camera to the object\n",
+    "# elevation = 50.0   # angle of elevation in degrees\n",
+    "# azimuth = 0.0  # No rotation so the camera is positioned on the +Z axis. \n",
+    "\n",
+    "# # Get the position of the camera based on the spherical angles\n",
+    "# R, T = look_at_view_transform(distance, elevation, azimuth, device=device)\n",
+    "# print('R', R)\n",
+    "# print('T', T)\n",
+    "\n",
+    "# cameras = FoVPerspectiveCameras(device=device, K=K, znear=0.1, zfar=100)\n",
+    "\n",
+    "# raster_settings = RasterizationSettings(\n",
+    "#     image_size=256, \n",
+    "#     blur_radius=0.0, \n",
+    "#     faces_per_pixel=1, \n",
+    "# )\n",
+    "# lights = PointLights(\n",
+    "#     device=device, \n",
+    "#     location=[[0.0, 5.0, -10.0]], \n",
+    "#     diffuse_color=((0, 0, 0),),\n",
+    "#     specular_color=((0, 0, 0),),\n",
+    "# )\n",
+    "# blend_params = BlendParams(sigma=1e-4, gamma=1e-4, background_color=(0.1, 0.1, 0.1))\n",
+    "# phong_renderer = MeshRenderer(\n",
+    "#     rasterizer=MeshRasterizer(\n",
+    "#         cameras=cameras, \n",
+    "#         raster_settings=raster_settings\n",
+    "#     ),\n",
+    "#     shader=HardPhongShader(device=device, cameras=cameras, lights=lights, blend_params=blend_params)\n",
+    "# )\n",
+    "\n",
+    "\n",
+    "# image_ref = phong_renderer(meshes_world=teapot_mesh, R=R, T=T)\n",
+    "# print('image_ref', image_ref)\n",
+    "\n",
+    "# import torchvision.transforms.functional as F\n",
+    "# import numpy as np\n",
+    "# pil_img = F.to_pil_image((255*image_ref.cpu().numpy()[0]).astype(np.uint8))\n",
+    "\n",
+    "# from IPython.display import display\n",
+    "# display(pil_img)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pytorch3d.vis.plotly_vis\n",
+    "dir(pytorch3d.vis.plotly_vis)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_31_01\n",
+      "err /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_31_01/frame_00001.jpg\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_33_35\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_33_35pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_33_35.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_33_35pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_33_55\n",
+      "err /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_33_55/frame_00001.jpg\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_36_35\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_36_35pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_36_35.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_36_35pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_39_50\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_39_50pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_39_50.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_39_50pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_40_05\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_40_05pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_40_05.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_09_16_40_05pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_13_51_37\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_13_51_37pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_13_51_37.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_13_51_37pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_13_53_07\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_13_53_07pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_13_53_07.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_13_53_07pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_13_53_25\n",
+      "err /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_13_53_25/frame_00001.jpg\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_14_01_25\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_14_01_25pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_14_01_25.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_14_01_25pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_14_03_43\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_14_03_43pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_14_03_43.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_14_03_43pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_14_18_46\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_14_18_46pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_14_18_46.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_14_18_46pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_19_11_53\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_19_11_53pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_19_11_53.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_19_11_53pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_22_30\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_22_30pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_22_30.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_22_30pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_23_24\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_23_24pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_23_24.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_23_24pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_24_07\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_24_07pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_24_07.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_24_07pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_25_51\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_25_51pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_25_51.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_25_51pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_27_18\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_27_18pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_27_18.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_27_18pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_30_41\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_30_41pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_30_41.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_12_23_30_41pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_06_28\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_06_28pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_06_28.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_06_28pytorch_rgbd_debug.mp4\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_07_11\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_07_11pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_07_11.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_07_11pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_09_11\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_09_11pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_09_11.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_09_11pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_11_14\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_11_14pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_11_14.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_11_14pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_45_24\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_45_24pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_45_24.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_45_24pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_46_55\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_46_55pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_46_55.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_46_55pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_50_08\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_50_08pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_50_08.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_50_08pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_52_37\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_52_37pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_52_37.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_52_37pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_53_51\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_53_51pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_53_51.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_53_51pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_54_35\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_54_35pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_54_35.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_14_54_35pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_15_19_30\n",
+      "len(cis) 0\n",
+      "done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_15_19_30pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_15_19_43\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_15_19_43pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_15_19_43.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_14_15_19_43pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_15_11_51_18\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_15_11_51_18pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_15_11_51_18.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_15_11_51_18pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_17_15_41_20\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_17_15_41_20pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_17_15_41_20.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_17_15_41_20pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_17_15_41_44\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_17_15_41_44pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_17_15_41_44.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_17_15_41_44pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_54_38\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_54_38pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_54_38.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_54_38pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_55_14\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_55_14pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_55_14.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_55_14pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_56_17\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_56_17pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_56_17.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_56_17pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_57_08\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_57_08pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_57_08.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_57_08pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_58_18\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_58_18pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_58_18.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_14_58_18pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_15_44_14\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_15_44_14pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_15_44_14.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_23_15_44_14pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_26_17_54_10\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_26_17_54_10pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_26_17_54_10.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_26_17_54_10pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_26_22_44_46\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_26_22_44_46pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_26_22_44_46.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_26_22_44_46pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_11_54_33\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_11_54_33pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_11_54_33.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_11_54_33pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_11_58_49\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_11_58_49pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_11_58_49.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_11_58_49pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_01_23\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_01_23pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_01_23.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_01_23pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_05_13\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_05_13pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_05_13.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_05_13pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_15_04\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_15_04pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_15_04.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_15_04pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_26_57\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_26_57pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_26_57.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_26_57pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_29_50\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_29_50pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_29_50.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_29_50pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_31_43\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_31_43pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_31_43.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_31_43pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_31_52\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_31_52pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_31_52.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_31_52pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_33_12\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_33_12pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_33_12.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_33_12pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_37_38\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_37_38pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_37_38.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_37_38pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_40_18\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_40_18pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_40_18.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_40_18pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_43_04\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_43_04pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_43_04.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_43_04pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_46_15\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_46_15pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_46_15.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_46_15pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_54_20\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_54_20pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_54_20.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_54_20pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_56_44\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_56_44pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_56_44.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_56_44pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_58_52\n",
+      "len(cis) 0\n",
+      "done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_58_52pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_59_00\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_59_00pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_59_00.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_12_59_00pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_00_15\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_00_15pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_00_15.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_00_15pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_04_47\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_04_47pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_04_47.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_04_47pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_12_14\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_12_14pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_12_14.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_12_14pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_16_15\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_16_15pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_16_15.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_16_15pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_17_37\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_17_37pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_17_37.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_17_37pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_21_32\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_21_32pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_21_32.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_21_32pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_24_11\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_24_11pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_24_11.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_24_11pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_28_04\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_28_04pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_28_04.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_28_04pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_36_55\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_36_55pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_36_55.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_06_27_13_36_55pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_09_54_56\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_09_54_56pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_09_54_56.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_09_54_56pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_12_09_24\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_12_09_24pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_12_09_24.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_12_09_24pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_05_54\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_05_54pytorch_rgbd_debug.mp4\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_05_54.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_05_54pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_09_56\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_09_56pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_09_56.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_09_56pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_25_46\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_25_46pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_25_46.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_25_46pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_26_35\n",
+      "err /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_26_35/frame_00001.jpg\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_26_44\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_26_44pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_26_44.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_01_13_26_44pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_03_20_03_33\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_03_20_03_33pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_03_20_03_33.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_03_20_03_33pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_09_24_00\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_09_24_00pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_09_24_00.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_09_24_00pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_09_25_04\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_09_25_04pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_09_25_04.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_09_25_04pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_12_44_21\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_12_44_21pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_12_44_21.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_12_44_21pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_13_03_03\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_13_03_03pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_13_03_03.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_13_03_03pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_13_06_01\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_13_06_01pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_13_06_01.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_13_06_01pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_13_15_45\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_13_15_45pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_13_15_45.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_09_13_15_45pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_10_15_08_02\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_10_15_08_02pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_10_15_08_02.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_10_15_08_02pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_10_18_10_20\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_10_18_10_20pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_10_18_10_20.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_10_18_10_20pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_10_20_18_15\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_10_20_18_15pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_10_20_18_15.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_10_20_18_15pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_12_18_50_54\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_12_18_50_54pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_12_18_50_54.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_12_18_50_54pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_16_16_30_59\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_16_16_30_59pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_16_16_30_59.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_16_16_30_59pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_24_09_47_10\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_24_09_47_10pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_24_09_47_10.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_24_09_47_10pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_24_15_54_17\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_24_15_54_17pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_24_15_54_17.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_24_15_54_17pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_25_23_58_28\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_25_23_58_28pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_25_23_58_28.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_25_23_58_28pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_26_15_59_23\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_26_15_59_23pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_26_15_59_23.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_07_26_15_59_23pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_13_51_23\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_13_51_23pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_13_51_23.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_13_51_23pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_13_54_30\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_13_54_30pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_13_54_30.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_13_54_30pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_13_58_23\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_13_58_23pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_13_58_23.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_13_58_23pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_11_55\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_11_55pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_11_55.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_11_55pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_17_29\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_17_29pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_17_29.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_17_29pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_28_12\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_28_12pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_28_12.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_28_12pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_34_21\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "len(cis) 343\n",
+      "verts torch.Size([331421, 3])\n",
+      "faces torch.Size([583659, 3])\n",
+      "mesh <pytorch3d.structures.meshes.Meshes object at 0x7fb4dd041c10>\n",
+      "start batch\n",
+      "batch done 10.812266111373901\n",
+      "yielding 2\n",
+      "0\n",
+      "yielding 2\n",
+      "1\n",
+      "start batch\n",
+      "batch done 10.672507286071777\n",
+      "yielding 2\n",
+      "2\n",
+      "yielding 2\n",
+      "3\n",
+      "start batch\n",
+      "batch done 10.614536762237549\n",
+      "yielding 2\n",
+      "4\n",
+      "yielding 2\n",
+      "5\n",
+      "start batch\n",
+      "batch done 10.53771686553955\n",
+      "yielding 2\n",
+      "6\n",
+      "yielding 2\n",
+      "7\n",
+      "start batch\n",
+      "batch done 10.425113439559937\n",
+      "yielding 2\n",
+      "8\n",
+      "yielding 2\n",
+      "9\n",
+      "start batch\n",
+      "batch done 10.440918922424316\n",
+      "yielding 2\n",
+      "10\n",
+      "yielding 2\n",
+      "11\n",
+      "start batch\n",
+      "batch done 10.370381832122803\n",
+      "yielding 2\n",
+      "12\n",
+      "yielding 2\n",
+      "13\n",
+      "start batch\n",
+      "batch done 10.289090871810913\n",
+      "yielding 2\n",
+      "14\n",
+      "yielding 2\n",
+      "15\n",
+      "start batch\n",
+      "batch done 10.267695188522339\n",
+      "yielding 2\n",
+      "16\n",
+      "yielding 2\n",
+      "17\n",
+      "start batch\n",
+      "batch done 10.165318250656128\n",
+      "yielding 2\n",
+      "18\n",
+      "yielding 2\n",
+      "19\n",
+      "start batch\n",
+      "batch done 10.142006158828735\n",
+      "yielding 2\n",
+      "20\n",
+      "yielding 2\n",
+      "21\n",
+      "start batch\n",
+      "batch done 10.08971905708313\n",
+      "yielding 2\n",
+      "22\n",
+      "yielding 2\n",
+      "23\n",
+      "start batch\n",
+      "batch done 10.093392133712769\n",
+      "yielding 2\n",
+      "24\n",
+      "yielding 2\n",
+      "25\n",
+      "start batch\n",
+      "batch done 10.06546664237976\n",
+      "yielding 2\n",
+      "26\n",
+      "yielding 2\n",
+      "27\n",
+      "start batch\n",
+      "batch done 10.223875522613525\n",
+      "yielding 2\n",
+      "28\n",
+      "yielding 2\n",
+      "29\n",
+      "start batch\n",
+      "batch done 10.148885250091553\n",
+      "yielding 2\n",
+      "30\n",
+      "yielding 2\n",
+      "31\n",
+      "start batch\n",
+      "batch done 10.162606954574585\n",
+      "yielding 2\n",
+      "32\n",
+      "yielding 2\n",
+      "33\n",
+      "start batch\n",
+      "batch done 10.204710960388184\n",
+      "yielding 2\n",
+      "34\n",
+      "yielding 2\n",
+      "35\n",
+      "start batch\n",
+      "batch done 10.194063425064087\n",
+      "yielding 2\n",
+      "36\n",
+      "yielding 2\n",
+      "37\n",
+      "start batch\n",
+      "batch done 10.217355728149414\n",
+      "yielding 2\n",
+      "38\n",
+      "yielding 2\n",
+      "39\n",
+      "start batch\n",
+      "batch done 10.340668439865112\n",
+      "yielding 2\n",
+      "40\n",
+      "yielding 2\n",
+      "41\n",
+      "start batch\n",
+      "batch done 10.41577672958374\n",
+      "yielding 2\n",
+      "42\n",
+      "yielding 2\n",
+      "43\n",
+      "start batch\n",
+      "batch done 10.286909103393555\n",
+      "yielding 2\n",
+      "44\n",
+      "yielding 2\n",
+      "45\n",
+      "start batch\n",
+      "batch done 10.297301769256592\n",
+      "yielding 2\n",
+      "46\n",
+      "yielding 2\n",
+      "47\n",
+      "start batch\n",
+      "batch done 10.102678775787354\n",
+      "yielding 2\n",
+      "48\n",
+      "yielding 2\n",
+      "49\n",
+      "start batch\n",
+      "batch done 10.173439741134644\n",
+      "yielding 2\n",
+      "50\n",
+      "yielding 2\n",
+      "51\n",
+      "start batch\n",
+      "batch done 10.288838863372803\n",
+      "yielding 2\n",
+      "52\n",
+      "yielding 2\n",
+      "53\n",
+      "start batch\n",
+      "batch done 10.377828359603882\n",
+      "yielding 2\n",
+      "54\n",
+      "yielding 2\n",
+      "55\n",
+      "start batch\n",
+      "batch done 10.305512428283691\n",
+      "yielding 2\n",
+      "56\n",
+      "yielding 2\n",
+      "57\n",
+      "start batch\n",
+      "batch done 10.368940114974976\n",
+      "yielding 2\n",
+      "58\n",
+      "yielding 2\n",
+      "59\n",
+      "start batch\n",
+      "batch done 10.381835460662842\n",
+      "yielding 2\n",
+      "60\n",
+      "yielding 2\n",
+      "61\n",
+      "start batch\n",
+      "batch done 10.382489681243896\n",
+      "yielding 2\n",
+      "62\n",
+      "yielding 2\n",
+      "63\n",
+      "start batch\n",
+      "batch done 10.498017311096191\n",
+      "yielding 2\n",
+      "64\n",
+      "yielding 2\n",
+      "65\n",
+      "start batch\n",
+      "batch done 10.333954095840454\n",
+      "yielding 2\n",
+      "66\n",
+      "yielding 2\n",
+      "67\n",
+      "start batch\n",
+      "batch done 10.297916412353516\n",
+      "yielding 2\n",
+      "68\n",
+      "yielding 2\n",
+      "69\n",
+      "start batch\n",
+      "batch done 10.340736627578735\n",
+      "yielding 2\n",
+      "70\n",
+      "yielding 2\n",
+      "71\n",
+      "start batch\n",
+      "batch done 10.668111324310303\n",
+      "yielding 2\n",
+      "72\n",
+      "yielding 2\n",
+      "73\n",
+      "start batch\n",
+      "batch done 10.534757852554321\n",
+      "yielding 2\n",
+      "74\n",
+      "yielding 2\n",
+      "75\n",
+      "start batch\n",
+      "batch done 10.496770858764648\n",
+      "yielding 2\n",
+      "76\n",
+      "yielding 2\n",
+      "77\n",
+      "start batch\n",
+      "batch done 10.530281782150269\n",
+      "yielding 2\n",
+      "78\n",
+      "yielding 2\n",
+      "79\n",
+      "start batch\n",
+      "batch done 10.587252616882324\n",
+      "yielding 2\n",
+      "80\n",
+      "yielding 2\n",
+      "81\n",
+      "start batch\n",
+      "batch done 10.731087446212769\n",
+      "yielding 2\n",
+      "82\n",
+      "yielding 2\n",
+      "83\n",
+      "start batch\n",
+      "batch done 10.478532314300537\n",
+      "yielding 2\n",
+      "84\n",
+      "yielding 2\n",
+      "85\n",
+      "start batch\n",
+      "batch done 10.4753577709198\n",
+      "yielding 2\n",
+      "86\n",
+      "yielding 2\n",
+      "87\n",
+      "start batch\n",
+      "batch done 10.508673429489136\n",
+      "yielding 2\n",
+      "88\n",
+      "yielding 2\n",
+      "89\n",
+      "start batch\n",
+      "batch done 10.596078634262085\n",
+      "yielding 2\n",
+      "90\n",
+      "yielding 2\n",
+      "91\n",
+      "start batch\n",
+      "batch done 10.605007648468018\n",
+      "yielding 2\n",
+      "92\n",
+      "yielding 2\n",
+      "93\n",
+      "start batch\n",
+      "batch done 10.653883934020996\n",
+      "yielding 2\n",
+      "94\n",
+      "yielding 2\n",
+      "95\n",
+      "start batch\n",
+      "batch done 10.649008512496948\n",
+      "yielding 2\n",
+      "96\n",
+      "yielding 2\n",
+      "97\n",
+      "start batch\n",
+      "batch done 10.660351037979126\n",
+      "yielding 2\n",
+      "98\n",
+      "yielding 2\n",
+      "99\n",
+      "start batch\n",
+      "batch done 10.74641752243042\n",
+      "yielding 2\n",
+      "100\n",
+      "yielding 2\n",
+      "101\n",
+      "start batch\n",
+      "batch done 10.648048639297485\n",
+      "yielding 2\n",
+      "102\n",
+      "yielding 2\n",
+      "103\n",
+      "start batch\n",
+      "batch done 10.61981463432312\n",
+      "yielding 2\n",
+      "104\n",
+      "yielding 2\n",
+      "105\n",
+      "start batch\n",
+      "batch done 10.682960748672485\n",
+      "yielding 2\n",
+      "106\n",
+      "yielding 2\n",
+      "107\n",
+      "start batch\n",
+      "batch done 10.650567054748535\n",
+      "yielding 2\n",
+      "108\n",
+      "yielding 2\n",
+      "109\n",
+      "start batch\n",
+      "batch done 10.67724895477295\n",
+      "yielding 2\n",
+      "110\n",
+      "yielding 2\n",
+      "111\n",
+      "start batch\n",
+      "batch done 10.712986707687378\n",
+      "yielding 2\n",
+      "112\n",
+      "yielding 2\n",
+      "113\n",
+      "start batch\n",
+      "batch done 10.541314601898193\n",
+      "yielding 2\n",
+      "114\n",
+      "yielding 2\n",
+      "115\n",
+      "start batch\n",
+      "batch done 10.21712851524353\n",
+      "yielding 2\n",
+      "116\n",
+      "yielding 2\n",
+      "117\n",
+      "start batch\n",
+      "batch done 10.186456680297852\n",
+      "yielding 2\n",
+      "118\n",
+      "yielding 2\n",
+      "119\n",
+      "start batch\n",
+      "batch done 10.330952167510986\n",
+      "yielding 2\n",
+      "120\n",
+      "yielding 2\n",
+      "121\n",
+      "start batch\n",
+      "batch done 10.480542182922363\n",
+      "yielding 2\n",
+      "122\n",
+      "yielding 2\n",
+      "123\n",
+      "start batch\n",
+      "batch done 10.218474388122559\n",
+      "yielding 2\n",
+      "124\n",
+      "yielding 2\n",
+      "125\n",
+      "start batch\n",
+      "batch done 10.144986152648926\n",
+      "yielding 2\n",
+      "126\n",
+      "yielding 2\n",
+      "127\n",
+      "start batch\n",
+      "batch done 10.368767499923706\n",
+      "yielding 2\n",
+      "128\n",
+      "yielding 2\n",
+      "129\n",
+      "start batch\n",
+      "batch done 10.651745319366455\n",
+      "yielding 2\n",
+      "130\n",
+      "yielding 2\n",
+      "131\n",
+      "start batch\n",
+      "batch done 10.306091070175171\n",
+      "yielding 2\n",
+      "132\n",
+      "yielding 2\n",
+      "133\n",
+      "start batch\n",
+      "batch done 10.182046175003052\n",
+      "yielding 2\n",
+      "134\n",
+      "yielding 2\n",
+      "135\n",
+      "start batch\n",
+      "batch done 10.42016887664795\n",
+      "yielding 2\n",
+      "136\n",
+      "yielding 2\n",
+      "137\n",
+      "start batch\n",
+      "batch done 10.387367248535156\n",
+      "yielding 2\n",
+      "138\n",
+      "yielding 2\n",
+      "139\n",
+      "start batch\n",
+      "batch done 10.338572263717651\n",
+      "yielding 2\n",
+      "140\n",
+      "yielding 2\n",
+      "141\n",
+      "start batch\n",
+      "batch done 10.201208114624023\n",
+      "yielding 2\n",
+      "142\n",
+      "yielding 2\n",
+      "143\n",
+      "start batch\n",
+      "batch done 10.19029426574707\n",
+      "yielding 2\n",
+      "144\n",
+      "yielding 2\n",
+      "145\n",
+      "start batch\n",
+      "batch done 10.174992084503174\n",
+      "yielding 2\n",
+      "146\n",
+      "yielding 2\n",
+      "147\n",
+      "start batch\n",
+      "batch done 10.15193772315979\n",
+      "yielding 2\n",
+      "148\n",
+      "yielding 2\n",
+      "149\n",
+      "start batch\n",
+      "batch done 10.180257081985474\n",
+      "yielding 2\n",
+      "150\n",
+      "yielding 2\n",
+      "151\n",
+      "start batch\n",
+      "batch done 10.184347152709961\n",
+      "yielding 2\n",
+      "152\n",
+      "yielding 2\n",
+      "153\n",
+      "start batch\n",
+      "batch done 10.169536352157593\n",
+      "yielding 2\n",
+      "154\n",
+      "yielding 2\n",
+      "155\n",
+      "start batch\n",
+      "batch done 10.253390550613403\n",
+      "yielding 2\n",
+      "156\n",
+      "yielding 2\n",
+      "157\n",
+      "start batch\n",
+      "batch done 10.356451988220215\n",
+      "yielding 2\n",
+      "158\n",
+      "yielding 2\n",
+      "159\n",
+      "start batch\n",
+      "batch done 10.372767210006714\n",
+      "yielding 2\n",
+      "160\n",
+      "yielding 2\n",
+      "161\n",
+      "start batch\n",
+      "batch done 10.269939422607422\n",
+      "yielding 2\n",
+      "162\n",
+      "yielding 2\n",
+      "163\n",
+      "start batch\n",
+      "batch done 10.193761348724365\n",
+      "yielding 2\n",
+      "164\n",
+      "yielding 2\n",
+      "165\n",
+      "start batch\n",
+      "batch done 10.209200382232666\n",
+      "yielding 2\n",
+      "166\n",
+      "yielding 2\n",
+      "167\n",
+      "start batch\n",
+      "batch done 10.21337366104126\n",
+      "yielding 2\n",
+      "168\n",
+      "yielding 2\n",
+      "169\n",
+      "start batch\n",
+      "batch done 10.341712951660156\n",
+      "yielding 2\n",
+      "170\n",
+      "yielding 2\n",
+      "171\n",
+      "start batch\n",
+      "batch done 10.251879930496216\n",
+      "yielding 2\n",
+      "172\n",
+      "yielding 2\n",
+      "173\n",
+      "start batch\n",
+      "batch done 10.201728343963623\n",
+      "yielding 2\n",
+      "174\n",
+      "yielding 2\n",
+      "175\n",
+      "start batch\n",
+      "batch done 10.260551452636719\n",
+      "yielding 2\n",
+      "176\n",
+      "yielding 2\n",
+      "177\n",
+      "start batch\n",
+      "batch done 10.281386137008667\n",
+      "yielding 2\n",
+      "178\n",
+      "yielding 2\n",
+      "179\n",
+      "start batch\n",
+      "batch done 10.25133204460144\n",
+      "yielding 2\n",
+      "180\n",
+      "yielding 2\n",
+      "181\n",
+      "start batch\n",
+      "batch done 10.287282228469849\n",
+      "yielding 2\n",
+      "182\n",
+      "yielding 2\n",
+      "183\n",
+      "start batch\n",
+      "batch done 10.279671430587769\n",
+      "yielding 2\n",
+      "184\n",
+      "yielding 2\n",
+      "185\n",
+      "start batch\n",
+      "batch done 10.15222430229187\n",
+      "yielding 2\n",
+      "186\n",
+      "yielding 2\n",
+      "187\n",
+      "start batch\n",
+      "batch done 10.233346462249756\n",
+      "yielding 2\n",
+      "188\n",
+      "yielding 2\n",
+      "189\n",
+      "start batch\n",
+      "batch done 10.25902271270752\n",
+      "yielding 2\n",
+      "190\n",
+      "yielding 2\n",
+      "191\n",
+      "start batch\n",
+      "batch done 10.232307195663452\n",
+      "yielding 2\n",
+      "192\n",
+      "yielding 2\n",
+      "193\n",
+      "start batch\n",
+      "batch done 10.242522239685059\n",
+      "yielding 2\n",
+      "194\n",
+      "yielding 2\n",
+      "195\n",
+      "start batch\n",
+      "batch done 10.246991634368896\n",
+      "yielding 2\n",
+      "196\n",
+      "yielding 2\n",
+      "197\n",
+      "start batch\n",
+      "batch done 10.231813669204712\n",
+      "yielding 2\n",
+      "198\n",
+      "yielding 2\n",
+      "199\n",
+      "start batch\n",
+      "batch done 10.198202848434448\n",
+      "yielding 2\n",
+      "200\n",
+      "yielding 2\n",
+      "201\n",
+      "start batch\n",
+      "batch done 10.336204767227173\n",
+      "yielding 2\n",
+      "202\n",
+      "yielding 2\n",
+      "203\n",
+      "start batch\n",
+      "batch done 10.290581703186035\n",
+      "yielding 2\n",
+      "204\n",
+      "yielding 2\n",
+      "205\n",
+      "start batch\n",
+      "batch done 10.339200019836426\n",
+      "yielding 2\n",
+      "206\n",
+      "yielding 2\n",
+      "207\n",
+      "start batch\n",
+      "batch done 10.42916750907898\n",
+      "yielding 2\n",
+      "208\n",
+      "yielding 2\n",
+      "209\n",
+      "start batch\n",
+      "batch done 10.381308794021606\n",
+      "yielding 2\n",
+      "210\n",
+      "yielding 2\n",
+      "211\n",
+      "start batch\n",
+      "batch done 10.25186014175415\n",
+      "yielding 2\n",
+      "212\n",
+      "yielding 2\n",
+      "213\n",
+      "start batch\n",
+      "batch done 10.232118368148804\n",
+      "yielding 2\n",
+      "214\n",
+      "yielding 2\n",
+      "215\n",
+      "start batch\n",
+      "batch done 10.210456609725952\n",
+      "yielding 2\n",
+      "216\n",
+      "yielding 2\n",
+      "217\n",
+      "start batch\n",
+      "batch done 10.281997203826904\n",
+      "yielding 2\n",
+      "218\n",
+      "yielding 2\n",
+      "219\n",
+      "start batch\n",
+      "batch done 10.314443826675415\n",
+      "yielding 2\n",
+      "220\n",
+      "yielding 2\n",
+      "221\n",
+      "start batch\n",
+      "batch done 10.278319597244263\n",
+      "yielding 2\n",
+      "222\n",
+      "yielding 2\n",
+      "223\n",
+      "start batch\n",
+      "batch done 10.315698146820068\n",
+      "yielding 2\n",
+      "224\n",
+      "yielding 2\n",
+      "225\n",
+      "start batch\n",
+      "batch done 10.202920913696289\n",
+      "yielding 2\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "226\n",
+      "yielding 2\n",
+      "227\n",
+      "start batch\n",
+      "batch done 10.14582109451294\n",
+      "yielding 2\n",
+      "228\n",
+      "yielding 2\n",
+      "229\n",
+      "start batch\n",
+      "batch done 10.141831636428833\n",
+      "yielding 2\n",
+      "230\n",
+      "yielding 2\n",
+      "231\n",
+      "start batch\n",
+      "batch done 10.17285418510437\n",
+      "yielding 2\n",
+      "232\n",
+      "yielding 2\n",
+      "233\n",
+      "start batch\n",
+      "batch done 10.291451930999756\n",
+      "yielding 2\n",
+      "234\n",
+      "yielding 2\n",
+      "235\n",
+      "start batch\n",
+      "batch done 10.246196269989014\n",
+      "yielding 2\n",
+      "236\n",
+      "yielding 2\n",
+      "237\n",
+      "start batch\n",
+      "batch done 10.259639501571655\n",
+      "yielding 2\n",
+      "238\n",
+      "yielding 2\n",
+      "239\n",
+      "start batch\n",
+      "batch done 10.334523439407349\n",
+      "yielding 2\n",
+      "240\n",
+      "yielding 2\n",
+      "241\n",
+      "start batch\n",
+      "batch done 10.376052618026733\n",
+      "yielding 2\n",
+      "242\n",
+      "yielding 2\n",
+      "243\n",
+      "start batch\n",
+      "batch done 10.355473756790161\n",
+      "yielding 2\n",
+      "244\n",
+      "yielding 2\n",
+      "245\n",
+      "start batch\n",
+      "batch done 10.373331069946289\n",
+      "yielding 2\n",
+      "246\n",
+      "yielding 2\n",
+      "247\n",
+      "start batch\n",
+      "batch done 10.383445978164673\n",
+      "yielding 2\n",
+      "248\n",
+      "yielding 2\n",
+      "249\n",
+      "start batch\n",
+      "batch done 10.373626232147217\n",
+      "yielding 2\n",
+      "250\n",
+      "yielding 2\n",
+      "251\n",
+      "start batch\n",
+      "batch done 10.381466627120972\n",
+      "yielding 2\n",
+      "252\n",
+      "yielding 2\n",
+      "253\n",
+      "start batch\n",
+      "batch done 10.441335201263428\n",
+      "yielding 2\n",
+      "254\n",
+      "yielding 2\n",
+      "255\n",
+      "start batch\n",
+      "batch done 10.35752820968628\n",
+      "yielding 2\n",
+      "256\n",
+      "yielding 2\n",
+      "257\n",
+      "start batch\n",
+      "batch done 10.37252426147461\n",
+      "yielding 2\n",
+      "258\n",
+      "yielding 2\n",
+      "259\n",
+      "start batch\n",
+      "batch done 10.370798826217651\n",
+      "yielding 2\n",
+      "260\n",
+      "yielding 2\n",
+      "261\n",
+      "start batch\n",
+      "batch done 10.288760900497437\n",
+      "yielding 2\n",
+      "262\n",
+      "yielding 2\n",
+      "263\n",
+      "start batch\n",
+      "batch done 10.283547163009644\n",
+      "yielding 2\n",
+      "264\n",
+      "yielding 2\n",
+      "265\n",
+      "start batch\n",
+      "batch done 10.27080488204956\n",
+      "yielding 2\n",
+      "266\n",
+      "yielding 2\n",
+      "267\n",
+      "start batch\n",
+      "batch done 10.22758674621582\n",
+      "yielding 2\n",
+      "268\n",
+      "yielding 2\n",
+      "269\n",
+      "start batch\n",
+      "batch done 10.239722967147827\n",
+      "yielding 2\n",
+      "270\n",
+      "yielding 2\n",
+      "271\n",
+      "start batch\n",
+      "batch done 10.373510599136353\n",
+      "yielding 2\n",
+      "272\n",
+      "yielding 2\n",
+      "273\n",
+      "start batch\n",
+      "batch done 10.406006574630737\n",
+      "yielding 2\n",
+      "274\n",
+      "yielding 2\n",
+      "275\n",
+      "start batch\n",
+      "batch done 10.439244747161865\n",
+      "yielding 2\n",
+      "276\n",
+      "yielding 2\n",
+      "277\n",
+      "start batch\n",
+      "batch done 10.42184853553772\n",
+      "yielding 2\n",
+      "278\n",
+      "yielding 2\n",
+      "279\n",
+      "start batch\n",
+      "batch done 10.290428638458252\n",
+      "yielding 2\n",
+      "280\n",
+      "yielding 2\n",
+      "281\n",
+      "start batch\n",
+      "batch done 10.249080896377563\n",
+      "yielding 2\n",
+      "282\n",
+      "yielding 2\n",
+      "283\n",
+      "start batch\n",
+      "batch done 10.199166774749756\n",
+      "yielding 2\n",
+      "284\n",
+      "yielding 2\n",
+      "285\n",
+      "start batch\n",
+      "batch done 10.299896240234375\n",
+      "yielding 2\n",
+      "286\n",
+      "yielding 2\n",
+      "287\n",
+      "start batch\n",
+      "batch done 10.261987447738647\n",
+      "yielding 2\n",
+      "288\n",
+      "yielding 2\n",
+      "289\n",
+      "start batch\n",
+      "batch done 10.262552738189697\n",
+      "yielding 2\n",
+      "290\n",
+      "yielding 2\n",
+      "291\n",
+      "start batch\n",
+      "batch done 10.273445129394531\n",
+      "yielding 2\n",
+      "292\n",
+      "yielding 2\n",
+      "293\n",
+      "start batch\n",
+      "batch done 10.31282639503479\n",
+      "yielding 2\n",
+      "294\n",
+      "yielding 2\n",
+      "295\n",
+      "start batch\n",
+      "batch done 10.294592142105103\n",
+      "yielding 2\n",
+      "296\n",
+      "yielding 2\n",
+      "297\n",
+      "start batch\n",
+      "batch done 10.296633243560791\n",
+      "yielding 2\n",
+      "298\n",
+      "yielding 2\n",
+      "299\n",
+      "start batch\n",
+      "batch done 10.334697484970093\n",
+      "yielding 2\n",
+      "300\n",
+      "yielding 2\n",
+      "301\n",
+      "start batch\n",
+      "batch done 10.329278230667114\n",
+      "yielding 2\n",
+      "302\n",
+      "yielding 2\n",
+      "303\n",
+      "start batch\n",
+      "batch done 10.279841899871826\n",
+      "yielding 2\n",
+      "304\n",
+      "yielding 2\n",
+      "305\n",
+      "start batch\n",
+      "batch done 10.266345262527466\n",
+      "yielding 2\n",
+      "306\n",
+      "yielding 2\n",
+      "307\n",
+      "start batch\n",
+      "batch done 10.276931285858154\n",
+      "yielding 2\n",
+      "308\n",
+      "yielding 2\n",
+      "309\n",
+      "start batch\n",
+      "batch done 10.287343502044678\n",
+      "yielding 2\n",
+      "310\n",
+      "yielding 2\n",
+      "311\n",
+      "start batch\n",
+      "batch done 10.20299506187439\n",
+      "yielding 2\n",
+      "312\n",
+      "yielding 2\n",
+      "313\n",
+      "start batch\n",
+      "batch done 10.138686418533325\n",
+      "yielding 2\n",
+      "314\n",
+      "yielding 2\n",
+      "315\n",
+      "start batch\n",
+      "batch done 10.15420937538147\n",
+      "yielding 2\n",
+      "316\n",
+      "yielding 2\n",
+      "317\n",
+      "start batch\n",
+      "batch done 10.263410806655884\n",
+      "yielding 2\n",
+      "318\n",
+      "yielding 2\n",
+      "319\n",
+      "start batch\n",
+      "batch done 10.388577461242676\n",
+      "yielding 2\n",
+      "320\n",
+      "yielding 2\n",
+      "321\n",
+      "start batch\n",
+      "batch done 10.406841039657593\n",
+      "yielding 2\n",
+      "322\n",
+      "yielding 2\n",
+      "323\n",
+      "start batch\n",
+      "batch done 10.329836130142212\n",
+      "yielding 2\n",
+      "324\n",
+      "yielding 2\n",
+      "325\n",
+      "start batch\n",
+      "batch done 10.316121816635132\n",
+      "yielding 2\n",
+      "326\n",
+      "yielding 2\n",
+      "327\n",
+      "start batch\n",
+      "batch done 10.225364446640015\n",
+      "yielding 2\n",
+      "328\n",
+      "yielding 2\n",
+      "329\n",
+      "start batch\n",
+      "batch done 10.296908617019653\n",
+      "yielding 2\n",
+      "330\n",
+      "yielding 2\n",
+      "331\n",
+      "start batch\n",
+      "batch done 10.268325090408325\n",
+      "yielding 2\n",
+      "332\n",
+      "yielding 2\n",
+      "333\n",
+      "start batch\n",
+      "batch done 10.258509635925293\n",
+      "yielding 2\n",
+      "334\n",
+      "yielding 2\n",
+      "335\n",
+      "start batch\n",
+      "batch done 10.248302936553955\n",
+      "yielding 2\n",
+      "336\n",
+      "yielding 2\n",
+      "337\n",
+      "start batch\n",
+      "batch done 10.255542516708374\n",
+      "yielding 2\n",
+      "338\n",
+      "yielding 2\n",
+      "339\n",
+      "start batch\n",
+      "batch done 10.270760297775269\n",
+      "yielding 2\n",
+      "340\n",
+      "yielding 2\n",
+      "341\n",
+      "start batch\n",
+      "batch done 10.334429502487183\n",
+      "yielding 2\n",
+      "342\n",
+      "done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_34_21pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_34_21.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_40_28\n",
+      "len(cis) 255\n",
+      "verts torch.Size([323456, 3])\n",
+      "faces torch.Size([551176, 3])\n",
+      "mesh <pytorch3d.structures.meshes.Meshes object at 0x7fb451430310>\n",
+      "start batch\n",
+      "batch done 10.071701049804688\n",
+      "yielding 2\n",
+      "0\n",
+      "yielding 2\n",
+      "1\n",
+      "start batch\n",
+      "batch done 10.03772521018982\n",
+      "yielding 2\n",
+      "2\n",
+      "yielding 2\n",
+      "3\n",
+      "start batch\n",
+      "batch done 9.911691665649414\n",
+      "yielding 2\n",
+      "4\n",
+      "yielding 2\n",
+      "5\n",
+      "start batch\n",
+      "batch done 9.922966957092285\n",
+      "yielding 2\n",
+      "6\n",
+      "yielding 2\n",
+      "7\n",
+      "start batch\n",
+      "batch done 9.994035243988037\n",
+      "yielding 2\n",
+      "8\n",
+      "yielding 2\n",
+      "9\n",
+      "start batch\n",
+      "batch done 10.039134502410889\n",
+      "yielding 2\n",
+      "10\n",
+      "yielding 2\n",
+      "11\n",
+      "start batch\n",
+      "batch done 10.107273817062378\n",
+      "yielding 2\n",
+      "12\n",
+      "yielding 2\n",
+      "13\n",
+      "start batch\n",
+      "batch done 10.189578294754028\n",
+      "yielding 2\n",
+      "14\n",
+      "yielding 2\n",
+      "15\n",
+      "start batch\n",
+      "batch done 10.12785530090332\n",
+      "yielding 2\n",
+      "16\n",
+      "yielding 2\n",
+      "17\n",
+      "start batch\n",
+      "batch done 9.950327634811401\n",
+      "yielding 2\n",
+      "18\n",
+      "yielding 2\n",
+      "19\n",
+      "start batch\n",
+      "batch done 10.085782527923584\n",
+      "yielding 2\n",
+      "20\n",
+      "yielding 2\n",
+      "21\n",
+      "start batch\n",
+      "batch done 9.997617483139038\n",
+      "yielding 2\n",
+      "22\n",
+      "yielding 2\n",
+      "23\n",
+      "start batch\n",
+      "batch done 10.168825626373291\n",
+      "yielding 2\n",
+      "24\n",
+      "yielding 2\n",
+      "25\n",
+      "start batch\n",
+      "batch done 10.185485124588013\n",
+      "yielding 2\n",
+      "26\n",
+      "yielding 2\n",
+      "27\n",
+      "start batch\n",
+      "batch done 10.180322408676147\n",
+      "yielding 2\n",
+      "28\n",
+      "yielding 2\n",
+      "29\n",
+      "start batch\n",
+      "batch done 10.240355014801025\n",
+      "yielding 2\n",
+      "30\n",
+      "yielding 2\n",
+      "31\n",
+      "start batch\n",
+      "batch done 10.245813846588135\n",
+      "yielding 2\n",
+      "32\n",
+      "yielding 2\n",
+      "33\n",
+      "start batch\n",
+      "batch done 10.210472345352173\n",
+      "yielding 2\n",
+      "34\n",
+      "yielding 2\n",
+      "35\n",
+      "start batch\n",
+      "batch done 10.014606714248657\n",
+      "yielding 2\n",
+      "36\n",
+      "yielding 2\n",
+      "37\n",
+      "start batch\n",
+      "batch done 9.788607120513916\n",
+      "yielding 2\n",
+      "38\n",
+      "yielding 2\n",
+      "39\n",
+      "start batch\n",
+      "batch done 9.856653451919556\n",
+      "yielding 2\n",
+      "40\n",
+      "yielding 2\n",
+      "41\n",
+      "start batch\n",
+      "batch done 10.086619853973389\n",
+      "yielding 2\n",
+      "42\n",
+      "yielding 2\n",
+      "43\n",
+      "start batch\n",
+      "batch done 9.94607162475586\n",
+      "yielding 2\n",
+      "44\n",
+      "yielding 2\n",
+      "45\n",
+      "start batch\n",
+      "batch done 10.062425374984741\n",
+      "yielding 2\n",
+      "46\n",
+      "yielding 2\n",
+      "47\n",
+      "start batch\n",
+      "batch done 9.789291143417358\n",
+      "yielding 2\n",
+      "48\n",
+      "yielding 2\n",
+      "49\n",
+      "start batch\n",
+      "batch done 9.700851678848267\n",
+      "yielding 2\n",
+      "50\n",
+      "yielding 2\n",
+      "51\n",
+      "start batch\n",
+      "batch done 9.648274660110474\n",
+      "yielding 2\n",
+      "52\n",
+      "yielding 2\n",
+      "53\n",
+      "start batch\n",
+      "batch done 9.610943794250488\n",
+      "yielding 2\n",
+      "54\n",
+      "yielding 2\n",
+      "55\n",
+      "start batch\n",
+      "batch done 9.659968614578247\n",
+      "yielding 2\n",
+      "56\n",
+      "yielding 2\n",
+      "57\n",
+      "start batch\n",
+      "batch done 9.62335991859436\n",
+      "yielding 2\n",
+      "58\n",
+      "yielding 2\n",
+      "59\n",
+      "start batch\n",
+      "batch done 9.606178760528564\n",
+      "yielding 2\n",
+      "60\n",
+      "yielding 2\n",
+      "61\n",
+      "start batch\n",
+      "batch done 9.74783205986023\n",
+      "yielding 2\n",
+      "62\n",
+      "yielding 2\n",
+      "63\n",
+      "start batch\n",
+      "batch done 9.654750347137451\n",
+      "yielding 2\n",
+      "64\n",
+      "yielding 2\n",
+      "65\n",
+      "start batch\n",
+      "batch done 9.713043689727783\n",
+      "yielding 2\n",
+      "66\n",
+      "yielding 2\n",
+      "67\n",
+      "start batch\n",
+      "batch done 9.765546083450317\n",
+      "yielding 2\n",
+      "68\n",
+      "yielding 2\n",
+      "69\n",
+      "start batch\n",
+      "batch done 9.77006196975708\n",
+      "yielding 2\n",
+      "70\n",
+      "yielding 2\n",
+      "71\n",
+      "start batch\n",
+      "batch done 9.798383235931396\n",
+      "yielding 2\n",
+      "72\n",
+      "yielding 2\n",
+      "73\n",
+      "start batch\n",
+      "batch done 9.859451055526733\n",
+      "yielding 2\n",
+      "74\n",
+      "yielding 2\n",
+      "75\n",
+      "start batch\n",
+      "batch done 9.862020015716553\n",
+      "yielding 2\n",
+      "76\n",
+      "yielding 2\n",
+      "77\n",
+      "start batch\n",
+      "batch done 9.845524549484253\n",
+      "yielding 2\n",
+      "78\n",
+      "yielding 2\n",
+      "79\n",
+      "start batch\n",
+      "batch done 9.790536880493164\n",
+      "yielding 2\n",
+      "80\n",
+      "yielding 2\n",
+      "81\n",
+      "start batch\n",
+      "batch done 9.729489803314209\n",
+      "yielding 2\n",
+      "82\n",
+      "yielding 2\n",
+      "83\n",
+      "start batch\n",
+      "batch done 9.74006986618042\n",
+      "yielding 2\n",
+      "84\n",
+      "yielding 2\n",
+      "85\n",
+      "start batch\n",
+      "batch done 9.702442169189453\n",
+      "yielding 2\n",
+      "86\n",
+      "yielding 2\n",
+      "87\n",
+      "start batch\n",
+      "batch done 9.644651651382446\n",
+      "yielding 2\n",
+      "88\n",
+      "yielding 2\n",
+      "89\n",
+      "start batch\n",
+      "batch done 9.7020742893219\n",
+      "yielding 2\n",
+      "90\n",
+      "yielding 2\n",
+      "91\n",
+      "start batch\n",
+      "batch done 9.658132553100586\n",
+      "yielding 2\n",
+      "92\n",
+      "yielding 2\n",
+      "93\n",
+      "start batch\n",
+      "batch done 9.708035707473755\n",
+      "yielding 2\n",
+      "94\n",
+      "yielding 2\n",
+      "95\n",
+      "start batch\n",
+      "batch done 9.658500671386719\n",
+      "yielding 2\n",
+      "96\n",
+      "yielding 2\n",
+      "97\n",
+      "start batch\n",
+      "batch done 9.7077956199646\n",
+      "yielding 2\n",
+      "98\n",
+      "yielding 2\n",
+      "99\n",
+      "start batch\n",
+      "batch done 9.77261233329773\n",
+      "yielding 2\n",
+      "100\n",
+      "yielding 2\n",
+      "101\n",
+      "start batch\n",
+      "batch done 9.74751353263855\n",
+      "yielding 2\n",
+      "102\n",
+      "yielding 2\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "103\n",
+      "start batch\n",
+      "batch done 9.615444421768188\n",
+      "yielding 2\n",
+      "104\n",
+      "yielding 2\n",
+      "105\n",
+      "start batch\n",
+      "batch done 9.590234994888306\n",
+      "yielding 2\n",
+      "106\n",
+      "yielding 2\n",
+      "107\n",
+      "start batch\n",
+      "batch done 9.759873628616333\n",
+      "yielding 2\n",
+      "108\n",
+      "yielding 2\n",
+      "109\n",
+      "start batch\n",
+      "batch done 9.795974969863892\n",
+      "yielding 2\n",
+      "110\n",
+      "yielding 2\n",
+      "111\n",
+      "start batch\n",
+      "batch done 9.807712078094482\n",
+      "yielding 2\n",
+      "112\n",
+      "yielding 2\n",
+      "113\n",
+      "start batch\n",
+      "batch done 9.774396657943726\n",
+      "yielding 2\n",
+      "114\n",
+      "yielding 2\n",
+      "115\n",
+      "start batch\n",
+      "batch done 9.803440570831299\n",
+      "yielding 2\n",
+      "116\n",
+      "yielding 2\n",
+      "117\n",
+      "start batch\n",
+      "batch done 9.771156549453735\n",
+      "yielding 2\n",
+      "118\n",
+      "yielding 2\n",
+      "119\n",
+      "start batch\n",
+      "batch done 9.844447612762451\n",
+      "yielding 2\n",
+      "120\n",
+      "yielding 2\n",
+      "121\n",
+      "start batch\n",
+      "batch done 9.818187475204468\n",
+      "yielding 2\n",
+      "122\n",
+      "yielding 2\n",
+      "123\n",
+      "start batch\n",
+      "batch done 9.808095216751099\n",
+      "yielding 2\n",
+      "124\n",
+      "yielding 2\n",
+      "125\n",
+      "start batch\n",
+      "batch done 9.786882162094116\n",
+      "yielding 2\n",
+      "126\n",
+      "yielding 2\n",
+      "127\n",
+      "start batch\n",
+      "batch done 9.749504089355469\n",
+      "yielding 2\n",
+      "128\n",
+      "yielding 2\n",
+      "129\n",
+      "start batch\n",
+      "batch done 9.76859188079834\n",
+      "yielding 2\n",
+      "130\n",
+      "yielding 2\n",
+      "131\n",
+      "start batch\n",
+      "batch done 9.752668619155884\n",
+      "yielding 2\n",
+      "132\n",
+      "yielding 2\n",
+      "133\n",
+      "start batch\n",
+      "batch done 9.711632490158081\n",
+      "yielding 2\n",
+      "134\n",
+      "yielding 2\n",
+      "135\n",
+      "start batch\n",
+      "batch done 9.702852725982666\n",
+      "yielding 2\n",
+      "136\n",
+      "yielding 2\n",
+      "137\n",
+      "start batch\n",
+      "batch done 9.726299047470093\n",
+      "yielding 2\n",
+      "138\n",
+      "yielding 2\n",
+      "139\n",
+      "start batch\n",
+      "batch done 9.797923564910889\n",
+      "yielding 2\n",
+      "140\n",
+      "yielding 2\n",
+      "141\n",
+      "start batch\n",
+      "batch done 9.68773627281189\n",
+      "yielding 2\n",
+      "142\n",
+      "yielding 2\n",
+      "143\n",
+      "start batch\n",
+      "batch done 9.825831651687622\n",
+      "yielding 2\n",
+      "144\n",
+      "yielding 2\n",
+      "145\n",
+      "start batch\n",
+      "batch done 9.904535293579102\n",
+      "yielding 2\n",
+      "146\n",
+      "yielding 2\n",
+      "147\n",
+      "start batch\n",
+      "batch done 9.920119047164917\n",
+      "yielding 2\n",
+      "148\n",
+      "yielding 2\n",
+      "149\n",
+      "start batch\n",
+      "batch done 10.053467035293579\n",
+      "yielding 2\n",
+      "150\n",
+      "yielding 2\n",
+      "151\n",
+      "start batch\n",
+      "batch done 10.026091814041138\n",
+      "yielding 2\n",
+      "152\n",
+      "yielding 2\n",
+      "153\n",
+      "start batch\n",
+      "batch done 10.040326356887817\n",
+      "yielding 2\n",
+      "154\n",
+      "yielding 2\n",
+      "155\n",
+      "start batch\n",
+      "batch done 10.131003379821777\n",
+      "yielding 2\n",
+      "156\n",
+      "yielding 2\n",
+      "157\n",
+      "start batch\n",
+      "batch done 10.08448600769043\n",
+      "yielding 2\n",
+      "158\n",
+      "yielding 2\n",
+      "159\n",
+      "start batch\n",
+      "batch done 10.053625345230103\n",
+      "yielding 2\n",
+      "160\n",
+      "yielding 2\n",
+      "161\n",
+      "start batch\n",
+      "batch done 10.055660486221313\n",
+      "yielding 2\n",
+      "162\n",
+      "yielding 2\n",
+      "163\n",
+      "start batch\n",
+      "batch done 10.085137844085693\n",
+      "yielding 2\n",
+      "164\n",
+      "yielding 2\n",
+      "165\n",
+      "start batch\n",
+      "batch done 10.088010787963867\n",
+      "yielding 2\n",
+      "166\n",
+      "yielding 2\n",
+      "167\n",
+      "start batch\n",
+      "batch done 10.1860032081604\n",
+      "yielding 2\n",
+      "168\n",
+      "yielding 2\n",
+      "169\n",
+      "start batch\n",
+      "batch done 10.13173246383667\n",
+      "yielding 2\n",
+      "170\n",
+      "yielding 2\n",
+      "171\n",
+      "start batch\n",
+      "batch done 10.156609296798706\n",
+      "yielding 2\n",
+      "172\n",
+      "yielding 2\n",
+      "173\n",
+      "start batch\n",
+      "batch done 10.110306024551392\n",
+      "yielding 2\n",
+      "174\n",
+      "yielding 2\n",
+      "175\n",
+      "start batch\n",
+      "batch done 10.185807466506958\n",
+      "yielding 2\n",
+      "176\n",
+      "yielding 2\n",
+      "177\n",
+      "start batch\n",
+      "batch done 9.990265607833862\n",
+      "yielding 2\n",
+      "178\n",
+      "yielding 2\n",
+      "179\n",
+      "start batch\n",
+      "batch done 9.922758102416992\n",
+      "yielding 2\n",
+      "180\n",
+      "yielding 2\n",
+      "181\n",
+      "start batch\n",
+      "batch done 10.039456605911255\n",
+      "yielding 2\n",
+      "182\n",
+      "yielding 2\n",
+      "183\n",
+      "start batch\n",
+      "batch done 10.039565086364746\n",
+      "yielding 2\n",
+      "184\n",
+      "yielding 2\n",
+      "185\n",
+      "start batch\n",
+      "batch done 10.030608415603638\n",
+      "yielding 2\n",
+      "186\n",
+      "yielding 2\n",
+      "187\n",
+      "start batch\n",
+      "batch done 9.994704246520996\n",
+      "yielding 2\n",
+      "188\n",
+      "yielding 2\n",
+      "189\n",
+      "start batch\n",
+      "batch done 10.02427864074707\n",
+      "yielding 2\n",
+      "190\n",
+      "yielding 2\n",
+      "191\n",
+      "start batch\n",
+      "batch done 10.006491899490356\n",
+      "yielding 2\n",
+      "192\n",
+      "yielding 2\n",
+      "193\n",
+      "start batch\n",
+      "batch done 10.113430738449097\n",
+      "yielding 2\n",
+      "194\n",
+      "yielding 2\n",
+      "195\n",
+      "start batch\n",
+      "batch done 10.17215085029602\n",
+      "yielding 2\n",
+      "196\n",
+      "yielding 2\n",
+      "197\n",
+      "start batch\n",
+      "batch done 9.980040550231934\n",
+      "yielding 2\n",
+      "198\n",
+      "yielding 2\n",
+      "199\n",
+      "start batch\n",
+      "batch done 10.099279165267944\n",
+      "yielding 2\n",
+      "200\n",
+      "yielding 2\n",
+      "201\n",
+      "start batch\n",
+      "batch done 9.904454469680786\n",
+      "yielding 2\n",
+      "202\n",
+      "yielding 2\n",
+      "203\n",
+      "start batch\n",
+      "batch done 9.800585508346558\n",
+      "yielding 2\n",
+      "204\n",
+      "yielding 2\n",
+      "205\n",
+      "start batch\n",
+      "batch done 9.879602909088135\n",
+      "yielding 2\n",
+      "206\n",
+      "yielding 2\n",
+      "207\n",
+      "start batch\n",
+      "batch done 9.9592866897583\n",
+      "yielding 2\n",
+      "208\n",
+      "yielding 2\n",
+      "209\n",
+      "start batch\n",
+      "batch done 9.930862426757812\n",
+      "yielding 2\n",
+      "210\n",
+      "yielding 2\n",
+      "211\n",
+      "start batch\n",
+      "batch done 9.882453441619873\n",
+      "yielding 2\n",
+      "212\n",
+      "yielding 2\n",
+      "213\n",
+      "start batch\n",
+      "batch done 9.830788373947144\n",
+      "yielding 2\n",
+      "214\n",
+      "yielding 2\n",
+      "215\n",
+      "start batch\n",
+      "batch done 9.858560562133789\n",
+      "yielding 2\n",
+      "216\n",
+      "yielding 2\n",
+      "217\n",
+      "start batch\n",
+      "batch done 9.839792251586914\n",
+      "yielding 2\n",
+      "218\n",
+      "yielding 2\n",
+      "219\n",
+      "start batch\n",
+      "batch done 9.896064519882202\n",
+      "yielding 2\n",
+      "220\n",
+      "yielding 2\n",
+      "221\n",
+      "start batch\n",
+      "batch done 9.862434148788452\n",
+      "yielding 2\n",
+      "222\n",
+      "yielding 2\n",
+      "223\n",
+      "start batch\n",
+      "batch done 9.869290590286255\n",
+      "yielding 2\n",
+      "224\n",
+      "yielding 2\n",
+      "225\n",
+      "start batch\n",
+      "batch done 9.900856018066406\n",
+      "yielding 2\n",
+      "226\n",
+      "yielding 2\n",
+      "227\n",
+      "start batch\n",
+      "batch done 9.85326862335205\n",
+      "yielding 2\n",
+      "228\n",
+      "yielding 2\n",
+      "229\n",
+      "start batch\n",
+      "batch done 9.905799388885498\n",
+      "yielding 2\n",
+      "230\n",
+      "yielding 2\n",
+      "231\n",
+      "start batch\n",
+      "batch done 9.835041522979736\n",
+      "yielding 2\n",
+      "232\n",
+      "yielding 2\n",
+      "233\n",
+      "start batch\n",
+      "batch done 9.81263256072998\n",
+      "yielding 2\n",
+      "234\n",
+      "yielding 2\n",
+      "235\n",
+      "start batch\n",
+      "batch done 9.753180742263794\n",
+      "yielding 2\n",
+      "236\n",
+      "yielding 2\n",
+      "237\n",
+      "start batch\n",
+      "batch done 9.810314893722534\n",
+      "yielding 2\n",
+      "238\n",
+      "yielding 2\n",
+      "239\n",
+      "start batch\n",
+      "batch done 9.895184516906738\n",
+      "yielding 2\n",
+      "240\n",
+      "yielding 2\n",
+      "241\n",
+      "start batch\n",
+      "batch done 9.834079027175903\n",
+      "yielding 2\n",
+      "242\n",
+      "yielding 2\n",
+      "243\n",
+      "start batch\n",
+      "batch done 9.835010290145874\n",
+      "yielding 2\n",
+      "244\n",
+      "yielding 2\n",
+      "245\n",
+      "start batch\n",
+      "batch done 9.630878925323486\n",
+      "yielding 2\n",
+      "246\n",
+      "yielding 2\n",
+      "247\n",
+      "start batch\n",
+      "batch done 9.601829528808594\n",
+      "yielding 2\n",
+      "248\n",
+      "yielding 2\n",
+      "249\n",
+      "start batch\n",
+      "batch done 9.664223432540894\n",
+      "yielding 2\n",
+      "250\n",
+      "yielding 2\n",
+      "251\n",
+      "start batch\n",
+      "batch done 9.64228105545044\n",
+      "yielding 2\n",
+      "252\n",
+      "yielding 2\n",
+      "253\n",
+      "start batch\n",
+      "batch done 9.670952558517456\n",
+      "yielding 2\n",
+      "254\n",
+      "done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_40_28pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_05_14_40_28.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_09_06_17_53\n",
+      "len(cis) 49\n",
+      "verts torch.Size([235358, 3])\n",
+      "faces torch.Size([438713, 3])\n",
+      "mesh <pytorch3d.structures.meshes.Meshes object at 0x7fb4514313d0>\n",
+      "start batch\n",
+      "batch done 8.11957335472107\n",
+      "yielding 2\n",
+      "0\n",
+      "yielding 2\n",
+      "1\n",
+      "start batch\n",
+      "batch done 8.062399864196777\n",
+      "yielding 2\n",
+      "2\n",
+      "yielding 2\n",
+      "3\n",
+      "start batch\n",
+      "batch done 8.040876388549805\n",
+      "yielding 2\n",
+      "4\n",
+      "yielding 2\n",
+      "5\n",
+      "start batch\n",
+      "batch done 8.07719874382019\n",
+      "yielding 2\n",
+      "6\n",
+      "yielding 2\n",
+      "7\n",
+      "start batch\n",
+      "batch done 8.020920276641846\n",
+      "yielding 2\n",
+      "8\n",
+      "yielding 2\n",
+      "9\n",
+      "start batch\n",
+      "batch done 8.000450849533081\n",
+      "yielding 2\n",
+      "10\n",
+      "yielding 2\n",
+      "11\n",
+      "start batch\n",
+      "batch done 8.065479516983032\n",
+      "yielding 2\n",
+      "12\n",
+      "yielding 2\n",
+      "13\n",
+      "start batch\n",
+      "batch done 8.053370952606201\n",
+      "yielding 2\n",
+      "14\n",
+      "yielding 2\n",
+      "15\n",
+      "start batch\n",
+      "batch done 8.080352306365967\n",
+      "yielding 2\n",
+      "16\n",
+      "yielding 2\n",
+      "17\n",
+      "start batch\n",
+      "batch done 8.115079879760742\n",
+      "yielding 2\n",
+      "18\n",
+      "yielding 2\n",
+      "19\n",
+      "start batch\n",
+      "batch done 8.04244351387024\n",
+      "yielding 2\n",
+      "20\n",
+      "yielding 2\n",
+      "21\n",
+      "start batch\n",
+      "batch done 7.959102392196655\n",
+      "yielding 2\n",
+      "22\n",
+      "yielding 2\n",
+      "23\n",
+      "start batch\n",
+      "batch done 7.972392559051514\n",
+      "yielding 2\n",
+      "24\n",
+      "yielding 2\n",
+      "25\n",
+      "start batch\n",
+      "batch done 7.952930450439453\n",
+      "yielding 2\n",
+      "26\n",
+      "yielding 2\n",
+      "27\n",
+      "start batch\n",
+      "batch done 7.933484792709351\n",
+      "yielding 2\n",
+      "28\n",
+      "yielding 2\n",
+      "29\n",
+      "start batch\n",
+      "batch done 7.899086236953735\n",
+      "yielding 2\n",
+      "30\n",
+      "yielding 2\n",
+      "31\n",
+      "start batch\n",
+      "batch done 7.832817316055298\n",
+      "yielding 2\n",
+      "32\n",
+      "yielding 2\n",
+      "33\n",
+      "start batch\n",
+      "batch done 7.874309778213501\n",
+      "yielding 2\n",
+      "34\n",
+      "yielding 2\n",
+      "35\n",
+      "start batch\n",
+      "batch done 7.850740194320679\n",
+      "yielding 2\n",
+      "36\n",
+      "yielding 2\n",
+      "37\n",
+      "start batch\n",
+      "batch done 7.873048543930054\n",
+      "yielding 2\n",
+      "38\n",
+      "yielding 2\n",
+      "39\n",
+      "start batch\n",
+      "batch done 7.875843286514282\n",
+      "yielding 2\n",
+      "40\n",
+      "yielding 2\n",
+      "41\n",
+      "start batch\n",
+      "batch done 7.908046245574951\n",
+      "yielding 2\n",
+      "42\n",
+      "yielding 2\n",
+      "43\n",
+      "start batch\n",
+      "batch done 7.867485523223877\n",
+      "yielding 2\n",
+      "44\n",
+      "yielding 2\n",
+      "45\n",
+      "start batch\n",
+      "batch done 7.853913068771362\n",
+      "yielding 2\n",
+      "46\n",
+      "yielding 2\n",
+      "47\n",
+      "start batch\n",
+      "batch done 7.84745717048645\n",
+      "yielding 2\n",
+      "48\n",
+      "done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_09_06_17_53pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_09_06_17_53.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_09_07_03_19\n",
+      "aleady done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_09_07_03_19pytorch_rgbd_debug.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_09_07_03_19.mp4\n",
+      "skipping non-dir /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_09_07_03_19pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_26_16_12_30\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "len(cis) 24\n",
+      "verts torch.Size([285274, 3])\n",
+      "faces torch.Size([533443, 3])\n",
+      "mesh <pytorch3d.structures.meshes.Meshes object at 0x7fb4d4f4b400>\n",
+      "start batch\n",
+      "batch done 9.860872268676758\n",
+      "yielding 2\n",
+      "0\n",
+      "yielding 2\n",
+      "1\n",
+      "start batch\n",
+      "batch done 9.77180814743042\n",
+      "yielding 2\n",
+      "2\n",
+      "yielding 2\n",
+      "3\n",
+      "start batch\n",
+      "batch done 9.790065288543701\n",
+      "yielding 2\n",
+      "4\n",
+      "yielding 2\n",
+      "5\n",
+      "start batch\n",
+      "batch done 9.80699634552002\n",
+      "yielding 2\n",
+      "6\n",
+      "yielding 2\n",
+      "7\n",
+      "start batch\n",
+      "batch done 9.652620792388916\n",
+      "yielding 2\n",
+      "8\n",
+      "yielding 2\n",
+      "9\n",
+      "start batch\n",
+      "batch done 9.410562515258789\n",
+      "yielding 2\n",
+      "10\n",
+      "yielding 2\n",
+      "11\n",
+      "start batch\n",
+      "batch done 9.328906059265137\n",
+      "yielding 2\n",
+      "12\n",
+      "yielding 2\n",
+      "13\n",
+      "start batch\n",
+      "batch done 9.339547872543335\n",
+      "yielding 2\n",
+      "14\n",
+      "yielding 2\n",
+      "15\n",
+      "start batch\n",
+      "batch done 9.230678081512451\n",
+      "yielding 2\n",
+      "16\n",
+      "yielding 2\n",
+      "17\n",
+      "start batch\n",
+      "batch done 9.366596460342407\n",
+      "yielding 2\n",
+      "18\n",
+      "yielding 2\n",
+      "19\n",
+      "start batch\n",
+      "batch done 9.247538566589355\n",
+      "yielding 2\n",
+      "20\n",
+      "yielding 2\n",
+      "21\n",
+      "start batch\n",
+      "batch done 9.203657150268555\n",
+      "yielding 2\n",
+      "22\n",
+      "yielding 2\n",
+      "23\n",
+      "done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_26_16_12_30pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_26_16_13_48\n",
+      "len(cis) 25\n",
+      "verts torch.Size([377264, 3])\n",
+      "faces torch.Size([689827, 3])\n",
+      "mesh <pytorch3d.structures.meshes.Meshes object at 0x7fb451438670>\n",
+      "start batch\n",
+      "batch done 12.502545595169067\n",
+      "yielding 2\n",
+      "0\n",
+      "yielding 2\n",
+      "1\n",
+      "start batch\n",
+      "batch done 12.391855001449585\n",
+      "yielding 2\n",
+      "2\n",
+      "yielding 2\n",
+      "3\n",
+      "start batch\n",
+      "batch done 12.385469436645508\n",
+      "yielding 2\n",
+      "4\n",
+      "yielding 2\n",
+      "5\n",
+      "start batch\n",
+      "batch done 12.457818269729614\n",
+      "yielding 2\n",
+      "6\n",
+      "yielding 2\n",
+      "7\n",
+      "start batch\n",
+      "batch done 12.472528219223022\n",
+      "yielding 2\n",
+      "8\n",
+      "yielding 2\n",
+      "9\n",
+      "start batch\n",
+      "batch done 12.611446619033813\n",
+      "yielding 2\n",
+      "10\n",
+      "yielding 2\n",
+      "11\n",
+      "start batch\n",
+      "batch done 12.364638805389404\n",
+      "yielding 2\n",
+      "12\n",
+      "yielding 2\n",
+      "13\n",
+      "start batch\n",
+      "batch done 12.012330532073975\n",
+      "yielding 2\n",
+      "14\n",
+      "yielding 2\n",
+      "15\n",
+      "start batch\n",
+      "batch done 11.889163970947266\n",
+      "yielding 2\n",
+      "16\n",
+      "yielding 2\n",
+      "17\n",
+      "start batch\n",
+      "batch done 11.95132064819336\n",
+      "yielding 2\n",
+      "18\n",
+      "yielding 2\n",
+      "19\n",
+      "start batch\n",
+      "batch done 11.967162132263184\n",
+      "yielding 2\n",
+      "20\n",
+      "yielding 2\n",
+      "21\n",
+      "start batch\n",
+      "batch done 11.952049016952515\n",
+      "yielding 2\n",
+      "22\n",
+      "yielding 2\n",
+      "23\n",
+      "start batch\n",
+      "batch done 11.934701442718506\n",
+      "yielding 2\n",
+      "24\n",
+      "done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_26_16_13_48pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_26_16_15_35\n",
+      "len(cis) 23\n",
+      "verts torch.Size([368635, 3])\n",
+      "faces torch.Size([688231, 3])\n",
+      "mesh <pytorch3d.structures.meshes.Meshes object at 0x7fb4d4fcf790>\n",
+      "start batch\n",
+      "batch done 12.338163614273071\n",
+      "yielding 2\n",
+      "0\n",
+      "yielding 2\n",
+      "1\n",
+      "start batch\n",
+      "batch done 12.208203554153442\n",
+      "yielding 2\n",
+      "2\n",
+      "yielding 2\n",
+      "3\n",
+      "start batch\n",
+      "batch done 12.270771026611328\n",
+      "yielding 2\n",
+      "4\n",
+      "yielding 2\n",
+      "5\n",
+      "start batch\n",
+      "batch done 12.277809143066406\n",
+      "yielding 2\n",
+      "6\n",
+      "yielding 2\n",
+      "7\n",
+      "start batch\n",
+      "batch done 12.202118873596191\n",
+      "yielding 2\n",
+      "8\n",
+      "yielding 2\n",
+      "9\n",
+      "start batch\n",
+      "batch done 12.213706254959106\n",
+      "yielding 2\n",
+      "10\n",
+      "yielding 2\n",
+      "11\n",
+      "start batch\n",
+      "batch done 12.219764471054077\n",
+      "yielding 2\n",
+      "12\n",
+      "yielding 2\n",
+      "13\n",
+      "start batch\n",
+      "batch done 12.080536603927612\n",
+      "yielding 2\n",
+      "14\n",
+      "yielding 2\n",
+      "15\n",
+      "start batch\n",
+      "batch done 12.086273193359375\n",
+      "yielding 2\n",
+      "16\n",
+      "yielding 2\n",
+      "17\n",
+      "start batch\n",
+      "batch done 11.934801816940308\n",
+      "yielding 2\n",
+      "18\n",
+      "yielding 2\n",
+      "19\n",
+      "start batch\n",
+      "batch done 11.870025634765625\n",
+      "yielding 2\n",
+      "20\n",
+      "yielding 2\n",
+      "21\n",
+      "start batch\n",
+      "batch done 11.898917198181152\n",
+      "yielding 2\n",
+      "22\n",
+      "done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_08_26_16_15_35pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_09_01_10_52_56\n",
+      "len(cis) 191\n",
+      "verts torch.Size([565536, 3])\n",
+      "faces torch.Size([1013784, 3])\n",
+      "mesh <pytorch3d.structures.meshes.Meshes object at 0x7fb4d4fcf850>\n",
+      "start batch\n",
+      "batch done 18.099862098693848\n",
+      "yielding 2\n",
+      "0\n",
+      "yielding 2\n",
+      "1\n",
+      "start batch\n",
+      "batch done 17.983892917633057\n",
+      "yielding 2\n",
+      "2\n",
+      "yielding 2\n",
+      "3\n",
+      "start batch\n",
+      "batch done 17.99262285232544\n",
+      "yielding 2\n",
+      "4\n",
+      "yielding 2\n",
+      "5\n",
+      "start batch\n",
+      "batch done 17.759662866592407\n",
+      "yielding 2\n",
+      "6\n",
+      "yielding 2\n",
+      "7\n",
+      "start batch\n",
+      "batch done 17.754931211471558\n",
+      "yielding 2\n",
+      "8\n",
+      "yielding 2\n",
+      "9\n",
+      "start batch\n",
+      "batch done 17.69835138320923\n",
+      "yielding 2\n",
+      "10\n",
+      "yielding 2\n",
+      "11\n",
+      "start batch\n",
+      "batch done 17.649794578552246\n",
+      "yielding 2\n",
+      "12\n",
+      "yielding 2\n",
+      "13\n",
+      "start batch\n",
+      "batch done 17.669198513031006\n",
+      "yielding 2\n",
+      "14\n",
+      "yielding 2\n",
+      "15\n",
+      "start batch\n",
+      "batch done 17.739625215530396\n",
+      "yielding 2\n",
+      "16\n",
+      "yielding 2\n",
+      "17\n",
+      "start batch\n",
+      "batch done 17.80592942237854\n",
+      "yielding 2\n",
+      "18\n",
+      "yielding 2\n",
+      "19\n",
+      "start batch\n",
+      "batch done 17.91886878013611\n",
+      "yielding 2\n",
+      "20\n",
+      "yielding 2\n",
+      "21\n",
+      "start batch\n",
+      "batch done 17.975651502609253\n",
+      "yielding 2\n",
+      "22\n",
+      "yielding 2\n",
+      "23\n",
+      "start batch\n",
+      "batch done 18.02666974067688\n",
+      "yielding 2\n",
+      "24\n",
+      "yielding 2\n",
+      "25\n",
+      "start batch\n",
+      "batch done 18.06190800666809\n",
+      "yielding 2\n",
+      "26\n",
+      "yielding 2\n",
+      "27\n",
+      "start batch\n",
+      "batch done 18.023832082748413\n",
+      "yielding 2\n",
+      "28\n",
+      "yielding 2\n",
+      "29\n",
+      "start batch\n",
+      "batch done 17.9787757396698\n",
+      "yielding 2\n",
+      "30\n",
+      "yielding 2\n",
+      "31\n",
+      "start batch\n",
+      "batch done 17.94359803199768\n",
+      "yielding 2\n",
+      "32\n",
+      "yielding 2\n",
+      "33\n",
+      "start batch\n",
+      "batch done 17.81126093864441\n",
+      "yielding 2\n",
+      "34\n",
+      "yielding 2\n",
+      "35\n",
+      "start batch\n",
+      "batch done 17.803245544433594\n",
+      "yielding 2\n",
+      "36\n",
+      "yielding 2\n",
+      "37\n",
+      "start batch\n",
+      "batch done 17.850021362304688\n",
+      "yielding 2\n",
+      "38\n",
+      "yielding 2\n",
+      "39\n",
+      "start batch\n",
+      "batch done 18.012259006500244\n",
+      "yielding 2\n",
+      "40\n",
+      "yielding 2\n",
+      "41\n",
+      "start batch\n",
+      "batch done 18.198020696640015\n",
+      "yielding 2\n",
+      "42\n",
+      "yielding 2\n",
+      "43\n",
+      "start batch\n",
+      "batch done 18.198463201522827\n",
+      "yielding 2\n",
+      "44\n",
+      "yielding 2\n",
+      "45\n",
+      "start batch\n",
+      "batch done 18.18800687789917\n",
+      "yielding 2\n",
+      "46\n",
+      "yielding 2\n",
+      "47\n",
+      "start batch\n",
+      "batch done 18.060357570648193\n",
+      "yielding 2\n",
+      "48\n",
+      "yielding 2\n",
+      "49\n",
+      "start batch\n",
+      "batch done 18.071075439453125\n",
+      "yielding 2\n",
+      "50\n",
+      "yielding 2\n",
+      "51\n",
+      "start batch\n",
+      "batch done 18.18110680580139\n",
+      "yielding 2\n",
+      "52\n",
+      "yielding 2\n",
+      "53\n",
+      "start batch\n",
+      "batch done 18.181074142456055\n",
+      "yielding 2\n",
+      "54\n",
+      "yielding 2\n",
+      "55\n",
+      "start batch\n",
+      "batch done 18.191590070724487\n",
+      "yielding 2\n",
+      "56\n",
+      "yielding 2\n",
+      "57\n",
+      "start batch\n",
+      "batch done 18.13661789894104\n",
+      "yielding 2\n",
+      "58\n",
+      "yielding 2\n",
+      "59\n",
+      "start batch\n",
+      "batch done 18.160101652145386\n",
+      "yielding 2\n",
+      "60\n",
+      "yielding 2\n",
+      "61\n",
+      "start batch\n",
+      "batch done 18.107611417770386\n",
+      "yielding 2\n",
+      "62\n",
+      "yielding 2\n",
+      "63\n",
+      "start batch\n",
+      "batch done 17.93538498878479\n",
+      "yielding 2\n",
+      "64\n",
+      "yielding 2\n",
+      "65\n",
+      "start batch\n",
+      "batch done 17.83904004096985\n",
+      "yielding 2\n",
+      "66\n",
+      "yielding 2\n",
+      "67\n",
+      "start batch\n",
+      "batch done 17.729218006134033\n",
+      "yielding 2\n",
+      "68\n",
+      "yielding 2\n",
+      "69\n",
+      "start batch\n",
+      "batch done 17.72085928916931\n",
+      "yielding 2\n",
+      "70\n",
+      "yielding 2\n",
+      "71\n",
+      "start batch\n",
+      "batch done 17.69325017929077\n",
+      "yielding 2\n",
+      "72\n",
+      "yielding 2\n",
+      "73\n",
+      "start batch\n",
+      "batch done 17.71406078338623\n",
+      "yielding 2\n",
+      "74\n",
+      "yielding 2\n",
+      "75\n",
+      "start batch\n",
+      "batch done 17.730464935302734\n",
+      "yielding 2\n",
+      "76\n",
+      "yielding 2\n",
+      "77\n",
+      "start batch\n",
+      "batch done 17.87321639060974\n",
+      "yielding 2\n",
+      "78\n",
+      "yielding 2\n",
+      "79\n",
+      "start batch\n",
+      "batch done 17.973939657211304\n",
+      "yielding 2\n",
+      "80\n",
+      "yielding 2\n",
+      "81\n",
+      "start batch\n",
+      "batch done 17.894031047821045\n",
+      "yielding 2\n",
+      "82\n",
+      "yielding 2\n",
+      "83\n",
+      "start batch\n",
+      "batch done 17.90035057067871\n",
+      "yielding 2\n",
+      "84\n",
+      "yielding 2\n",
+      "85\n",
+      "start batch\n",
+      "batch done 17.998658418655396\n",
+      "yielding 2\n",
+      "86\n",
+      "yielding 2\n",
+      "87\n",
+      "start batch\n",
+      "batch done 17.94445252418518\n",
+      "yielding 2\n",
+      "88\n",
+      "yielding 2\n",
+      "89\n",
+      "start batch\n",
+      "batch done 17.888272523880005\n",
+      "yielding 2\n",
+      "90\n",
+      "yielding 2\n",
+      "91\n",
+      "start batch\n",
+      "batch done 17.82423233985901\n",
+      "yielding 2\n",
+      "92\n",
+      "yielding 2\n",
+      "93\n",
+      "start batch\n",
+      "batch done 17.779359340667725\n",
+      "yielding 2\n",
+      "94\n",
+      "yielding 2\n",
+      "95\n",
+      "start batch\n",
+      "batch done 17.758953094482422\n",
+      "yielding 2\n",
+      "96\n",
+      "yielding 2\n",
+      "97\n",
+      "start batch\n",
+      "batch done 17.749176263809204\n",
+      "yielding 2\n",
+      "98\n",
+      "yielding 2\n",
+      "99\n",
+      "start batch\n",
+      "batch done 17.78287649154663\n",
+      "yielding 2\n",
+      "100\n",
+      "yielding 2\n",
+      "101\n",
+      "start batch\n",
+      "batch done 17.67760396003723\n",
+      "yielding 2\n",
+      "102\n",
+      "yielding 2\n",
+      "103\n",
+      "start batch\n",
+      "batch done 17.68065333366394\n",
+      "yielding 2\n",
+      "104\n",
+      "yielding 2\n",
+      "105\n",
+      "start batch\n",
+      "batch done 17.605623483657837\n",
+      "yielding 2\n",
+      "106\n",
+      "yielding 2\n",
+      "107\n",
+      "start batch\n",
+      "batch done 17.64267921447754\n",
+      "yielding 2\n",
+      "108\n",
+      "yielding 2\n",
+      "109\n",
+      "start batch\n",
+      "batch done 17.668833255767822\n",
+      "yielding 2\n",
+      "110\n",
+      "yielding 2\n",
+      "111\n",
+      "start batch\n",
+      "batch done 17.617231607437134\n",
+      "yielding 2\n",
+      "112\n",
+      "yielding 2\n",
+      "113\n",
+      "start batch\n",
+      "batch done 17.643184423446655\n",
+      "yielding 2\n",
+      "114\n",
+      "yielding 2\n",
+      "115\n",
+      "start batch\n",
+      "batch done 17.70989680290222\n",
+      "yielding 2\n",
+      "116\n",
+      "yielding 2\n",
+      "117\n",
+      "start batch\n",
+      "batch done 17.8039813041687\n",
+      "yielding 2\n",
+      "118\n",
+      "yielding 2\n",
+      "119\n",
+      "start batch\n",
+      "batch done 17.72907042503357\n",
+      "yielding 2\n",
+      "120\n",
+      "yielding 2\n",
+      "121\n",
+      "start batch\n",
+      "batch done 17.70572018623352\n",
+      "yielding 2\n",
+      "122\n",
+      "yielding 2\n",
+      "123\n",
+      "start batch\n",
+      "batch done 17.655152082443237\n",
+      "yielding 2\n",
+      "124\n",
+      "yielding 2\n",
+      "125\n",
+      "start batch\n",
+      "batch done 17.67472004890442\n",
+      "yielding 2\n",
+      "126\n",
+      "yielding 2\n",
+      "127\n",
+      "start batch\n",
+      "batch done 17.660846710205078\n",
+      "yielding 2\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "128\n",
+      "yielding 2\n",
+      "129\n",
+      "start batch\n",
+      "batch done 17.867539167404175\n",
+      "yielding 2\n",
+      "130\n",
+      "yielding 2\n",
+      "131\n",
+      "start batch\n",
+      "batch done 17.657208919525146\n",
+      "yielding 2\n",
+      "132\n",
+      "yielding 2\n",
+      "133\n",
+      "start batch\n",
+      "batch done 17.756351232528687\n",
+      "yielding 2\n",
+      "134\n",
+      "yielding 2\n",
+      "135\n",
+      "start batch\n",
+      "batch done 17.69210982322693\n",
+      "yielding 2\n",
+      "136\n",
+      "yielding 2\n",
+      "137\n",
+      "start batch\n",
+      "batch done 17.62552833557129\n",
+      "yielding 2\n",
+      "138\n",
+      "yielding 2\n",
+      "139\n",
+      "start batch\n",
+      "batch done 17.800055503845215\n",
+      "yielding 2\n",
+      "140\n",
+      "yielding 2\n",
+      "141\n",
+      "start batch\n",
+      "batch done 17.89986801147461\n",
+      "yielding 2\n",
+      "142\n",
+      "yielding 2\n",
+      "143\n",
+      "start batch\n",
+      "batch done 18.036298513412476\n",
+      "yielding 2\n",
+      "144\n",
+      "yielding 2\n",
+      "145\n",
+      "start batch\n",
+      "batch done 18.123709678649902\n",
+      "yielding 2\n",
+      "146\n",
+      "yielding 2\n",
+      "147\n",
+      "start batch\n",
+      "batch done 18.16549277305603\n",
+      "yielding 2\n",
+      "148\n",
+      "yielding 2\n",
+      "149\n",
+      "start batch\n",
+      "batch done 18.09208345413208\n",
+      "yielding 2\n",
+      "150\n",
+      "yielding 2\n",
+      "151\n",
+      "start batch\n",
+      "batch done 18.067283153533936\n",
+      "yielding 2\n",
+      "152\n",
+      "yielding 2\n",
+      "153\n",
+      "start batch\n",
+      "batch done 18.065958976745605\n",
+      "yielding 2\n",
+      "154\n",
+      "yielding 2\n",
+      "155\n",
+      "start batch\n",
+      "batch done 18.052181005477905\n",
+      "yielding 2\n",
+      "156\n",
+      "yielding 2\n",
+      "157\n",
+      "start batch\n",
+      "batch done 18.0349760055542\n",
+      "yielding 2\n",
+      "158\n",
+      "yielding 2\n",
+      "159\n",
+      "start batch\n",
+      "batch done 17.88839817047119\n",
+      "yielding 2\n",
+      "160\n",
+      "yielding 2\n",
+      "161\n",
+      "start batch\n",
+      "batch done 18.04221272468567\n",
+      "yielding 2\n",
+      "162\n",
+      "yielding 2\n",
+      "163\n",
+      "start batch\n",
+      "batch done 17.9801926612854\n",
+      "yielding 2\n",
+      "164\n",
+      "yielding 2\n",
+      "165\n",
+      "start batch\n",
+      "batch done 18.00434923171997\n",
+      "yielding 2\n",
+      "166\n",
+      "yielding 2\n",
+      "167\n",
+      "start batch\n",
+      "batch done 18.189549207687378\n",
+      "yielding 2\n",
+      "168\n",
+      "yielding 2\n",
+      "169\n",
+      "start batch\n",
+      "batch done 18.31538677215576\n",
+      "yielding 2\n",
+      "170\n",
+      "yielding 2\n",
+      "171\n",
+      "start batch\n",
+      "batch done 18.292080402374268\n",
+      "yielding 2\n",
+      "172\n",
+      "yielding 2\n",
+      "173\n",
+      "start batch\n",
+      "batch done 18.158685445785522\n",
+      "yielding 2\n",
+      "174\n",
+      "yielding 2\n",
+      "175\n",
+      "start batch\n",
+      "batch done 18.105260133743286\n",
+      "yielding 2\n",
+      "176\n",
+      "yielding 2\n",
+      "177\n",
+      "start batch\n",
+      "batch done 17.997833013534546\n",
+      "yielding 2\n",
+      "178\n",
+      "yielding 2\n",
+      "179\n",
+      "start batch\n",
+      "batch done 18.119922161102295\n",
+      "yielding 2\n",
+      "180\n",
+      "yielding 2\n",
+      "181\n",
+      "start batch\n",
+      "batch done 18.099710941314697\n",
+      "yielding 2\n",
+      "182\n",
+      "yielding 2\n",
+      "183\n",
+      "start batch\n",
+      "batch done 18.169838428497314\n",
+      "yielding 2\n",
+      "184\n",
+      "yielding 2\n",
+      "185\n",
+      "start batch\n",
+      "batch done 18.291542530059814\n",
+      "yielding 2\n",
+      "186\n",
+      "yielding 2\n",
+      "187\n",
+      "start batch\n",
+      "batch done 18.07632875442505\n",
+      "yielding 2\n",
+      "188\n",
+      "yielding 2\n",
+      "189\n",
+      "start batch\n",
+      "batch done 18.185797214508057\n",
+      "yielding 2\n",
+      "190\n",
+      "done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_09_01_10_52_56pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_09_05_11_52_35\n",
+      "len(cis) 135\n",
+      "verts torch.Size([464084, 3])\n",
+      "faces torch.Size([860624, 3])\n",
+      "mesh <pytorch3d.structures.meshes.Meshes object at 0x7fb451438d90>\n",
+      "start batch\n",
+      "batch done 15.487832307815552\n",
+      "yielding 2\n",
+      "0\n",
+      "yielding 2\n",
+      "1\n",
+      "start batch\n",
+      "batch done 15.331826448440552\n",
+      "yielding 2\n",
+      "2\n",
+      "yielding 2\n",
+      "3\n",
+      "start batch\n",
+      "batch done 15.354057312011719\n",
+      "yielding 2\n",
+      "4\n",
+      "yielding 2\n",
+      "5\n",
+      "start batch\n",
+      "batch done 15.364320993423462\n",
+      "yielding 2\n",
+      "6\n",
+      "yielding 2\n",
+      "7\n",
+      "start batch\n",
+      "batch done 15.289235353469849\n",
+      "yielding 2\n",
+      "8\n",
+      "yielding 2\n",
+      "9\n",
+      "start batch\n",
+      "batch done 15.319800853729248\n",
+      "yielding 2\n",
+      "10\n",
+      "yielding 2\n",
+      "11\n",
+      "start batch\n",
+      "batch done 15.404713153839111\n",
+      "yielding 2\n",
+      "12\n",
+      "yielding 2\n",
+      "13\n",
+      "start batch\n",
+      "batch done 15.425180673599243\n",
+      "yielding 2\n",
+      "14\n",
+      "yielding 2\n",
+      "15\n",
+      "start batch\n",
+      "batch done 15.533788204193115\n",
+      "yielding 2\n",
+      "16\n",
+      "yielding 2\n",
+      "17\n",
+      "start batch\n",
+      "batch done 15.466375350952148\n",
+      "yielding 2\n",
+      "18\n",
+      "yielding 2\n",
+      "19\n",
+      "start batch\n",
+      "batch done 15.454041481018066\n",
+      "yielding 2\n",
+      "20\n",
+      "yielding 2\n",
+      "21\n",
+      "start batch\n",
+      "batch done 15.368762254714966\n",
+      "yielding 2\n",
+      "22\n",
+      "yielding 2\n",
+      "23\n",
+      "start batch\n",
+      "batch done 15.292244911193848\n",
+      "yielding 2\n",
+      "24\n",
+      "yielding 2\n",
+      "25\n",
+      "start batch\n",
+      "batch done 15.28965449333191\n",
+      "yielding 2\n",
+      "26\n",
+      "yielding 2\n",
+      "27\n",
+      "start batch\n",
+      "batch done 15.173131227493286\n",
+      "yielding 2\n",
+      "28\n",
+      "yielding 2\n",
+      "29\n",
+      "start batch\n",
+      "batch done 15.194873809814453\n",
+      "yielding 2\n",
+      "30\n",
+      "yielding 2\n",
+      "31\n",
+      "start batch\n",
+      "batch done 15.125460863113403\n",
+      "yielding 2\n",
+      "32\n",
+      "yielding 2\n",
+      "33\n",
+      "start batch\n",
+      "batch done 15.018349647521973\n",
+      "yielding 2\n",
+      "34\n",
+      "yielding 2\n",
+      "35\n",
+      "start batch\n",
+      "batch done 15.017354726791382\n",
+      "yielding 2\n",
+      "36\n",
+      "yielding 2\n",
+      "37\n",
+      "start batch\n",
+      "batch done 15.018797636032104\n",
+      "yielding 2\n",
+      "38\n",
+      "yielding 2\n",
+      "39\n",
+      "start batch\n",
+      "batch done 14.997432947158813\n",
+      "yielding 2\n",
+      "40\n",
+      "yielding 2\n",
+      "41\n",
+      "start batch\n",
+      "batch done 14.994354486465454\n",
+      "yielding 2\n",
+      "42\n",
+      "yielding 2\n",
+      "43\n",
+      "start batch\n",
+      "batch done 15.08864712715149\n",
+      "yielding 2\n",
+      "44\n",
+      "yielding 2\n",
+      "45\n",
+      "start batch\n",
+      "batch done 15.082398653030396\n",
+      "yielding 2\n",
+      "46\n",
+      "yielding 2\n",
+      "47\n",
+      "start batch\n",
+      "batch done 15.065136671066284\n",
+      "yielding 2\n",
+      "48\n",
+      "yielding 2\n",
+      "49\n",
+      "start batch\n",
+      "batch done 15.136799573898315\n",
+      "yielding 2\n",
+      "50\n",
+      "yielding 2\n",
+      "51\n",
+      "start batch\n",
+      "batch done 15.058637142181396\n",
+      "yielding 2\n",
+      "52\n",
+      "yielding 2\n",
+      "53\n",
+      "start batch\n",
+      "batch done 15.117002487182617\n",
+      "yielding 2\n",
+      "54\n",
+      "yielding 2\n",
+      "55\n",
+      "start batch\n",
+      "batch done 15.208054065704346\n",
+      "yielding 2\n",
+      "56\n",
+      "yielding 2\n",
+      "57\n",
+      "start batch\n",
+      "batch done 15.032907724380493\n",
+      "yielding 2\n",
+      "58\n",
+      "yielding 2\n",
+      "59\n",
+      "start batch\n",
+      "batch done 14.992950677871704\n",
+      "yielding 2\n",
+      "60\n",
+      "yielding 2\n",
+      "61\n",
+      "start batch\n",
+      "batch done 15.034774541854858\n",
+      "yielding 2\n",
+      "62\n",
+      "yielding 2\n",
+      "63\n",
+      "start batch\n",
+      "batch done 15.06687617301941\n",
+      "yielding 2\n",
+      "64\n",
+      "yielding 2\n",
+      "65\n",
+      "start batch\n",
+      "batch done 15.062359809875488\n",
+      "yielding 2\n",
+      "66\n",
+      "yielding 2\n",
+      "67\n",
+      "start batch\n",
+      "batch done 15.153517246246338\n",
+      "yielding 2\n",
+      "68\n",
+      "yielding 2\n",
+      "69\n",
+      "start batch\n",
+      "batch done 15.121984481811523\n",
+      "yielding 2\n",
+      "70\n",
+      "yielding 2\n",
+      "71\n",
+      "start batch\n",
+      "batch done 15.051688432693481\n",
+      "yielding 2\n",
+      "72\n",
+      "yielding 2\n",
+      "73\n",
+      "start batch\n",
+      "batch done 15.2269926071167\n",
+      "yielding 2\n",
+      "74\n",
+      "yielding 2\n",
+      "75\n",
+      "start batch\n",
+      "batch done 15.205971240997314\n",
+      "yielding 2\n",
+      "76\n",
+      "yielding 2\n",
+      "77\n",
+      "start batch\n",
+      "batch done 15.069361448287964\n",
+      "yielding 2\n",
+      "78\n",
+      "yielding 2\n",
+      "79\n",
+      "start batch\n",
+      "batch done 15.013183116912842\n",
+      "yielding 2\n",
+      "80\n",
+      "yielding 2\n",
+      "81\n",
+      "start batch\n",
+      "batch done 15.05926775932312\n",
+      "yielding 2\n",
+      "82\n",
+      "yielding 2\n",
+      "83\n",
+      "start batch\n",
+      "batch done 15.055979013442993\n",
+      "yielding 2\n",
+      "84\n",
+      "yielding 2\n",
+      "85\n",
+      "start batch\n",
+      "batch done 15.02945876121521\n",
+      "yielding 2\n",
+      "86\n",
+      "yielding 2\n",
+      "87\n",
+      "start batch\n",
+      "batch done 15.068772792816162\n",
+      "yielding 2\n",
+      "88\n",
+      "yielding 2\n",
+      "89\n",
+      "start batch\n",
+      "batch done 15.039256811141968\n",
+      "yielding 2\n",
+      "90\n",
+      "yielding 2\n",
+      "91\n",
+      "start batch\n",
+      "batch done 15.064551591873169\n",
+      "yielding 2\n",
+      "92\n",
+      "yielding 2\n",
+      "93\n",
+      "start batch\n",
+      "batch done 15.044830083847046\n",
+      "yielding 2\n",
+      "94\n",
+      "yielding 2\n",
+      "95\n",
+      "start batch\n",
+      "batch done 15.027093648910522\n",
+      "yielding 2\n",
+      "96\n",
+      "yielding 2\n",
+      "97\n",
+      "start batch\n",
+      "batch done 15.018035650253296\n",
+      "yielding 2\n",
+      "98\n",
+      "yielding 2\n",
+      "99\n",
+      "start batch\n",
+      "batch done 15.06758189201355\n",
+      "yielding 2\n",
+      "100\n",
+      "yielding 2\n",
+      "101\n",
+      "start batch\n",
+      "batch done 15.008366584777832\n",
+      "yielding 2\n",
+      "102\n",
+      "yielding 2\n",
+      "103\n",
+      "start batch\n",
+      "batch done 15.052534580230713\n",
+      "yielding 2\n",
+      "104\n",
+      "yielding 2\n",
+      "105\n",
+      "start batch\n",
+      "batch done 15.070204257965088\n",
+      "yielding 2\n",
+      "106\n",
+      "yielding 2\n",
+      "107\n",
+      "start batch\n",
+      "batch done 15.071048498153687\n",
+      "yielding 2\n",
+      "108\n",
+      "yielding 2\n",
+      "109\n",
+      "start batch\n",
+      "batch done 15.034637689590454\n",
+      "yielding 2\n",
+      "110\n",
+      "yielding 2\n",
+      "111\n",
+      "start batch\n",
+      "batch done 15.053092241287231\n",
+      "yielding 2\n",
+      "112\n",
+      "yielding 2\n",
+      "113\n",
+      "start batch\n",
+      "batch done 15.114135026931763\n",
+      "yielding 2\n",
+      "114\n",
+      "yielding 2\n",
+      "115\n",
+      "start batch\n",
+      "batch done 15.009367942810059\n",
+      "yielding 2\n",
+      "116\n",
+      "yielding 2\n",
+      "117\n",
+      "start batch\n",
+      "batch done 15.054425239562988\n",
+      "yielding 2\n",
+      "118\n",
+      "yielding 2\n",
+      "119\n",
+      "start batch\n",
+      "batch done 15.071231603622437\n",
+      "yielding 2\n",
+      "120\n",
+      "yielding 2\n",
+      "121\n",
+      "start batch\n",
+      "batch done 15.108250141143799\n",
+      "yielding 2\n",
+      "122\n",
+      "yielding 2\n",
+      "123\n",
+      "start batch\n",
+      "batch done 15.101808309555054\n",
+      "yielding 2\n",
+      "124\n",
+      "yielding 2\n",
+      "125\n",
+      "start batch\n",
+      "batch done 15.104489088058472\n",
+      "yielding 2\n",
+      "126\n",
+      "yielding 2\n",
+      "127\n",
+      "start batch\n",
+      "batch done 15.084415674209595\n",
+      "yielding 2\n",
+      "128\n",
+      "yielding 2\n",
+      "129\n",
+      "start batch\n",
+      "batch done 15.045806169509888\n",
+      "yielding 2\n",
+      "130\n",
+      "yielding 2\n",
+      "131\n",
+      "start batch\n",
+      "batch done 15.096894025802612\n",
+      "yielding 2\n",
+      "132\n",
+      "yielding 2\n",
+      "133\n",
+      "start batch\n",
+      "batch done 15.10225534439087\n",
+      "yielding 2\n",
+      "134\n",
+      "done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_09_05_11_52_35pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_09_05_11_54_01\n",
+      "len(cis) 107\n",
+      "verts torch.Size([152005, 3])\n",
+      "faces torch.Size([270633, 3])\n",
+      "mesh <pytorch3d.structures.meshes.Meshes object at 0x7fb4d53a5370>\n",
+      "start batch\n",
+      "batch done 5.0221335887908936\n",
+      "yielding 2\n",
+      "0\n",
+      "yielding 2\n",
+      "1\n",
+      "start batch\n",
+      "batch done 4.892514705657959\n",
+      "yielding 2\n",
+      "2\n",
+      "yielding 2\n",
+      "3\n",
+      "start batch\n",
+      "batch done 4.8822922706604\n",
+      "yielding 2\n",
+      "4\n",
+      "yielding 2\n",
+      "5\n",
+      "start batch\n",
+      "batch done 4.87933611869812\n",
+      "yielding 2\n",
+      "6\n",
+      "yielding 2\n",
+      "7\n",
+      "start batch\n",
+      "batch done 4.882117748260498\n",
+      "yielding 2\n",
+      "8\n",
+      "yielding 2\n",
+      "9\n",
+      "start batch\n",
+      "batch done 4.888591051101685\n",
+      "yielding 2\n",
+      "10\n",
+      "yielding 2\n",
+      "11\n",
+      "start batch\n",
+      "batch done 4.897234916687012\n",
+      "yielding 2\n",
+      "12\n",
+      "yielding 2\n",
+      "13\n",
+      "start batch\n",
+      "batch done 4.929947376251221\n",
+      "yielding 2\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "14\n",
+      "yielding 2\n",
+      "15\n",
+      "start batch\n",
+      "batch done 4.9071502685546875\n",
+      "yielding 2\n",
+      "16\n",
+      "yielding 2\n",
+      "17\n",
+      "start batch\n",
+      "batch done 4.902193546295166\n",
+      "yielding 2\n",
+      "18\n",
+      "yielding 2\n",
+      "19\n",
+      "start batch\n",
+      "batch done 4.910208225250244\n",
+      "yielding 2\n",
+      "20\n",
+      "yielding 2\n",
+      "21\n",
+      "start batch\n",
+      "batch done 4.886016368865967\n",
+      "yielding 2\n",
+      "22\n",
+      "yielding 2\n",
+      "23\n",
+      "start batch\n",
+      "batch done 4.903723955154419\n",
+      "yielding 2\n",
+      "24\n",
+      "yielding 2\n",
+      "25\n",
+      "start batch\n",
+      "batch done 4.868772506713867\n",
+      "yielding 2\n",
+      "26\n",
+      "yielding 2\n",
+      "27\n",
+      "start batch\n",
+      "batch done 4.867411851882935\n",
+      "yielding 2\n",
+      "28\n",
+      "yielding 2\n",
+      "29\n",
+      "start batch\n",
+      "batch done 4.887089014053345\n",
+      "yielding 2\n",
+      "30\n",
+      "yielding 2\n",
+      "31\n",
+      "start batch\n",
+      "batch done 4.928569555282593\n",
+      "yielding 2\n",
+      "32\n",
+      "yielding 2\n",
+      "33\n",
+      "start batch\n",
+      "batch done 4.94141411781311\n",
+      "yielding 2\n",
+      "34\n",
+      "yielding 2\n",
+      "35\n",
+      "start batch\n",
+      "batch done 4.953422784805298\n",
+      "yielding 2\n",
+      "36\n",
+      "yielding 2\n",
+      "37\n",
+      "start batch\n",
+      "batch done 4.966710567474365\n",
+      "yielding 2\n",
+      "38\n",
+      "yielding 2\n",
+      "39\n",
+      "start batch\n",
+      "batch done 4.962280750274658\n",
+      "yielding 2\n",
+      "40\n",
+      "yielding 2\n",
+      "41\n",
+      "start batch\n",
+      "batch done 4.933857202529907\n",
+      "yielding 2\n",
+      "42\n",
+      "yielding 2\n",
+      "43\n",
+      "start batch\n",
+      "batch done 4.897865295410156\n",
+      "yielding 2\n",
+      "44\n",
+      "yielding 2\n",
+      "45\n",
+      "start batch\n",
+      "batch done 4.887762069702148\n",
+      "yielding 2\n",
+      "46\n",
+      "yielding 2\n",
+      "47\n",
+      "start batch\n",
+      "batch done 4.874121427536011\n",
+      "yielding 2\n",
+      "48\n",
+      "yielding 2\n",
+      "49\n",
+      "start batch\n",
+      "batch done 4.878381729125977\n",
+      "yielding 2\n",
+      "50\n",
+      "yielding 2\n",
+      "51\n",
+      "start batch\n",
+      "batch done 4.896527528762817\n",
+      "yielding 2\n",
+      "52\n",
+      "yielding 2\n",
+      "53\n",
+      "start batch\n",
+      "batch done 4.898298263549805\n",
+      "yielding 2\n",
+      "54\n",
+      "yielding 2\n",
+      "55\n",
+      "start batch\n",
+      "batch done 4.870667219161987\n",
+      "yielding 2\n",
+      "56\n",
+      "yielding 2\n",
+      "57\n",
+      "start batch\n",
+      "batch done 4.900321960449219\n",
+      "yielding 2\n",
+      "58\n",
+      "yielding 2\n",
+      "59\n",
+      "start batch\n",
+      "batch done 4.879852533340454\n",
+      "yielding 2\n",
+      "60\n",
+      "yielding 2\n",
+      "61\n",
+      "start batch\n",
+      "batch done 4.889668226242065\n",
+      "yielding 2\n",
+      "62\n",
+      "yielding 2\n",
+      "63\n",
+      "start batch\n",
+      "batch done 4.89395809173584\n",
+      "yielding 2\n",
+      "64\n",
+      "yielding 2\n",
+      "65\n",
+      "start batch\n",
+      "batch done 4.8921873569488525\n",
+      "yielding 2\n",
+      "66\n",
+      "yielding 2\n",
+      "67\n",
+      "start batch\n",
+      "batch done 4.925278663635254\n",
+      "yielding 2\n",
+      "68\n",
+      "yielding 2\n",
+      "69\n",
+      "start batch\n",
+      "batch done 4.96287202835083\n",
+      "yielding 2\n",
+      "70\n",
+      "yielding 2\n",
+      "71\n",
+      "start batch\n",
+      "batch done 4.992512941360474\n",
+      "yielding 2\n",
+      "72\n",
+      "yielding 2\n",
+      "73\n",
+      "start batch\n",
+      "batch done 4.926609754562378\n",
+      "yielding 2\n",
+      "74\n",
+      "yielding 2\n",
+      "75\n",
+      "start batch\n",
+      "batch done 4.88411021232605\n",
+      "yielding 2\n",
+      "76\n",
+      "yielding 2\n",
+      "77\n",
+      "start batch\n",
+      "batch done 4.846395969390869\n",
+      "yielding 2\n",
+      "78\n",
+      "yielding 2\n",
+      "79\n",
+      "start batch\n",
+      "batch done 4.858204364776611\n",
+      "yielding 2\n",
+      "80\n",
+      "yielding 2\n",
+      "81\n",
+      "start batch\n",
+      "batch done 4.836480379104614\n",
+      "yielding 2\n",
+      "82\n",
+      "yielding 2\n",
+      "83\n",
+      "start batch\n",
+      "batch done 4.809831380844116\n",
+      "yielding 2\n",
+      "84\n",
+      "yielding 2\n",
+      "85\n",
+      "start batch\n",
+      "batch done 4.839672327041626\n",
+      "yielding 2\n",
+      "86\n",
+      "yielding 2\n",
+      "87\n",
+      "start batch\n",
+      "batch done 4.8483312129974365\n",
+      "yielding 2\n",
+      "88\n",
+      "yielding 2\n",
+      "89\n",
+      "start batch\n",
+      "batch done 4.8420140743255615\n",
+      "yielding 2\n",
+      "90\n",
+      "yielding 2\n",
+      "91\n",
+      "start batch\n",
+      "batch done 4.873597145080566\n",
+      "yielding 2\n",
+      "92\n",
+      "yielding 2\n",
+      "93\n",
+      "start batch\n",
+      "batch done 4.852824449539185\n",
+      "yielding 2\n",
+      "94\n",
+      "yielding 2\n",
+      "95\n",
+      "start batch\n",
+      "batch done 4.839403390884399\n",
+      "yielding 2\n",
+      "96\n",
+      "yielding 2\n",
+      "97\n",
+      "start batch\n",
+      "batch done 4.844208478927612\n",
+      "yielding 2\n",
+      "98\n",
+      "yielding 2\n",
+      "99\n",
+      "start batch\n",
+      "batch done 4.830946683883667\n",
+      "yielding 2\n",
+      "100\n",
+      "yielding 2\n",
+      "101\n",
+      "start batch\n",
+      "batch done 4.9234230518341064\n",
+      "yielding 2\n",
+      "102\n",
+      "yielding 2\n",
+      "103\n",
+      "start batch\n",
+      "batch done 5.00197696685791\n",
+      "yielding 2\n",
+      "104\n",
+      "yielding 2\n",
+      "105\n",
+      "start batch\n",
+      "batch done 5.026549577713013\n",
+      "yielding 2\n",
+      "106\n",
+      "done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_09_05_11_54_01pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_09_05_11_55_51\n",
+      "len(cis) 23\n",
+      "verts torch.Size([129256, 3])\n",
+      "faces torch.Size([232663, 3])\n",
+      "mesh <pytorch3d.structures.meshes.Meshes object at 0x7fb451441100>\n",
+      "start batch\n",
+      "batch done 4.3848865032196045\n",
+      "yielding 2\n",
+      "0\n",
+      "yielding 2\n",
+      "1\n",
+      "start batch\n",
+      "batch done 4.265713214874268\n",
+      "yielding 2\n",
+      "2\n",
+      "yielding 2\n",
+      "3\n",
+      "start batch\n",
+      "batch done 4.240416049957275\n",
+      "yielding 2\n",
+      "4\n",
+      "yielding 2\n",
+      "5\n",
+      "start batch\n",
+      "batch done 4.144466400146484\n",
+      "yielding 2\n",
+      "6\n",
+      "yielding 2\n",
+      "7\n",
+      "start batch\n",
+      "batch done 4.123638868331909\n",
+      "yielding 2\n",
+      "8\n",
+      "yielding 2\n",
+      "9\n",
+      "start batch\n",
+      "batch done 4.103830575942993\n",
+      "yielding 2\n",
+      "10\n",
+      "yielding 2\n",
+      "11\n",
+      "start batch\n",
+      "batch done 4.109496831893921\n",
+      "yielding 2\n",
+      "12\n",
+      "yielding 2\n",
+      "13\n",
+      "start batch\n",
+      "batch done 4.110727787017822\n",
+      "yielding 2\n",
+      "14\n",
+      "yielding 2\n",
+      "15\n",
+      "start batch\n",
+      "batch done 4.112897872924805\n",
+      "yielding 2\n",
+      "16\n",
+      "yielding 2\n",
+      "17\n",
+      "start batch\n",
+      "batch done 4.166730642318726\n",
+      "yielding 2\n",
+      "18\n",
+      "yielding 2\n",
+      "19\n",
+      "start batch\n",
+      "batch done 4.181821346282959\n",
+      "yielding 2\n",
+      "20\n",
+      "yielding 2\n",
+      "21\n",
+      "start batch\n",
+      "batch done 4.15244460105896\n",
+      "yielding 2\n",
+      "22\n",
+      "done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_09_05_11_55_51pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_09_05_11_56_34\n",
+      "len(cis) 48\n",
+      "verts torch.Size([217815, 3])\n",
+      "faces torch.Size([416642, 3])\n",
+      "mesh <pytorch3d.structures.meshes.Meshes object at 0x7fb45143fdf0>\n",
+      "start batch\n",
+      "batch done 7.7553136348724365\n",
+      "yielding 2\n",
+      "0\n",
+      "yielding 2\n",
+      "1\n",
+      "start batch\n",
+      "batch done 7.617738246917725\n",
+      "yielding 2\n",
+      "2\n",
+      "yielding 2\n",
+      "3\n",
+      "start batch\n",
+      "batch done 7.489384889602661\n",
+      "yielding 2\n",
+      "4\n",
+      "yielding 2\n",
+      "5\n",
+      "start batch\n",
+      "batch done 7.365813255310059\n",
+      "yielding 2\n",
+      "6\n",
+      "yielding 2\n",
+      "7\n",
+      "start batch\n",
+      "batch done 7.405453681945801\n",
+      "yielding 2\n",
+      "8\n",
+      "yielding 2\n",
+      "9\n",
+      "start batch\n",
+      "batch done 7.510983228683472\n",
+      "yielding 2\n",
+      "10\n",
+      "yielding 2\n",
+      "11\n",
+      "start batch\n",
+      "batch done 7.578561067581177\n",
+      "yielding 2\n",
+      "12\n",
+      "yielding 2\n",
+      "13\n",
+      "start batch\n",
+      "batch done 7.508244037628174\n",
+      "yielding 2\n",
+      "14\n",
+      "yielding 2\n",
+      "15\n",
+      "start batch\n",
+      "batch done 7.4272871017456055\n",
+      "yielding 2\n",
+      "16\n",
+      "yielding 2\n",
+      "17\n",
+      "start batch\n",
+      "batch done 7.42262864112854\n",
+      "yielding 2\n",
+      "18\n",
+      "yielding 2\n",
+      "19\n",
+      "start batch\n",
+      "batch done 7.41092324256897\n",
+      "yielding 2\n",
+      "20\n",
+      "yielding 2\n",
+      "21\n",
+      "start batch\n",
+      "batch done 7.450767517089844\n",
+      "yielding 2\n",
+      "22\n",
+      "yielding 2\n",
+      "23\n",
+      "start batch\n",
+      "batch done 7.5237791538238525\n",
+      "yielding 2\n",
+      "24\n",
+      "yielding 2\n",
+      "25\n",
+      "start batch\n",
+      "batch done 7.538700342178345\n",
+      "yielding 2\n",
+      "26\n",
+      "yielding 2\n",
+      "27\n",
+      "start batch\n",
+      "batch done 7.527208089828491\n",
+      "yielding 2\n",
+      "28\n",
+      "yielding 2\n",
+      "29\n",
+      "start batch\n",
+      "batch done 7.447166681289673\n",
+      "yielding 2\n",
+      "30\n",
+      "yielding 2\n",
+      "31\n",
+      "start batch\n",
+      "batch done 7.453516006469727\n",
+      "yielding 2\n",
+      "32\n",
+      "yielding 2\n",
+      "33\n",
+      "start batch\n",
+      "batch done 7.476274490356445\n",
+      "yielding 2\n",
+      "34\n",
+      "yielding 2\n",
+      "35\n",
+      "start batch\n",
+      "batch done 7.354171991348267\n",
+      "yielding 2\n",
+      "36\n",
+      "yielding 2\n",
+      "37\n",
+      "start batch\n",
+      "batch done 7.3653929233551025\n",
+      "yielding 2\n",
+      "38\n",
+      "yielding 2\n",
+      "39\n",
+      "start batch\n",
+      "batch done 7.389264345169067\n",
+      "yielding 2\n",
+      "40\n",
+      "yielding 2\n",
+      "41\n",
+      "start batch\n",
+      "batch done 7.395469665527344\n",
+      "yielding 2\n",
+      "42\n",
+      "yielding 2\n",
+      "43\n",
+      "start batch\n",
+      "batch done 7.428934097290039\n",
+      "yielding 2\n",
+      "44\n",
+      "yielding 2\n",
+      "45\n",
+      "start batch\n",
+      "batch done 7.4108967781066895\n",
+      "yielding 2\n",
+      "46\n",
+      "yielding 2\n",
+      "47\n",
+      "done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_09_05_11_56_34pytorch_rgbd_debug.mp4\n",
+      "\n",
+      "\n",
+      "\n",
+      "/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_09_05_11_58_40\n",
+      "len(cis) 97\n",
+      "verts torch.Size([353282, 3])\n",
+      "faces torch.Size([647309, 3])\n",
+      "mesh <pytorch3d.structures.meshes.Meshes object at 0x7fb45143f8b0>\n",
+      "start batch\n",
+      "batch done 11.507849216461182\n",
+      "yielding 2\n",
+      "0\n",
+      "yielding 2\n",
+      "1\n",
+      "start batch\n",
+      "batch done 11.372364521026611\n",
+      "yielding 2\n",
+      "2\n",
+      "yielding 2\n",
+      "3\n",
+      "start batch\n",
+      "batch done 11.339719772338867\n",
+      "yielding 2\n",
+      "4\n",
+      "yielding 2\n",
+      "5\n",
+      "start batch\n",
+      "batch done 11.453708410263062\n",
+      "yielding 2\n",
+      "6\n",
+      "yielding 2\n",
+      "7\n",
+      "start batch\n",
+      "batch done 11.403046369552612\n",
+      "yielding 2\n",
+      "8\n",
+      "yielding 2\n",
+      "9\n",
+      "start batch\n",
+      "batch done 11.44649362564087\n",
+      "yielding 2\n",
+      "10\n",
+      "yielding 2\n",
+      "11\n",
+      "start batch\n",
+      "batch done 11.418079853057861\n",
+      "yielding 2\n",
+      "12\n",
+      "yielding 2\n",
+      "13\n",
+      "start batch\n",
+      "batch done 11.431937456130981\n",
+      "yielding 2\n",
+      "14\n",
+      "yielding 2\n",
+      "15\n",
+      "start batch\n",
+      "batch done 11.395984649658203\n",
+      "yielding 2\n",
+      "16\n",
+      "yielding 2\n",
+      "17\n",
+      "start batch\n",
+      "batch done 11.37775182723999\n",
+      "yielding 2\n",
+      "18\n",
+      "yielding 2\n",
+      "19\n",
+      "start batch\n",
+      "batch done 11.379046201705933\n",
+      "yielding 2\n",
+      "20\n",
+      "yielding 2\n",
+      "21\n",
+      "start batch\n",
+      "batch done 11.441921710968018\n",
+      "yielding 2\n",
+      "22\n",
+      "yielding 2\n",
+      "23\n",
+      "start batch\n",
+      "batch done 11.39624547958374\n",
+      "yielding 2\n",
+      "24\n",
+      "yielding 2\n",
+      "25\n",
+      "start batch\n",
+      "batch done 11.36753225326538\n",
+      "yielding 2\n",
+      "26\n",
+      "yielding 2\n",
+      "27\n",
+      "start batch\n",
+      "batch done 11.383326530456543\n",
+      "yielding 2\n",
+      "28\n",
+      "yielding 2\n",
+      "29\n",
+      "start batch\n",
+      "batch done 11.324728965759277\n",
+      "yielding 2\n",
+      "30\n",
+      "yielding 2\n",
+      "31\n",
+      "start batch\n",
+      "batch done 11.342352867126465\n",
+      "yielding 2\n",
+      "32\n",
+      "yielding 2\n",
+      "33\n",
+      "start batch\n",
+      "batch done 11.386430263519287\n",
+      "yielding 2\n",
+      "34\n",
+      "yielding 2\n",
+      "35\n",
+      "start batch\n",
+      "batch done 11.340222835540771\n",
+      "yielding 2\n",
+      "36\n",
+      "yielding 2\n",
+      "37\n",
+      "start batch\n",
+      "batch done 11.408394813537598\n",
+      "yielding 2\n",
+      "38\n",
+      "yielding 2\n",
+      "39\n",
+      "start batch\n",
+      "batch done 11.369447708129883\n",
+      "yielding 2\n",
+      "40\n",
+      "yielding 2\n",
+      "41\n",
+      "start batch\n",
+      "batch done 11.31683897972107\n",
+      "yielding 2\n",
+      "42\n",
+      "yielding 2\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "43\n",
+      "start batch\n",
+      "batch done 11.322331428527832\n",
+      "yielding 2\n",
+      "44\n",
+      "yielding 2\n",
+      "45\n",
+      "start batch\n",
+      "batch done 11.304376363754272\n",
+      "yielding 2\n",
+      "46\n",
+      "yielding 2\n",
+      "47\n",
+      "start batch\n",
+      "batch done 11.298699617385864\n",
+      "yielding 2\n",
+      "48\n",
+      "yielding 2\n",
+      "49\n",
+      "start batch\n",
+      "batch done 11.363690614700317\n",
+      "yielding 2\n",
+      "50\n",
+      "yielding 2\n",
+      "51\n",
+      "start batch\n",
+      "batch done 11.385607719421387\n",
+      "yielding 2\n",
+      "52\n",
+      "yielding 2\n",
+      "53\n",
+      "start batch\n",
+      "batch done 11.362175941467285\n",
+      "yielding 2\n",
+      "54\n",
+      "yielding 2\n",
+      "55\n",
+      "start batch\n",
+      "batch done 11.423774242401123\n",
+      "yielding 2\n",
+      "56\n",
+      "yielding 2\n",
+      "57\n",
+      "start batch\n",
+      "batch done 11.395496129989624\n",
+      "yielding 2\n",
+      "58\n",
+      "yielding 2\n",
+      "59\n",
+      "start batch\n",
+      "batch done 11.444361209869385\n",
+      "yielding 2\n",
+      "60\n",
+      "yielding 2\n",
+      "61\n",
+      "start batch\n",
+      "batch done 11.42470908164978\n",
+      "yielding 2\n",
+      "62\n",
+      "yielding 2\n",
+      "63\n",
+      "start batch\n",
+      "batch done 11.401618242263794\n",
+      "yielding 2\n",
+      "64\n",
+      "yielding 2\n",
+      "65\n",
+      "start batch\n",
+      "batch done 11.41335678100586\n",
+      "yielding 2\n",
+      "66\n",
+      "yielding 2\n",
+      "67\n",
+      "start batch\n",
+      "batch done 11.333601236343384\n",
+      "yielding 2\n",
+      "68\n",
+      "yielding 2\n",
+      "69\n",
+      "start batch\n",
+      "batch done 11.361776351928711\n",
+      "yielding 2\n",
+      "70\n",
+      "yielding 2\n",
+      "71\n",
+      "start batch\n",
+      "batch done 11.384546995162964\n",
+      "yielding 2\n",
+      "72\n",
+      "yielding 2\n",
+      "73\n",
+      "start batch\n",
+      "batch done 11.424896240234375\n",
+      "yielding 2\n",
+      "74\n",
+      "yielding 2\n",
+      "75\n",
+      "start batch\n",
+      "batch done 11.49638557434082\n",
+      "yielding 2\n",
+      "76\n",
+      "yielding 2\n",
+      "77\n",
+      "start batch\n",
+      "batch done 11.482038497924805\n",
+      "yielding 2\n",
+      "78\n",
+      "yielding 2\n",
+      "79\n",
+      "start batch\n",
+      "batch done 11.4642014503479\n",
+      "yielding 2\n",
+      "80\n",
+      "yielding 2\n",
+      "81\n",
+      "start batch\n",
+      "batch done 11.404814958572388\n",
+      "yielding 2\n",
+      "82\n",
+      "yielding 2\n",
+      "83\n",
+      "start batch\n",
+      "batch done 11.318844318389893\n",
+      "yielding 2\n",
+      "84\n",
+      "yielding 2\n",
+      "85\n",
+      "start batch\n",
+      "batch done 11.439493417739868\n",
+      "yielding 2\n",
+      "86\n",
+      "yielding 2\n",
+      "87\n",
+      "start batch\n",
+      "batch done 11.450179815292358\n",
+      "yielding 2\n",
+      "88\n",
+      "yielding 2\n",
+      "89\n",
+      "start batch\n",
+      "batch done 11.53331470489502\n",
+      "yielding 2\n",
+      "90\n",
+      "yielding 2\n",
+      "91\n",
+      "start batch\n",
+      "batch done 11.57719373703003\n",
+      "yielding 2\n",
+      "92\n",
+      "yielding 2\n",
+      "93\n",
+      "start batch\n",
+      "batch done 11.555033445358276\n",
+      "yielding 2\n",
+      "94\n",
+      "yielding 2\n",
+      "95\n",
+      "start batch\n",
+      "batch done 11.542011976242065\n",
+      "yielding 2\n",
+      "96\n",
+      "done /outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/2021_09_05_11_58_40pytorch_rgbd_debug.mp4\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sys\n",
+    "sys.path.append('/opt/psegs')\n",
+    "\n",
+    "import os\n",
+    "\n",
+    "ROOT = '/outer_root/media/red14000/Pictures_and_Docs/lidarphone_lidar_scans/'\n",
+    "\n",
+    "for d in sorted(os.listdir(ROOT)):\n",
+    "    if '.DS_Store' in d:\n",
+    "        continue\n",
+    "\n",
+    "    base_dir = os.path.join(ROOT, d)\n",
+    "    if not os.path.isdir(base_dir):\n",
+    "        print('skipping non-dir', base_dir)\n",
+    "        continue\n",
+    "    print()\n",
+    "    print()\n",
+    "    print()\n",
+    "    print(base_dir)\n",
+    "    \n",
+    "\n",
+    "    outpath = os.path.join(ROOT, d + 'pytorch_rgbd_debug.mp4')\n",
+    "    if os.path.exists(outpath):\n",
+    "        print('already done', outpath)\n",
+    "        continue\n",
+    "    \n",
+    "    from psegs.datasets import ios_lidar\n",
+    "\n",
+    "\n",
+    "    from oarphpy import util as oputil\n",
+    "    json_paths = oputil.all_files_recursive(base_dir, pattern='frame*.json')\n",
+    "    json_paths = sorted(json_paths)\n",
+    "    \n",
+    "    try:\n",
+    "        cis = [ios_lidar.threeDScannerApp_create_camera_image(p) for p in json_paths]\n",
+    "    except AssertionError as e:\n",
+    "        print('err', e)\n",
+    "        continue\n",
+    "\n",
+    "    print('len(cis)', len(cis))\n",
+    "\n",
+    "    \n",
+    "    mesh_path = os.path.join(base_dir, 'export_refined.obj')\n",
+    "    if not os.path.exists(mesh_path):\n",
+    "        mesh_path = os.path.join(base_dir, 'export.obj')\n",
+    "    \n",
+    "    \n",
+    "    import imageio\n",
+    "    writer = imageio.get_writer(outpath, fps=5)\n",
+    "    \n",
+    "    from psegs.render.mesh2rgbd import pytorch3d_iter_mesh2uvd_for_camera_images\n",
+    "    \n",
+    "    iter_uvds = pytorch3d_iter_mesh2uvd_for_camera_images(cis, mesh_path, batch_size=2)\n",
+    "    for i, (ci, uvd) in enumerate(zip(cis, iter_uvds)):\n",
+    "        debug = ci.image\n",
+    "        from psegs.util.plotting import draw_xy_depth_in_image\n",
+    "        draw_xy_depth_in_image(debug, uvd, period_meters=0.1)\n",
+    "        writer.append_data(debug)\n",
+    "        print(i)\n",
+    "    \n",
+    "    writer.close()\n",
+    "    \n",
+    "    import torch\n",
+    "    torch.cuda.empty_cache()\n",
+    "    \n",
+    "    import gc\n",
+    "    gc.collect()\n",
+    "    \n",
+    "    print('done', outpath)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/pybullet_collisions.ipynb b/notebooks/pybullet_collisions.ipynb
new file mode 100644
index 0000000..67b93a3
--- /dev/null
+++ b/notebooks/pybullet_collisions.ipynb
@@ -0,0 +1,62 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# !pip3 install -U pybullet\n",
+    "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "import sys\n",
+    "sys.path.append('/opt/psegs')\n",
+    "\n",
+    "from psegs.render import pybullet_ttc as pbttc\n",
+    "\n",
+    "ob1 = pbttc.CuboidAgent()\n",
+    "ob2 = pbttc.CuboidAgent()\n",
+    "\n",
+    "sim = pbttc.PyBulletSim(cuboid_agents=[ob1, ob2])\n",
+    "\n",
+    "sim.run(debug_video_out='pybullet_debug_out.mp4')\n",
+    "\n",
+    "from IPython.display import Video\n",
+    "Video('pybullet_debug_out.mp4')\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/semantickitti_to_sd_table.ipynb b/notebooks/semantickitti_to_sd_table.ipynb
new file mode 100644
index 0000000..1ce5092
--- /dev/null
+++ b/notebooks/semantickitti_to_sd_table.ipynb
@@ -0,0 +1,1142 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# SemanticKITTI to Stamped Datum Table\n",
+    "\n",
+    "CAN DELETE THIS NOTEBOOK\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# parameters\n",
+    "\n",
+    "# Please follow the instructions posted on the SemanticKITTI website to obtain the data:\n",
+    "# http://www.semantic-kitti.org/dataset.html#download\n",
+    "# Additionally, if you wish to study optical flow, you'll want to expand the KITTI zip\n",
+    "# file `data_odometry_color.zip`.\n",
+    "# Extract the data as described to a directory and paste that directory path here:\n",
+    "SEMANTICKITTI_ROOT = '/outer_root/host_mnt/Volumes/970-evo-raid0/semantickitti_odom_tmp/'\n",
+    "\n",
+    "OUTPUT_ROOT = '/tmp/semantickitti_fused_root/'\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found Sequence 00 with 4541 scans\n",
+      "Found Sequence 01 with 1101 scans\n",
+      "Found Sequence 02 with 4661 scans\n",
+      "Found Sequence 03 with 801 scans\n",
+      "Found Sequence 04 with 271 scans\n",
+      "Found Sequence 05 with 2761 scans\n",
+      "Found Sequence 06 with 1101 scans\n",
+      "Found Sequence 07 with 1101 scans\n",
+      "Found Sequence 09 with 1591 scans\n",
+      "Found Sequence 10 with 1201 scans\n",
+      "Found 19130 total scans\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Setup\n",
+    "\n",
+    "import time\n",
+    "import numpy as np\n",
+    "import os\n",
+    "\n",
+    "import open3d as o3d\n",
+    "from oarphpy import util as oputil\n",
+    "\n",
+    "# Deduced from:\n",
+    "# https://github.com/PRBonn/semantic-kitti-api/blob/c2d7712964a9541ed31900c925bf5971be2107c2/auxiliary/SSCDataset.py#L20\n",
+    "SK_SPLIT_SEQUENCES = {\n",
+    "    \"train\": [\"00\", \"01\", \"02\", \"03\", \"04\", \"05\", \"06\", \"07\", \"09\", \"10\"],\n",
+    "    \"valid\": [\"08\"],\n",
+    "    \"test\": [\"11\", \"12\", \"13\", \"14\", \"15\", \"16\", \"17\", \"18\", \"19\", \"20\", \"21\"]\n",
+    "}\n",
+    "\n",
+    "SK_MOVING_LABELS = [\n",
+    "    252, # \"moving-car\"\n",
+    "    253, # \"moving-bicyclist\"\n",
+    "    254, # \"moving-person\"\n",
+    "    255, # \"moving-motorcyclist\"\n",
+    "    256, # \"moving-on-rails\"\n",
+    "    257, # \"moving-bus\"\n",
+    "    258, # \"moving-truck\"\n",
+    "    259, # \"moving-other-vehicle\"\n",
+    "]\n",
+    "\n",
+    "def get_scene_basepath(seq):\n",
+    "    return os.path.join(SEMANTICKITTI_ROOT, 'dataset/sequences', seq)\n",
+    "\n",
+    "SK_SEQ_TO_NSCANS = {}\n",
+    "for seq in SK_SPLIT_SEQUENCES['train']:\n",
+    "    scene_base = get_scene_basepath(seq)\n",
+    "    last_vel = max(os.listdir(os.path.join(scene_base + '/velodyne/')))\n",
+    "    n_scans = int(last_vel.replace('.bin', '')) + 1\n",
+    "    print('Found Sequence %s with %s scans' % (seq, n_scans))\n",
+    "    SK_SEQ_TO_NSCANS[seq] = n_scans\n",
+    "print(\"Found %s total scans\" % sum(SK_SEQ_TO_NSCANS.values()))\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import time\n",
+    "# import six\n",
+    "# from contextlib import contextmanager\n",
+    "# class ThruputObserver(object):\n",
+    "#   \"\"\"A utility for measuring the runtime and throughput of a subroutine.\n",
+    "#   Similar in spirit to `tqdm`, except `ThruputObserver`:\n",
+    "#    * Tracks not just time but a size metric (e.g. memory) in bytes\n",
+    "#    * Reports percentiles\n",
+    "#    * Simply logs strings and is not terminal-interactive\n",
+    "  \n",
+    "#   While `tqdm` is useful for notebooks, `ThruputObserver` seeks to be more\n",
+    "#   useful for longer-running batch jobs.\n",
+    "#   \"\"\"\n",
+    "  \n",
+    "#   def __init__(\n",
+    "#       self,\n",
+    "#       name='',\n",
+    "#       log_on_del=False,\n",
+    "#       only_stats=None,\n",
+    "#       log_freq=100,\n",
+    "#       n_total=None,\n",
+    "#       n_total_chunks=None):\n",
+    "#     self.n = 0\n",
+    "#     self.num_bytes = 0\n",
+    "#     self.ts = []\n",
+    "#     self.name = name\n",
+    "#     self.log_on_del = log_on_del\n",
+    "#     self.only_stats = only_stats or []\n",
+    "#     self.n_total = max(n_total, 1) if n_total is not None else None\n",
+    "#     self.n_total_chunks = (\n",
+    "#       max(n_total_chunks, 1) if n_total_chunks is not None else None)\n",
+    "#     self._start = None\n",
+    "#     self.__log_freq = log_freq\n",
+    "#     self.__last_log = 0\n",
+    "  \n",
+    "#   @contextmanager\n",
+    "#   def observe(self, n=0, num_bytes=0):\n",
+    "#     \"\"\"\n",
+    "#     NB: contextmanagers appear to be expensive due to object creation.\n",
+    "#     Use ThruputObserver#{start,stop}_block() for <10ms ops. \n",
+    "#     FMI https://stackoverflow.com/questions/34872535/why-contextmanager-is-slow\n",
+    "#     \"\"\"\n",
+    "\n",
+    "#     self.start_block()\n",
+    "#     yield\n",
+    "#     self.stop_block(n=n, num_bytes=num_bytes)\n",
+    "  \n",
+    "#   def start_block(self):\n",
+    "#     self._start = time.time()\n",
+    "  \n",
+    "#   def update_tallies(self, n=0, num_bytes=0, new_block=False):\n",
+    "#     self.n += n\n",
+    "#     self.num_bytes += num_bytes\n",
+    "#     if new_block:\n",
+    "#       self.stop_block()\n",
+    "#       self.start_block()\n",
+    "  \n",
+    "#   def stop_block(self, n=0, num_bytes=0):\n",
+    "#     end = time.time()\n",
+    "#     self.n += n\n",
+    "#     self.num_bytes += num_bytes\n",
+    "#     if self._start is not None:\n",
+    "#       self.ts.append(end - self._start)\n",
+    "#     self._start = None\n",
+    "  \n",
+    "#   def maybe_log_progress(self, every_n=-1):\n",
+    "#     if every_n >= 0:\n",
+    "#       self.__log_freq = every_n\n",
+    "#     if self.n >= self.__last_log + self.__log_freq:\n",
+    "#       from oarphpy.util import log\n",
+    "#       print(\"Progress for \\n\" + str(self)) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
+    "#       self.__last_log = self.n\n",
+    "#         # Track last log because `n` may increase inconsistently\n",
+    "#       if every_n == -1 and (self.n >= (1.7 * self.__log_freq)):\n",
+    "#         self.__log_freq = int(1.7 * self.__log_freq)\n",
+    "#           # Exponentially decay logging frequency. Don't decay quite as\n",
+    "#           # fast as Vowpal Wabbit did, though.\n",
+    "\n",
+    "#   @staticmethod\n",
+    "#   def union(thruputs):\n",
+    "#     u = ThruputObserver()\n",
+    "#     for t in thruputs:\n",
+    "#       u += t\n",
+    "#     return u\n",
+    "\n",
+    "#   @property\n",
+    "#   def total_time(self):\n",
+    "#     return sum(self.ts)\n",
+    "\n",
+    "#   def get_stats(self):\n",
+    "#     import numpy as np\n",
+    "#     from humanfriendly import format_size\n",
+    "#     from humanfriendly import format_timespan\n",
+    "\n",
+    "#     total_time = self.total_time\n",
+    "\n",
+    "#     stats = [\n",
+    "#       ('Thruput', ''),\n",
+    "#       ('N thru', (self.n\n",
+    "#                     if self.n_total is None\n",
+    "#                     else '%s (of %s)' % (self.n, self.n_total))),\n",
+    "#       ('N chunks', (len(self.ts)\n",
+    "#                     if self.n_total_chunks is None\n",
+    "#                     else '%s (of %s)' % (len(self.ts), self.n_total_chunks))),\n",
+    "#       ('Total time', format_timespan(total_time) if total_time else '-'),\n",
+    "#       ('Total thru', format_size(self.num_bytes)),\n",
+    "#       ('Rate', \n",
+    "#         format_size(self.num_bytes / total_time) + ' / sec'\n",
+    "#         if total_time else '-'),\n",
+    "#       ('Hz', float(self.n) / total_time if total_time else '-'),\n",
+    "#     ]\n",
+    "#     percent_complete = None\n",
+    "#     if self.n_total is not None:\n",
+    "#       percent_complete = 100. * float(self.n) / self.n_total\n",
+    "#     elif self.n_total_chunks is not None:\n",
+    "#       percent_complete = 100. * float(len(self.ts)) / self.n_total_chunks\n",
+    "#     if percent_complete is not None:\n",
+    "#       eta_sec = (\n",
+    "#         (100. - percent_complete) * \n",
+    "#         (total_time / (percent_complete + 1e-10)))\n",
+    "#       stats.extend([\n",
+    "#         ('Progress', ''),\n",
+    "#         ('Percent Complete', percent_complete),\n",
+    "#         ('Est. Time To Completion', format_timespan(eta_sec)),\n",
+    "#       ])\n",
+    "#     if len(self.ts) >= 2:\n",
+    "#       format_t = lambda t: format_timespan(t, detailed=True)\n",
+    "#       stats.extend([\n",
+    "#         ('Latency (per chunk)', ''),\n",
+    "#         ('Avg', format_t(np.mean(self.ts))),\n",
+    "#         ('p50', format_t(np.percentile(self.ts, 50))),\n",
+    "#         ('p95', format_t(np.percentile(self.ts, 95))),\n",
+    "#         ('p99', format_t(np.percentile(self.ts, 99))),\n",
+    "#       ])\n",
+    "#     if self.only_stats:\n",
+    "#       stats = tuple(\n",
+    "#         (name, value)\n",
+    "#         for name, value in stats\n",
+    "#         if name in self.only_stats\n",
+    "#       )\n",
+    "#     return stats\n",
+    "\n",
+    "#   def __iadd__(self, other):\n",
+    "#     self.n += other.n\n",
+    "#     self.num_bytes += other.num_bytes\n",
+    "#     self.ts.extend(other.ts)\n",
+    "#     return self\n",
+    "\n",
+    "#   def __str__(self):\n",
+    "#     import tabulate\n",
+    "#     stats = self.get_stats()\n",
+    "#     summary = tabulate.tabulate(stats)\n",
+    "#     if self.name:\n",
+    "#       prefix = '%s [Pid:%s Id:%s]' % (self.name, os.getpid(), id(self))\n",
+    "#       summary = prefix + '\\n' + summary\n",
+    "#     return summary\n",
+    "  \n",
+    "#   def __del__(self):\n",
+    "#     if self.log_on_del:\n",
+    "#       self.stop_block()\n",
+    "\n",
+    "#       from oarphpy.util import create_log\n",
+    "#       log = create_log()\n",
+    "#       print('\\n' + str(self) + '\\n') #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
+    "  \n",
+    "#   @staticmethod\n",
+    "#   def monitoring_tensor(name, tensor, **observer_init_kwargs):\n",
+    "#     \"\"\"Monitor the size of the given tensorflow `Tensor` and record a\n",
+    "#     text TF Summary with the contents of this ThruputObserver.\"\"\"\n",
+    "\n",
+    "#     class Observer(object):\n",
+    "#       def __init__(self, dtype_size_bytes):\n",
+    "#         self.observer = ThruputObserver(name=name, **observer_init_kwargs)\n",
+    "#         self.dtype_size_bytes = dtype_size_bytes\n",
+    "#       def __call__(self, t_shape):\n",
+    "#         import numpy as np\n",
+    "#         n = t_shape[0]\n",
+    "#         num_bytes = np.prod(t_shape) * self.dtype_size_bytes\n",
+    "#         self.observer.stop_block(n=n, num_bytes=num_bytes)\n",
+    "#         self.observer.maybe_log_progress()\n",
+    "        \n",
+    "#         # Tensorboard is very picky about wanting Markdown :P\n",
+    "#         import tabulatehelper as th\n",
+    "#         stats = self.observer.get_stats()\n",
+    "#         out = th.md_table(stats, headers=[name])\n",
+    "\n",
+    "#         self.observer.start_block()\n",
+    "#         return out\n",
+    "    \n",
+    "#     import tensorflow as tf\n",
+    "#     obs_str_tensor = tf.compat.v1.py_func(\n",
+    "#               Observer(tensor.dtype.size), [tf.shape(tensor)], tf.string)\n",
+    "#     tf.summary.text(name + '/ThruputObserver', obs_str_tensor)\n",
+    "#     return obs_str_tensor\n",
+    "  \n",
+    "#   @staticmethod\n",
+    "#   def wrap_func(func, **observer_init_kwargs):\n",
+    "#     \"\"\"Decorate `func` and observe a block on each call\"\"\"\n",
+    "#     class MonitoredFunc(object):\n",
+    "#       def __init__(self, func, observer_init_kwargs):\n",
+    "#         self.func = func\n",
+    "#         self.observer = ThruputObserver(**observer_init_kwargs)\n",
+    "#       def __call__(self, *args, **kwargs):\n",
+    "#         from oarphpy.util.misc import get_size_of_deep\n",
+    "#         self.observer.start_block()\n",
+    "#         ret = self.func(*args, **kwargs)\n",
+    "#         self.observer.stop_block(n=1, num_bytes=get_size_of_deep(ret))\n",
+    "#         self.observer.maybe_log_progress()\n",
+    "#         return ret\n",
+    "#     return MonitoredFunc(func, observer_init_kwargs)\n",
+    "\n",
+    "#   @staticmethod\n",
+    "#   def monitor_generator(gen, **observer_init_kwargs): #~~~~~~~~~~~~~~~~~~~~\n",
+    "#     observer_init_kwargs['log_on_del'] = True\n",
+    "#     t = ThruputObserver(**observer_init_kwargs)\n",
+    "#     while True:\n",
+    "#         t.start_block()\n",
+    "#         x = six.next(gen)\n",
+    "#         t.stop_block(n=1, num_bytes=oputil.get_size_of_deep(x)) # ~~~~~~~~~~~~~~~~~~~\n",
+    "        \n",
+    "#         yield x\n",
+    "        \n",
+    "#         t.maybe_log_progress()\n",
+    "        "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Support Code\n",
+    "\n",
+    "def get_calibration(seq):\n",
+    "    scene_base = get_scene_basepath(seq)\n",
+    "    return parse_calibration(os.path.join(scene_base, 'calib.txt'))\n",
+    "\n",
+    "def get_poses(seq):\n",
+    "    scene_base = get_scene_basepath(seq)\n",
+    "    return parse_poses(os.path.join(scene_base, \"poses.txt\"))\n",
+    "    \n",
+    "def parse_calibration(path):\n",
+    "    \"\"\"Parse a calibration file and return a map to 4x4 Numpy matrices.\n",
+    "    Important keys returned:\n",
+    "    * Tr - the lidar to camera static transform\n",
+    "    * P2 - the left camera projective matrix P\n",
+    "    Based upon https://github.com/PRBonn/semantic-kitti-api/blob/9b5feda3b19ea560a298493b9a5ebebe0cbe2cc2/generate_sequential.py#L14\n",
+    "    \"\"\"\n",
+    "    calib = {}\n",
+    "\n",
+    "    with open(path) as f:\n",
+    "        for line in f:\n",
+    "            key, mat_str = line.strip().split(\":\")\n",
+    "            values = [float(v) for v in mat_str.strip().split()]\n",
+    "            mat = np.zeros((4, 4))\n",
+    "            mat[0, 0:4] = values[0:4]\n",
+    "            mat[1, 0:4] = values[4:8]\n",
+    "            mat[2, 0:4] = values[8:12]\n",
+    "            mat[3, 3] = 1.0\n",
+    "            calib[key] = mat\n",
+    "    return calib\n",
+    "\n",
+    "def parse_poses(path):\n",
+    "    \"\"\"Read a SemanticKITTI (per-scan) poses file and return a list of 4x4 homogeneous\n",
+    "    RT matrices that express world-to-left-camera transforms.  The index of this list is\n",
+    "    implicitly the scan ID.\n",
+    "    \n",
+    "    Based upon: https://github.com/PRBonn/semantic-kitti-api/blob/9b5feda3b19ea560a298493b9a5ebebe0cbe2cc2/generate_sequential.py#L42\n",
+    "    \"\"\"\n",
+    "    poses = []\n",
+    "    with open(path) as f:\n",
+    "        for line in f:\n",
+    "            values = [float(v) for v in line.strip().split()]\n",
+    "            mat = np.zeros((4, 4))\n",
+    "            mat[0, 0:4] = values[0:4]\n",
+    "            mat[1, 0:4] = values[4:8]\n",
+    "            mat[2, 0:4] = values[8:12]\n",
+    "            mat[3, 3] = 1.0\n",
+    "            poses.append(mat)\n",
+    "    return poses\n",
+    "    \n",
+    "\n",
+    "    \n",
+    "# #     Tr = calib[\"Tr\"]\n",
+    "# #     Tr_inv = np.linalg.inv(Tr)\n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "#   \"\"\" read poses file with per-scan poses from given filename\n",
+    "#       Returns\n",
+    "#       -------\n",
+    "#       list\n",
+    "#           list of poses as 4x4 numpy arrays.\n",
+    "#   \"\"\"\n",
+    "#   file = open(filename)\n",
+    "\n",
+    "#   poses = []\n",
+    "\n",
+    "#   Tr = calibration[\"Tr\"]\n",
+    "# #   print('Tr', Tr)\n",
+    "# #   Tr = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])\n",
+    "#   Tr_inv = np.linalg.inv(Tr)\n",
+    "\n",
+    "#   for line in file:\n",
+    "#     values = [float(v) for v in line.strip().split()]\n",
+    "\n",
+    "#     pose = np.zeros((4, 4))\n",
+    "#     pose[0, 0:4] = values[0:4]\n",
+    "#     pose[1, 0:4] = values[4:8]\n",
+    "#     pose[2, 0:4] = values[8:12]\n",
+    "#     pose[3, 3] = 1.0\n",
+    "\n",
+    "#     poses.append(np.matmul(Tr_inv, np.matmul(pose, Tr)))\n",
+    "# #     poses.append(np.matmul(pose, Tr))\n",
+    "#   file.close()\n",
+    "#   return poses"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Set up Spark"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-02-11 09:51:40,514\toarph 17241 : Using source root /opt/psegs/psegs \n",
+      "INFO - 2021-02-11 09:51:40,514 - spark - Using source root /opt/psegs/psegs \n",
+      "2021-02-11 09:51:40,516\toarph 17241 : Using source root /opt/psegs \n",
+      "INFO - 2021-02-11 09:51:40,516 - spark - Using source root /opt/psegs \n",
+      "2021-02-11 09:51:40,555\toarph 17241 : Generating egg to /tmp/tmpngjmscf__oarphpy_eggbuild ...\n",
+      "INFO - 2021-02-11 09:51:40,555 - spark - Generating egg to /tmp/tmpngjmscf__oarphpy_eggbuild ...\n",
+      "INFO - 2021-02-11 09:51:40,571 - driver - Generating grammar tables from /usr/lib/python3.8/lib2to3/Grammar.txt\n",
+      "INFO - 2021-02-11 09:51:40,626 - driver - Generating grammar tables from /usr/lib/python3.8/lib2to3/PatternGrammar.txt\n",
+      "2021-02-11 09:51:40,706\toarph 17241 : ... done.  Egg at /tmp/tmpngjmscf__oarphpy_eggbuild/psegs-0.0.0-py3.8.egg\n",
+      "INFO - 2021-02-11 09:51:40,706 - spark - ... done.  Egg at /tmp/tmpngjmscf__oarphpy_eggbuild/psegs-0.0.0-py3.8.egg\n",
+      "INFO - 2021-02-11 09:51:43,209 - kernelextension - Client Connected ('127.0.0.1', 56946)\n"
+     ]
+    }
+   ],
+   "source": [
+    "from oarphpy.spark import NBSpark\n",
+    "NBSpark.SRC_ROOT = '/opt/psegs/psegs'\n",
+    "NBSpark.SRC_ROOT_MODULES = ['psegs']\n",
+    "NBSpark.CONF_KV.update({\n",
+    "    'spark.driver.maxResultSize': '10g',\n",
+    "    'spark.driver.memory': '16g',\n",
+    "  })\n",
+    "# NBSpark.CONF_KV.pop('spark.extraListeners')\n",
+    "spark = NBSpark.getOrCreate()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Fuse World Clouds and Dump Them\n",
+    "\n",
+    "Nota Bene! Excellent large point cloud viewer: \n",
+    "```\n",
+    "docker --context default run -it --name=potree_viewer --rm --net=host -v `pwd`:/shared  jonazpiazu/potree\n",
+    "```\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# class SingleSequenceWorldCloudFuser(object):\n",
+    "    \n",
+    "#     def __init__(self, seq):\n",
+    "#         self.seq = seq\n",
+    "#         self.scene_base = get_scene_basepath(seq)\n",
+    "        \n",
+    "#         print(\"Loading calibration for sequence %s\" % seq)\n",
+    "#         self.calib = get_calibration(seq)\n",
+    "              \n",
+    "#         print(\"Loading poses for sequence %s\" % seq)\n",
+    "#         self.all_poses = get_poses(seq)\n",
+    "\n",
+    "#     @classmethod\n",
+    "#     def get_moving_mask_for_scan(cls, scene_base, scan_id):\n",
+    "#         scan_name = str(scan_id).rjust(6, '0')\n",
+    "#         labels_path = os.path.join(scene_base, 'labels', scan_name + '.label')\n",
+    "#         labels = np.fromfile(labels_path, dtype=np.uint32)\n",
+    "#         labels = labels.reshape((-1))\n",
+    "#         sem_label = labels & 0xFFFF  # semantic label in lower half\n",
+    "#         inst_label = labels >> 16    # instance id in upper half\n",
+    "#          # NB: 22 / 252 is chase car in scene 08 !!!\n",
+    "        \n",
+    "#         moving_mask = np.logical_or.reduce(tuple((sem_label == c) for c in SK_MOVING_LABELS))\n",
+    "#         return moving_mask\n",
+    "        \n",
+    "#     def read_scan_get_clean_world_cloud(self, scan_id):\n",
+    "#         import numpy as np\n",
+    "\n",
+    "#         scan_name = str(scan_id).rjust(6, '0')\n",
+    "#         scan_path = os.path.join(self.scene_base, 'velodyne', scan_name + '.bin')\n",
+    "#         lidar = np.frombuffer(open(scan_path, 'rb').read(), dtype=np.float32).reshape((-1, 4))\n",
+    "#         cloud = np.ones(lidar.shape)  # need homogeneous for change below\n",
+    "#         cloud[:, 0:3] = lidar[:, 0:3]\n",
+    "\n",
+    "#         # Move cloud into the world frame\n",
+    "#         Tr = self.calib[\"Tr\"]\n",
+    "#         Tr_inv = np.linalg.inv(Tr)\n",
+    "#         cam2_pose = self.all_poses[scan_id]\n",
+    "#         pose = np.matmul(Tr_inv, np.matmul(cam2_pose, Tr))  \n",
+    "#         cloud = np.matmul(pose, cloud.T).T\n",
+    "\n",
+    "#         # Clean out points for anything moving\n",
+    "# #         moving_mask = np.logical_or.reduce(tuple((sem_label == c) for c in SK_MOVING_LABELS))\n",
+    "# #         if not moving_mask.any():\n",
+    "# #             frames_no_movers.append(s)\n",
+    "#         moving_mask = self.get_moving_mask_for_scan(self.scene_base, scan_id)\n",
+    "#         static_cloud = cloud[~moving_mask][:, :3]\n",
+    "        \n",
+    "#         # TODO need to scrub the ego car !!  \n",
+    "#         # moving_cloud = cloud[moving_mask][:, :3]\n",
+    "#         return static_cloud\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# for seq, n_scans in sorted(SK_SEQ_TO_NSCANS.items()):\n",
+    "#     print(\"Fusing sequence %s ...\" % seq)\n",
+    "#     fuser = SingleSequenceWorldCloudFuser(seq)\n",
+    "    \n",
+    "#     slices = n_scans // 100\n",
+    "#     task_rdd = spark.sparkContext.parallelize(range(n_scans), numSlices=slices)\n",
+    "#     cloud_rdd = task_rdd.map(lambda s: fuser.read_scan_get_clean_world_cloud(s))\n",
+    "    \n",
+    "#     import pyspark\n",
+    "#     cloud_rdd = cloud_rdd.persist(pyspark.StorageLevel.MEMORY_AND_DISK)\n",
+    "    \n",
+    "    \n",
+    "#     iter_clouds = cloud_rdd.toLocalIterator()#prefetchPartitions=True)):  TODO FIXME USING SPARK 2.4 !!!\n",
+    "#     iter_clouds_t = ThruputObserver.monitor_generator(iter_clouds, n_total=n_scans, log_freq=100)\n",
+    "#     fused_world_cloud = np.vstack(iter_clouds_t)\n",
+    "    \n",
+    "#     print(\"Fused world cloud: {s} ({sz:.2f} GBytes)\".format(\n",
+    "#         s=fused_world_cloud.shape, sz=fused_world_cloud.nbytes * 1e-9))\n",
+    "    \n",
+    "#     fused_world_root = os.path.join(OUTPUT_ROOT, 'fused_world_clouds')\n",
+    "#     oputil.mkdir(fused_world_root)\n",
+    "\n",
+    "#     import pickle\n",
+    "#     path = os.path.join(fused_world_root, \"%s.pkl\" % seq)\n",
+    "#     pickle.dump(fused_world_cloud, open(path, 'wb'), protocol=4)\n",
+    "#     print('Saved fused world cloud pkl to %s' % path)\n",
+    "    \n",
+    "#     pcd = o3d.geometry.PointCloud()\n",
+    "#     pcd.points = o3d.utility.Vector3dVector(fused_world_cloud)\n",
+    "#     path = os.path.join(fused_world_root, \"%s.ply\" % seq)\n",
+    "#     o3d.io.write_point_cloud(path, pcd)\n",
+    "#     print('Saved fused world cloud to %s' % path)\n",
+    "# # #     n_moving_pts = sum(c.shape[0] for c in all_moving_clouds)\n",
+    "# # #     print('moving_cloud pts', n_moving_pts, float(n_moving_pts) / fused_world_cloud.shape[0])\n",
+    "# # #     print('frames_no_movers', frames_no_movers[:20])\n",
+    "    \n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Search for frames with zero moving things"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "\n",
+    "\n",
+    "# # for seq, n_scans in sorted(SK_SEQ_TO_NSCANS.items()):\n",
+    "# #     print(\"Searching sequence %s ...\" % seq)\n",
+    "    \n",
+    "# #     slices = n_scans // 100\n",
+    "# #     task_rdd = spark.sparkContext.parallelize(range(n_scans), numSlices=slices)\n",
+    "    \n",
+    "# #     scan_has_no_movers = lambda scan_id: (not seq_scan_has_movers(seq, scan_id))\n",
+    "# #     scans_no_movers = task_rdd.filter(scan_has_no_movers).collect()\n",
+    "    \n",
+    "# #     print(\"Sequence %s has %s frames with no moving points ...\" % (seq, len(scans_no_movers)))\n",
+    "\n",
+    "\n",
+    "# import sys\n",
+    "# sys.path.append('/opt/psegs')\n",
+    "\n",
+    "# import copy\n",
+    "# from psegs import datum\n",
+    "# from psegs import util\n",
+    "# from psegs.table.sd_table import StampedDatumTableBase\n",
+    "# class SemanticKITTIFusedSDTable(StampedDatumTableBase):\n",
+    "    \n",
+    "#     ONLY_FRAMES_WITH_NO_MOVERS = True\n",
+    "    \n",
+    "#     import sys\n",
+    "#     sys.path.append('/opt/psegs')\n",
+    "    \n",
+    "#     @classmethod\n",
+    "#     def _get_all_segment_uris(cls):\n",
+    "#         return [\n",
+    "#             datum.URI(\n",
+    "#                 dataset='semantikitti-psegs-fused',\n",
+    "#                 split='train',\n",
+    "#                 segment_id=str(seq))\n",
+    "#             for seq in SK_SEQ_TO_NSCANS.keys()\n",
+    "#         ]\n",
+    "\n",
+    "#     @classmethod\n",
+    "#     def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):\n",
+    "#         \"\"\"Subclasses should create and return a list of `RDD[StampedDatum]`s\n",
+    "\n",
+    "#         only_segments must be segment uris\n",
+    "#         TODO docs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\"\"\"\n",
+    "        \n",
+    "        \n",
+    "#         assert existing_uri_df is None, \"Resume feature not supported\"\n",
+    "#         seg_uris = cls.get_all_segment_uris()\n",
+    "#         if only_segments:\n",
+    "#             util.log.info(\"Filtering to only %s segments\" % len(only_segments))\n",
+    "#             seg_uris = [\n",
+    "#                 uri for uri in seg_uris\n",
+    "#                 if any(\n",
+    "#                   suri.soft_matches_segment(uri) for suri in only_segments)\n",
+    "#             ]\n",
+    "        \n",
+    "#         datum_rdds = []\n",
+    "#         for seg_uri in seg_uris:\n",
+    "#             seq = seg_uri.segment_id\n",
+    "#             if cls.ONLY_FRAMES_WITH_NO_MOVERS:\n",
+    "#                 util.log.info(\"Finding scans for sequence %s with no movering points ...\" % seq)\n",
+    "#                 n_scans = SK_SEQ_TO_NSCANS[seq]\n",
+    "#                 slices = n_scans // 100\n",
+    "#                 task_rdd = spark.sparkContext.parallelize(range(n_scans), numSlices=slices)\n",
+    "#                 scan_has_no_movers = lambda scan_id: (not seq_scan_has_movers(seq, scan_id))\n",
+    "#                 scans_no_movers = task_rdd.filter(scan_has_no_movers).collect()\n",
+    "#                 util.log.info(\"... sequence %s has %s scans with no movers.\" % (seq, len(scans_no_movers)))\n",
+    "#                 scan_ids = scans_no_movers\n",
+    "#             else:\n",
+    "#                 scan_ids = list(range(SK_SEQ_TO_NSCANS[seq]))\n",
+    "            \n",
+    "            \n",
+    "#             tasks = [(seg_uri, scan_id) for scan_id in scan_ids]\n",
+    "            \n",
+    "#             # Emit camera_image RDD\n",
+    "#             ctask_rdd = spark.sparkContext.parallelize(tasks)\n",
+    "#             datum_rdd = ctask_rdd.map(lambda t: cls.create_camera_frame(*t))\n",
+    "#             datum_rdds.append(datum_rdd)\n",
+    "            \n",
+    "#             # Emit ego_pose RDD\n",
+    "#             ptask_rdd = spark.sparkContext.parallelize(tasks)\n",
+    "#             datum_rdd = ptask_rdd.map(lambda t: cls.create_ego_pose(*t))\n",
+    "#             datum_rdds.append(datum_rdd)\n",
+    "            \n",
+    "#             # Emit world cloud once\n",
+    "#             wc_rdd = spark.sparkContext.parallelize([seg_uri])\n",
+    "#             datum_rdd = wc_rdd.map(lambda t: cls.create_world_cloud(t))\n",
+    "#             datum_rdds.append(datum_rdd)\n",
+    "    \n",
+    "#         return datum_rdds\n",
+    "        \n",
+    "#         # Emit camera and pose RDDs\n",
+    "        \n",
+    "        \n",
+    "#         # for each segment emit camera and ego pose RDDs\n",
+    "#         # for each world cloud emit flyweight\n",
+    "#         # if we had cuboids, we'd emit them and object fused clouds\n",
+    "#         # for the fused stuff, perhaps lazy-create those? and/or require as a\n",
+    "#         # FIXTURES thing.\n",
+    "    \n",
+    "#     @classmethod\n",
+    "#     def _get_calib(cls, seq):\n",
+    "#         if not hasattr(cls, '_calib'):\n",
+    "#             cls._calib = {}\n",
+    "#         if seq not in cls._calib:\n",
+    "#             cls._calib[seq] = get_calibration(seq)\n",
+    "#         return cls._calib[seq]\n",
+    "    \n",
+    "#     @classmethod\n",
+    "#     def _get_poses(cls, seq):\n",
+    "#         if not hasattr(cls, '_poses'):\n",
+    "#             cls._poses = {}\n",
+    "#         if seq not in cls._poses:\n",
+    "#             cls._poses[seq] = get_poses(seq)\n",
+    "#         return cls._poses[seq]\n",
+    "    \n",
+    "#     @classmethod\n",
+    "#     def create_camera_frame(cls, base_uri, scan_id):\n",
+    "#         seq = base_uri.segment_id\n",
+    "#         calib = cls._get_calib(seq)\n",
+    "        \n",
+    "#         uri = copy.deepcopy(base_uri)\n",
+    "#         uri.topic = 'camera|left_rect'\n",
+    "#         uri.timestamp = int(scan_id) # HACK!\n",
+    "\n",
+    "#         scene_base = get_scene_basepath(seq)\n",
+    "#         scan_name = str(scan_id).rjust(6, '0')\n",
+    "#         img_path = os.path.join(scene_base, 'image_2/', scan_name + '.png')\n",
+    "#         assert os.path.exists(img_path), (\n",
+    "#             \"Did you remember to expand data_odometry_color.zip ? %s not found\" % img_path)\n",
+    "#         with open(img_path, 'rb') as f:\n",
+    "#             width, height = util.get_png_wh(f.read(100)) # HACK!!!!\n",
+    "        \n",
+    "#         image_png = util.LazyThunktor(lambda: open(img_path, 'rb').read())\n",
+    "        \n",
+    "#         # HACK!!!  This is actually P !!!\n",
+    "#         K = calib['P2']\n",
+    "        \n",
+    "#         # hack! this is lidar to cam\n",
+    "#         ego_to_sensor = datum.Transform.from_transformation_matrix(\n",
+    "#                 calib['Tr'], src_frame='lidar', dest_frame=uri.topic)\n",
+    "        \n",
+    "#         sd_ego_pose = cls.create_ego_pose(base_uri, scan_id)\n",
+    "#         ego_pose = sd_ego_pose.transform\n",
+    "#         ci = datum.CameraImage(\n",
+    "#               sensor_name=uri.topic,\n",
+    "#               image_png=image_png,\n",
+    "#               width=width,\n",
+    "#               height=height,\n",
+    "#               timestamp=uri.timestamp,\n",
+    "#               ego_pose=ego_pose,\n",
+    "#               K=K,\n",
+    "#               ego_to_sensor=ego_to_sensor,\n",
+    "#               extra={'semantic_kitti.scan_id': str(scan_id)})\n",
+    "#         return datum.StampedDatum(uri=uri, camera_image=ci)\n",
+    "    \n",
+    "#     @classmethod\n",
+    "#     def create_ego_pose(cls, base_uri, scan_id):\n",
+    "#         seq = base_uri.segment_id\n",
+    "#         poses = cls._get_poses(seq)\n",
+    "        \n",
+    "#         uri = copy.deepcopy(base_uri)\n",
+    "#         uri.topic = 'ego_pose'\n",
+    "#         uri.timestamp = int(scan_id) # HACK!\n",
+    "        \n",
+    "#         # Hack! believe ego frame is lidar here?\n",
+    "#         ego_pose = datum.Transform.from_transformation_matrix(\n",
+    "#                 poses[scan_id], src_frame='world', dest_frame='ego')\n",
+    "\n",
+    "#         return datum.StampedDatum(uri=uri, transform=ego_pose)      \n",
+    "    \n",
+    "#     @classmethod\n",
+    "#     def create_world_cloud(cls, base_uri):\n",
+    "#         seq = base_uri.segment_id\n",
+    "\n",
+    "#         uri = copy.deepcopy(base_uri)\n",
+    "#         uri.topic = 'lidar|world_fused'\n",
+    "#         uri.timestamp = 0 # HACK!\n",
+    "        \n",
+    "#         cloud_path = os.path.join(OUTPUT_ROOT, 'fused_world_clouds', seq + '.ply')\n",
+    "#         def ply_to_np(path):\n",
+    "#             import open3d\n",
+    "#             pcd = open3d.io.read_point_cloud(str(path))\n",
+    "#             return np.asarray(pcd.points)\n",
+    "#         cloud = util.LazyThunktor(lambda: ply_to_np(cloud_path))\n",
+    "#         pc = datum.PointCloud(\n",
+    "#               sensor_name=uri.topic,\n",
+    "#               timestamp=uri.timestamp,\n",
+    "#               cloud=cloud,\n",
+    "#               ego_to_sensor=datum.Transform(),\n",
+    "#               ego_pose=datum.Transform(),\n",
+    "#               extra={'semantic_kitti.world_cloud_path': cloud_path})\n",
+    "#         return datum.StampedDatum(uri=uri, point_cloud=pc)\n",
+    "\n",
+    "# seg_uris = SemanticKITTIFusedSDTable.get_all_segment_uris()\n",
+    "# sd_rdd = SemanticKITTIFusedSDTable._get_segment_datum_rdd_or_df(spark, seg_uris[0])\n",
+    "# print(sd_rdd.count())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "sys.path.append('/opt/psegs')\n",
+    "\n",
+    "import copy\n",
+    "\n",
+    "from psegs import datum\n",
+    "from psegs import util\n",
+    "from psegs.table.sd_table import StampedDatumTableBase\n",
+    "\n",
+    "\n",
+    "def get_moving_mask_for_scan(scene_base, scan_id):\n",
+    "    scan_name = str(scan_id).rjust(6, '0')\n",
+    "    labels_path = os.path.join(scene_base, 'labels', scan_name + '.label')\n",
+    "    labels = np.fromfile(labels_path, dtype=np.uint32)\n",
+    "    labels = labels.reshape((-1))\n",
+    "    sem_label = labels & 0xFFFF  # semantic label in lower half\n",
+    "    inst_label = labels >> 16    # instance id in upper half\n",
+    "     # NB: 22 / 252 is chase car in scene 08 !!!\n",
+    "\n",
+    "    moving_mask = np.logical_or.reduce(tuple((sem_label == c) for c in SK_MOVING_LABELS))\n",
+    "    return moving_mask\n",
+    "\n",
+    "def seq_scan_has_movers(seq, scan_id):\n",
+    "    scene_base = get_scene_basepath(seq)\n",
+    "#     moving_mask = SingleSequenceWorldCloudFuser.get_moving_mask_for_scan(scene_base, scan_id)\n",
+    "    moving_mask = get_moving_mask_for_scan(scene_base, scan_id)\n",
+    "    return moving_mask.any()\n",
+    "\n",
+    "def read_scan_get_cloud(seq, scan_id, remove_movers=True, filter_ego=True):\n",
+    "    scan_name = str(scan_id).rjust(6, '0')\n",
+    "    scene_base = get_scene_basepath(seq)\n",
+    "    scan_path = os.path.join(scene_base, 'velodyne', scan_name + '.bin')\n",
+    "\n",
+    "    # Read the raw lidar\n",
+    "    lidar = np.frombuffer(open(scan_path, 'rb').read(), dtype=np.float32).reshape((-1, 4))\n",
+    "    cloud = np.ones(lidar.shape)  # need homogenous for change below\n",
+    "    cloud[:, 0:3] = lidar[:, 0:3]\n",
+    "\n",
+    "    if remove_movers:\n",
+    "        # Clean out points for anything moving\n",
+    "        moving_mask = get_moving_mask_for_scan(scene_base, scan_id)\n",
+    "        cloud = cloud[~moving_mask]#[:, :3]\n",
+    "    \n",
+    "    if filter_ego:\n",
+    "        pass # TODO ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
+    "    \n",
+    "    return cloud\n",
+    "\n",
+    "\n",
+    "\n",
+    "class SemanticKITTISDTable(StampedDatumTableBase):\n",
+    "    \n",
+    "    ONLY_FRAMES_WITH_NO_MOVERS = True\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def _get_all_segment_uris(cls):\n",
+    "        return [\n",
+    "            datum.URI(\n",
+    "                dataset='semantikitti-psegs-fused',\n",
+    "                split='train',\n",
+    "                segment_id=str(seq))\n",
+    "            for seq in SK_SEQ_TO_NSCANS.keys()\n",
+    "        ]\n",
+    "\n",
+    "    @classmethod\n",
+    "    def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):\n",
+    "        assert existing_uri_df is None, \"Resume feature not supported\"\n",
+    "        \n",
+    "        seg_uris = cls.get_all_segment_uris()\n",
+    "        if only_segments:\n",
+    "            util.log.info(\"Filtering to only %s segments\" % len(only_segments))\n",
+    "            seg_uris = [\n",
+    "                uri for uri in seg_uris\n",
+    "                if any(\n",
+    "                  suri.soft_matches_segment(uri) for suri in only_segments)\n",
+    "            ]\n",
+    "        \n",
+    "        datum_rdds = []\n",
+    "        for seg_uri in seg_uris:\n",
+    "            seq = seg_uri.segment_id\n",
+    "            if cls.ONLY_FRAMES_WITH_NO_MOVERS:\n",
+    "                util.log.info(\"Finding scans for sequence %s with no movering points ...\" % seq)\n",
+    "                n_scans = SK_SEQ_TO_NSCANS[seq]\n",
+    "                slices = n_scans // 100\n",
+    "                task_rdd = spark.sparkContext.parallelize(range(n_scans), numSlices=slices)\n",
+    "                scan_has_no_movers = lambda scan_id: (not seq_scan_has_movers(seq, scan_id))\n",
+    "                scans_no_movers = task_rdd.filter(scan_has_no_movers).collect()\n",
+    "                util.log.info(\"... sequence %s has %s scans with no movers.\" % (seq, len(scans_no_movers)))\n",
+    "                scan_ids = scans_no_movers\n",
+    "            else:\n",
+    "                scan_ids = list(range(SK_SEQ_TO_NSCANS[seq]))\n",
+    "            \n",
+    "            \n",
+    "            tasks = [(seg_uri, scan_id) for scan_id in scan_ids]\n",
+    "            \n",
+    "            # Emit camera_image RDD\n",
+    "            ctask_rdd = spark.sparkContext.parallelize(tasks)\n",
+    "            datum_rdd = ctask_rdd.map(lambda t: cls.create_camera_frame(*t))\n",
+    "            datum_rdds.append(datum_rdd)\n",
+    "            \n",
+    "            # Emit ego_pose RDD\n",
+    "            ptask_rdd = spark.sparkContext.parallelize(tasks)\n",
+    "            datum_rdd = ptask_rdd.map(lambda t: cls.create_ego_pose(*t))\n",
+    "            datum_rdds.append(datum_rdd)\n",
+    "            \n",
+    "            # Emit velodyne cloud RDD\n",
+    "            pctask_rdd = spark.sparkContext.parallelize(tasks[:100])\n",
+    "            datum_rdd = pctask_rdd.map(lambda t: cls.create_point_cloud_in_world(*t))\n",
+    "            datum_rdds.append(datum_rdd)\n",
+    "#             # Emit world cloud once\n",
+    "#             wc_rdd = spark.sparkContext.parallelize([seg_uri])\n",
+    "#             datum_rdd = wc_rdd.map(lambda t: cls.create_world_cloud(t))\n",
+    "#             datum_rdds.append(datum_rdd)\n",
+    "    \n",
+    "        return datum_rdds\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def _get_calib(cls, seq):\n",
+    "        if not hasattr(cls, '_calib'):\n",
+    "            cls._calib = {}\n",
+    "        if seq not in cls._calib:\n",
+    "            cls._calib[seq] = get_calibration(seq)\n",
+    "        return cls._calib[seq]\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def _get_poses(cls, seq):\n",
+    "        if not hasattr(cls, '_poses'):\n",
+    "            cls._poses = {}\n",
+    "        if seq not in cls._poses:\n",
+    "            cls._poses[seq] = get_poses(seq)\n",
+    "        return cls._poses[seq]\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def create_camera_frame(cls, base_uri, scan_id):\n",
+    "        seq = base_uri.segment_id\n",
+    "        calib = cls._get_calib(seq)\n",
+    "        \n",
+    "        uri = copy.deepcopy(base_uri)\n",
+    "        uri.topic = 'camera|left_rect'\n",
+    "        uri.timestamp = int(scan_id) # HACK!\n",
+    "\n",
+    "        scene_base = get_scene_basepath(seq)\n",
+    "        scan_name = str(scan_id).rjust(6, '0')\n",
+    "        img_path = os.path.join(scene_base, 'image_2/', scan_name + '.png')\n",
+    "        assert os.path.exists(img_path), (\n",
+    "            \"Did you remember to expand data_odometry_color.zip ? %s not found\" % img_path)\n",
+    "        with open(img_path, 'rb') as f:\n",
+    "            width, height = util.get_png_wh(f.read(100)) # Util only needs the first few bytes\n",
+    "        \n",
+    "        import imageio\n",
+    "        image_factory = lambda: imageio.imread(img_path)\n",
+    "        \n",
+    "        # HACK!!!  This is actually P !!!\n",
+    "        K = calib['P2']\n",
+    "        \n",
+    "        # hack! this is lidar to cam\n",
+    "        ego_to_sensor = datum.Transform.from_transformation_matrix(\n",
+    "                calib['Tr'], src_frame='lidar', dest_frame=uri.topic)\n",
+    "        \n",
+    "        sd_ego_pose = cls.create_ego_pose(base_uri, scan_id)\n",
+    "        ego_pose = sd_ego_pose.transform\n",
+    "        ci = datum.CameraImage(\n",
+    "              sensor_name=uri.topic,\n",
+    "              image_factory=image_factory,\n",
+    "              width=width,\n",
+    "              height=height,\n",
+    "              timestamp=uri.timestamp,\n",
+    "              ego_pose=ego_pose,\n",
+    "              K=K,\n",
+    "              ego_to_sensor=ego_to_sensor,\n",
+    "              extra={'semantic_kitti.scan_id': str(scan_id)})\n",
+    "        return datum.StampedDatum(uri=uri, camera_image=ci)\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def create_ego_pose(cls, base_uri, scan_id):\n",
+    "        seq = base_uri.segment_id\n",
+    "        poses = cls._get_poses(seq)\n",
+    "        \n",
+    "        uri = copy.deepcopy(base_uri)\n",
+    "        uri.topic = 'ego_pose'\n",
+    "        uri.timestamp = int(scan_id) # HACK!\n",
+    "        \n",
+    "        # Hack! believe ego frame is lidar here?\n",
+    "        ego_pose = datum.Transform.from_transformation_matrix(\n",
+    "                poses[scan_id], src_frame='world', dest_frame='ego')\n",
+    "\n",
+    "        return datum.StampedDatum(uri=uri, transform=ego_pose)\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def create_point_cloud_in_world(cls, base_uri, scan_id):\n",
+    "        \n",
+    "        uri = copy.deepcopy(base_uri)\n",
+    "        uri.topic = 'lidar|world' + ('_cleaned' if cls.ONLY_FRAMES_WITH_NO_MOVERS else '')\n",
+    "        uri.timestamp = int(scan_id) # HACK!\n",
+    "        \n",
+    "        sd_ego_pose = cls.create_ego_pose(base_uri, scan_id)\n",
+    "        ego_pose = sd_ego_pose.transform\n",
+    "        \n",
+    "        def _get_cloud(seq, sid):\n",
+    "            cloud = read_scan_get_cloud(\n",
+    "                        seq,\n",
+    "                        sid,\n",
+    "                        remove_movers=cls.ONLY_FRAMES_WITH_NO_MOVERS)\n",
+    "            \n",
+    "            # Move cloud into the world frame\n",
+    "            calib = cls._get_calib(seq)\n",
+    "            all_poses = cls._get_poses(seq)\n",
+    "            Tr = calib[\"Tr\"]\n",
+    "            Tr_inv = np.linalg.inv(Tr)\n",
+    "            cam2_pose = all_poses[sid]\n",
+    "            pose = np.matmul(Tr_inv, np.matmul(cam2_pose, Tr))\n",
+    "            cloud = np.matmul(pose, cloud.T).T\n",
+    "            \n",
+    "            return cloud\n",
+    "\n",
+    "        pc = datum.PointCloud(\n",
+    "          sensor_name=uri.topic,\n",
+    "          timestamp=uri.timestamp,\n",
+    "          cloud_factory=lambda: _get_cloud(base_uri.segment_id, scan_id),\n",
+    "          ego_to_sensor=datum.Transform(), # Hack! cloud is in world frame\n",
+    "          ego_pose=ego_pose,\n",
+    "          extra={'semantic_kitti.scan_id': str(scan_id)})\n",
+    "        return datum.StampedDatum(uri=uri, point_cloud=pc)\n",
+    "        \n",
+    "\n",
+    "seg_uris = SemanticKITTISDTable.get_all_segment_uris()\n",
+    "# sd_rdd = SemanticKITTISDTable._get_segment_datum_rdd_or_df(spark, seg_uris[0])\n",
+    "# print(sd_rdd.count())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-02-11 09:51:44,067\tps   17241 : Filtering to only 1 segments\n",
+      "INFO - 2021-02-11 09:51:44,067 - fused_lidar - Filtering to only 1 segments\n",
+      "2021-02-11 09:51:44,068\tps   17241 : SemanticKITTIFusedWorldCloudTable building fused world clouds ...\n",
+      "INFO - 2021-02-11 09:51:44,068 - fused_lidar - SemanticKITTIFusedWorldCloudTable building fused world clouds ...\n",
+      "2021-02-11 09:51:44,069\tps   17241 : ... have 1 segments to fuse ...\n",
+      "INFO - 2021-02-11 09:51:44,069 - fused_lidar - ... have 1 segments to fuse ...\n",
+      "2021-02-11 09:51:44,070\tps   17241 : ... working on 00 ...\n",
+      "INFO - 2021-02-11 09:51:44,070 - fused_lidar - ... working on 00 ...\n",
+      "2021-02-11 09:51:44,071\tps   17241 : ... have fused cloud; skipping! /opt/psegs/dataroot/fused_world_clouds/naive_cuboid_scrubber/semantikitti-psegs-fused/train/00/fused_world.ply\n",
+      "INFO - 2021-02-11 09:51:44,071 - fused_lidar - ... have fused cloud; skipping! /opt/psegs/dataroot/fused_world_clouds/naive_cuboid_scrubber/semantikitti-psegs-fused/train/00/fused_world.ply\n",
+      "2021-02-11 09:51:44,072\tps   17241 : ... SemanticKITTIFusedWorldCloudTable done fusing clouds.\n",
+      "INFO - 2021-02-11 09:51:44,072 - fused_lidar - ... SemanticKITTIFusedWorldCloudTable done fusing clouds.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1\n"
+     ]
+    }
+   ],
+   "source": [
+    "from psegs.exp.fused_lidar import FusedWorldCloudTableBase\n",
+    "\n",
+    "class SemanticKITTIFusedWorldCloudTable(FusedWorldCloudTableBase):\n",
+    "    SRC_SD_TABLE = SemanticKITTISDTable\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def _get_task_lidar_cuboid_rdd(cls, spark, segment_uri):\n",
+    "        seg_rdd = cls.SRC_SD_TABLE.get_segment_datum_rdd(spark, segment_uri)\n",
+    "        \n",
+    "        # SemanticKITTI has no cuboids, so the Fuser algo simply concats the cloud points\n",
+    "        def iter_task_rows(iter_sds):\n",
+    "            from pyspark import Row\n",
+    "            from oarphpy.spark import RowAdapter\n",
+    "            for sd in iter_sds:\n",
+    "                if sd.point_cloud is not None:\n",
+    "                    pc = sd.point_cloud\n",
+    "                    task_id = \"%s.%s\" % (sd.uri.segment_id, pc.extra['semantic_kitti.scan_id'])\n",
+    "                    yield Row(\n",
+    "                        task_id=task_id,\n",
+    "                        point_clouds=[pc],\n",
+    "                        cuboids=[])\n",
+    "        \n",
+    "        task_rdd = seg_rdd.mapPartitions(iter_task_rows)\n",
+    "        return task_rdd\n",
+    "        \n",
+    "seg_uris = SemanticKITTIFusedWorldCloudTable.get_all_segment_uris()\n",
+    "sd_rdd = SemanticKITTIFusedWorldCloudTable._get_segment_datum_rdd_or_df(spark, seg_uris[0])\n",
+    "print(sd_rdd.count())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/psegs-util b/psegs-util
new file mode 100755
index 0000000..819be77
--- /dev/null
+++ b/psegs-util
@@ -0,0 +1,299 @@
+#!/usr/bin/env python3
+# vim: tabstop=2 shiftwidth=2 expandtab
+
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+DESC = """
+devtool - This tool serves to both document and automate the PSegs
+development workflow.
+
+## Example
+
+TODO ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``O
+"""
+
+import os
+import subprocess
+import sys
+import tempfile
+
+## Logging
+import logging
+# Record layout: timestamp, TAB, logger name (left-justified, at least 4
+# chars wide), process id, then the message.
+LOG_FORMAT = "%(asctime)s\t%(name)-4s %(process)d : %(message)s"
+# Script-wide logger named "ps"; emits INFO and above to stderr.
+log = logging.getLogger("ps")
+log.setLevel(logging.INFO)
+console_handler = logging.StreamHandler(sys.stderr)
+console_handler.setFormatter(logging.Formatter(LOG_FORMAT))
+log.addHandler(console_handler)
+
+# Each setting below can be overridden through the environment variable of
+# the same name; the three Docker-related names all default to 'psegs'.
+PS_DOCKER_REPOSITORY = os.environ.get('PS_DOCKER_REPOSITORY', 'psegs')
+PS_CONTAINER_NAME = os.environ.get('PS_CONTAINER_NAME', 'psegs')
+PS_IMAGE_NAME = os.environ.get('PS_IMAGE_NAME', 'psegs')
+# Defaults to the absolute path of the directory containing this script.
+PS_ROOT = os.environ.get(
+  'PS_ROOT', os.path.abspath(os.path.dirname(__file__)))
+
+## Utils 
+
+def get_psegs_version(ps_root):
+  import warnings
+  with warnings.catch_warnings():
+    warnings.filterwarnings("ignore",category=DeprecationWarning)
+    import imp
+      # Dear imp maintainers, please stop renaming things ...
+
+  path = os.path.join(ps_root, 'psegs/__init__.py')
+  m = imp.load_source('_', path)
+  return m.__version__
+
+def run_cmd(cmd):
+  cmd = cmd.replace('\n', '').strip()
+  log.info("Running %s ..." % cmd)
+  subprocess.check_call(cmd, shell=True)
+  log.info("... done with %s " % cmd)
+
+
+class DockerEnv(object):
+  """Handle for a single Dockerized environemt"""
+
+  DOCKERFILE_PATH = os.path.join(PS_ROOT, 'docker', 'Dockerfile')
+  IMAGE_NAME = PS_IMAGE_NAME
+  IMAGE_VERSION = get_psegs_version(PS_ROOT)
+  SRC_ROOT = PS_ROOT
+  DOCKER_REPOSITORY = PS_DOCKER_REPOSITORY
+
+  @classmethod
+  def full_image_name(cls):
+    return "%s/%s:%s" % (
+      cls.DOCKER_REPOSITORY , cls.IMAGE_NAME, cls.IMAGE_VERSION)
+
+  @classmethod
+  def build(cls):
+    image = cls.full_image_name()
+    CMD = """
+      DOCKER_BUILDKIT=1 docker build -t {image} -f {dockerfile} {rootdir}
+    """.format(
+      image=image,
+      dockerfile=cls.DOCKERFILE_PATH,
+      rootdir=cls.SRC_ROOT)
+    run_cmd(CMD)
+
+  @classmethod
+  def push_as_latest(cls):
+    image = cls.full_image_name()
+    latest = image.split(':')[0]
+    toks = image.split(':')
+    toks[-1] = 'latest'
+    latest = ':'.join(toks)
+    run_cmd('docker tag ' + image + ' ' + latest)
+    run_cmd('docker push ' + latest)
+    run_cmd('docker push ' + image)
+
+  @classmethod
+  def start(
+        cls,
+        container_name=PS_CONTAINER_NAME,
+        mnt_local_root=True,
+        include_outer_root=True):
+    image = cls.full_image_name()
+    have_nvidia_docker = False
+    try:
+      run_cmd('nvidia-docker --help > /dev/null')
+      have_nvidia_docker = True
+    except Exception:
+      log.info("Not using nvidia-docker")
+
+    mounts = ''
+    if mnt_local_root:
+      mounts += ' -v `pwd`:/opt/psegs:z'
+    if include_outer_root:
+      mounts += ' -v /:/outer_root'
+
+    docker = 'nvidia-docker' if have_nvidia_docker else 'docker'
+    CMD = """
+      {docker} run
+        --name {container_name}
+        -d -it -P --net=host
+        {mounts}
+          {docker_image} sleep infinity || docker start {container_name} || true
+    """.format(
+          docker=docker,
+          container_name=container_name,
+          mounts=mounts,
+          docker_image=image)
+    run_cmd(CMD)
+
+  @classmethod
+  def shell(cls, container_name=PS_CONTAINER_NAME):
+    cls.start(container_name=container_name)
+    EXEC_CMD = 'docker exec -it %s bash' % container_name
+    os.execvp("docker", EXEC_CMD.split(' '))
+
+  @classmethod
+  def remove(cls, container_name=PS_CONTAINER_NAME):
+    try:
+      run_cmd('docker rm -f %s' % container_name)
+    except Exception:
+      pass
+    log.info("Removed container %s" % container_name)
+
+  @classmethod
+  def run_cmd(
+        cls,
+        cmd,
+        container_name=None,
+        force_build=False,
+        mnt_local_root=True,
+        include_outer_root=True,
+        rm=True):
+    """Run `cmd` in a container, and potentially build the needed docker image
+    if it doesn't exist."""
+    image = cls.full_image_name()
+    
+    have_image = False
+    if not force_build:
+      try:
+        run_cmd('docker image inspect %s > /dev/null' % image)
+        have_image = True
+      except Exception:
+        pass
+
+    if not have_image:
+      log.info("Don't have %s, trying to build ..." % image)
+      cls.build()
+      log.info("... done building.")
+
+    log.info("Using docker image %s" % image)
+
+
+    ### Run `cmd`!
+    if not container_name:
+      container_name = 'psegs-temp'
+
+    cls.start(
+      container_name=container_name,
+      mnt_local_root=mnt_local_root,
+      include_outer_root=include_outer_root)
+    RUN_CMD = 'docker exec -it %s %s' % (container_name, cmd)
+    run_cmd(RUN_CMD)
+
+    if rm:
+      cls.remove(container_name=container_name)
+
+  @classmethod
+  def run_tests(cls):
+    # Always use a clean run
+    cls.remove(container_name='psegs-test')
+    
+    # Test!
+    CMD = 'python3 setup.py test'
+    cls.run_cmd(
+          CMD,
+          container_name='psegs-test',
+          mnt_local_root=False,
+          include_outer_root=False,
+          force_build=True,
+          rm=True)
+
+
+def run_dsutil(dataset):
+  assert os.path.exists('/opt/psegs'), (
+    "Run DSUtil from inside the PSegs dockerized environment.  Haved you run "
+    "psegs-util --shell ?")
+  
+  assert sys.version_info[0] >= 3, \
+    "Python 3 required, try with `python3 ./psegs-util --dsutil=help`"
+
+  # Import PSegs or die
+  sys.path.append(PS_ROOT)
+  try:
+    import psegs
+  except Exception as e:
+    assert False, (
+      "Could not find PSegs, have you run psegs-util --shell? err %s" % (e,))
+  
+  from psegs import dsutil
+  dsutil.run(dataset)
+
+
+def create_arg_parser():
+  import argparse
+  
+  parser = argparse.ArgumentParser(
+                      description=DESC,
+                      formatter_class=argparse.RawDescriptionHelpFormatter)
+
+  # Actions
+  parser.add_argument(
+    '--shell', default=False, action='store_true',
+    help='Drop into a dockerized shell')
+  parser.add_argument(
+    '--shell-rm', default=False, action='store_true',
+    help='Remove the PS dev env container')
+  
+  parser.add_argument(
+    '--build-env', default=False, action='store_true',
+    help='Build the PS docker image')
+  parser.add_argument(
+    '--push-as-latest', default=False, action='store_true',
+    help='Tag Docker images at latest and push them')
+  parser.add_argument(
+    '--test', default=False, action='store_true',
+    help='Run unit tests in the PS docker environment')
+  parser.add_argument(
+    '--docs', default='',
+    help='Generate docs and copy HTML to the given directory')
+  
+  parser.add_argument(
+    '--dsutil',
+    help='Run the PSegs Dataset Util for this dataset '
+         '(to see choices, run with --dsutil=help )')
+
+  return parser
+
+
+## Routines
+
+def main(args=None):
+  if not args:
+    parser = create_arg_parser()
+    args = parser.parse_args()
+  
+  if args.build_env:
+    DockerEnv.build()
+  
+  if args.push_as_latest:
+    DockerEnv.push_as_latest()
+  
+  if args.test:
+    DockerEnv.run_tests()
+      
+  if args.shell:
+    DockerEnv.shell()
+  
+  if args.shell_rm:
+    DockerEnv.remove()
+  
+  if args.docs:
+    DockerEnv.build()
+    DockerEnv.run_cmd("bash -c 'cd docs && make html'")
+    run_cmd("cp -v -r docs/build/html %s/psegs_docs" % args.docs)
+  
+  if args.dsutil:
+    run_dsutil(args.dsutil)
+
+if __name__ == '__main__':
+  main()
+
diff --git a/psegs/__init__.py b/psegs/__init__.py
new file mode 100644
index 0000000..d1981d5
--- /dev/null
+++ b/psegs/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__version__ = '0.0.2'
+
diff --git a/psegs/browser.py b/psegs/browser.py
new file mode 100755
index 0000000..68a6284
--- /dev/null
+++ b/psegs/browser.py
@@ -0,0 +1,62 @@
+# Copyright 2022 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+BROWSER_DESC = """
+browser - A script (and library) providing basic browsing functionality for
+available PSegs data
+
+## Examples
+
+Show all known segments:
+python3 psegs/browser.py --list-segments
+
+Show all known segments only for 'my-dataset'
+python3 psegs/browser.py --list-segments --dataset=my-dataset
+
+
+"""
+
+from psegs import xform as psx
+
+
+def create_arg_parser():
+  import argparse
+
+  parser = argparse.ArgumentParser(
+                    description=BROWSER_DESC,
+                    formatter_class=argparse.RawDescriptionHelpFormatter)
+
+  parser.add_argument(
+    '--list-segments', default=False, action='store_true',
+    help='List all known segments')
+
+  psx.configure_arg_parser(parser)
+
+  return parser
+
+
+def main(args=None):
+  import pprint
+
+  if args is None:
+    parser = create_arg_parser()
+    args = parser.parse_args()
+  
+  if args.list_segments:
+    seg_uris = psx.get_matching_seg_uris(args)
+    strs = [str(u) for u in seg_uris]
+    pprint.pprint(strs)
+
+if __name__ == '__main__':
+  main()
diff --git a/psegs/cache.py b/psegs/cache.py
new file mode 100644
index 0000000..aca61d9
--- /dev/null
+++ b/psegs/cache.py
@@ -0,0 +1,61 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+
+class LocalDiskCache(object):
+  """Defines the API that PSegs expects to cache clients and provides
+  a simple local adhoc disk cache."""
+
+  TIMELESS_KEY = datetime.datetime.fromtimestamp(0)
+
+  def __init__(self):
+    """Cache clients must have a zero-arg ctor"""
+    pass
+
+  def new_filepath(self, f_relpath, t=None):
+    from psegs.conf import C
+    dest = C.DATA_ROOT / 'psegs_local_disk_cache' / 'adhoc_files' / f_relpath
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    return dest
+
+  def new_dirpath(self, relpath, t=None):
+    from psegs.conf import C
+    dest = C.DATA_ROOT / 'psegs_local_disk_cache' / 'adhoc_dirs' / relpath
+    dest.mkdir(parents=True, exist_ok=True)
+    return dest
+
+  # Methods for temporally global or "timeless" keys
+
+  def new_timeless_filepath(self, fname):
+    return self.new_filepath(fname, t=self.TIMELESS_KEY)
+  
+  def new_timeless_dirpath(self, relpath):
+    return self.new_dirpath(relpath, t=self.TIMELESS_KEY)
+
+
+class AssetDiskCache(LocalDiskCache):
+  # TODO demo how to subclass and plug in your own cache client ....
+
+  def __init__(self, config=None):
+    """TODO get canonical psegs config from somewhere or write to /opt/psegs/psegs_temp / dataroot stuff
+    """
+    self.yay = None
+
+  def new_filepath(self, fname, t=None):
+    raise NotImplementedError
+
+  def new_dirpath(self, relpath, t=None):
+    raise NotImplementedError
+
diff --git a/psegs/conf.py b/psegs/conf.py
new file mode 100644
index 0000000..860d3d8
--- /dev/null
+++ b/psegs/conf.py
@@ -0,0 +1,60 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tempfile
+from pathlib import Path
+
+import attr
+
+DEFAULT_PSEGS_ROOT = Path('/opt/psegs') # Default for `ProjConf.PS_ROOT`
+DEFAULT_DATA_ROOT = DEFAULT_PSEGS_ROOT / Path('dataroot') # Default `DATA_ROOT`
+DEFAULT_EXTERNAL_TEST_FIXTURES_ROOT = (
+  DEFAULT_DATA_ROOT / Path('external_test_fixtures'))
+DEFAULT_EXT_DATA_ROOT = DEFAULT_PSEGS_ROOT / Path('ext_data')
+DEFAULT_TEMP_DIR = Path(tempfile.gettempdir()) / 'psegs_temp' # In the OS temp dir
+
+@attr.s(eq=True)
+class ProjConf(object):
+  """Holds project-specific configuration (with defaults sensible for
+  testing); the module-level instance `C` acts as the shared singleton."""
+
+  PS_ROOT = attr.ib(type=Path, default=DEFAULT_PSEGS_ROOT, converter=Path)
+  """Path: root of the PSegs project (Python code & data)"""
+
+  DATA_ROOT = attr.ib(type=Path, default=DEFAULT_DATA_ROOT, converter=Path)
+  """Path: root (perhaps local) for all input and output data."""
+
+  EXTERNAL_TEST_FIXTURES_ROOT = attr.ib(
+    type=Path, default=DEFAULT_EXTERNAL_TEST_FIXTURES_ROOT, converter=Path)
+  """Path: root for externally-hosted test fixtures; these are required for
+  some dataset-specific tests to run."""
+
+  EXT_DATA_ROOT = attr.ib(
+    type=Path, default=DEFAULT_EXT_DATA_ROOT, converter=Path)
+  """Path: root for extention data (e.g. meta-labels mined from standard
+  datasets).  These files are required for some specific features."""
+
+  SD_TABLE_ROOT = attr.ib(
+    type=Path,
+    default=DEFAULT_DATA_ROOT / 'stamped_datum',
+    converter=Path)
+  """Path: store :class:`~psegs.table.sd_table.StampedDatumTableFactory` data
+  table(s) here.  Putting all tables in the same root 'directory' makes it
+  easier to (virtually) concatenate them."""
+
+  PS_TEMP = attr.ib(type=Path, default=DEFAULT_TEMP_DIR, converter=Path)
+  """Path: save PSegs-specific debug files in this temp directory; co-locate
+  them to make introspection and deletion easier."""
+
+C = ProjConf() # Shared config instance, built with the defaults above
diff --git a/psegs/datasets/__init__.py b/psegs/datasets/__init__.py
new file mode 100644
index 0000000..c0ec9ac
--- /dev/null
+++ b/psegs/datasets/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/psegs/datasets/adhoc_pixels.py b/psegs/datasets/adhoc_pixels.py
new file mode 100644
index 0000000..c7d42f8
--- /dev/null
+++ b/psegs/datasets/adhoc_pixels.py
@@ -0,0 +1,1012 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+from pathlib import Path
+
+import six
+
+from psegs import datum
+from psegs import util
+from psegs.spark import Spark
+from psegs.cache import AssetDiskCache
+from psegs.util.misc import get_image_wh
+from psegs.util.video import ffmpeg_explode
+from psegs.util.video import VideoMeta
+from psegs.util.video import VideoExplodeParams
+from psegs.table.sd_table_factory import StampedDatumTableFactory
+
+
+###############################################################################
+## StampedDatumTableFactory for a Video
+
+
+def read_frame(video_uri, frame_idx): # Decode & return frame `frame_idx`
+  import imageio # Imported lazily, as elsewhere in this module
+  with imageio.get_reader(video_uri) as r: # Always close the reader when done
+    return r.get_data(frame_idx)
+
+
+def lazy_load_cached_frame(video_uri, frame_idx, cache_path):
+  """Return frame `frame_idx` of `video_uri`, decoding it and saving it to
+  `cache_path` first if that file does not exist yet."""
+  import imageio
+  cache_path = Path(cache_path)
+  if not cache_path.exists():
+    imageio.imwrite(cache_path, read_frame(video_uri, frame_idx), compress_level=1) # Fastest
+  return imageio.imread(cache_path)
+
+
+class AdhocVideosSDTFactory(StampedDatumTableFactory):
+  """This `StampedDatumTableFactory` wraps a single video and exposes the
+  video frames as a segment of (lazy-loaded) PSegs `CameraImage`s.  See
+  `create_factory_for_video()` below for quickstart.
+  """
+
+  VIDEO_URI = '' # URI / path of the wrapped video
+  START_NANOSTAMP = 0 # Timestamp (nanoseconds) given to frame 0
+  FRAMES_PER_SECOND = 1 # Spaces the timestamps of successive frames
+  N_FRAMES = 0 # Number of frames (URIs) that this segment exposes
+  HEIGHT = 0 # Frame height, in pixels
+  WIDTH = 0 # Frame width, in pixels
+
+  DATASET = 'anon' # `dataset` of this segment's URI
+  SPLIT = 'anon' # `split` of this segment's URI
+  SEGMENT_ID = 'anon_segment' # `segment_id` of this segment's URI
+  TOPIC = 'camera_video_adhoc' # `topic` of every frame URI
+
+  ASSET_CACHE_DIR = None # If set: holds the pickled factory and cached frames
+
+  @classmethod
+  def create_factory_for_video(
+        cls,
+        video_uri,
+        dataset='anon',
+        split='anon',
+        topic='camera_video_adhoc',
+        segment_id=None,
+        start_timestamp_use='st_mtime',
+        asset_cache_dir='',
+        limit=-1):
+    """Create and return a `StampedDatumTableFactory` class instance
+    for the given video.  We will read part of the video to assess
+    image dimensions, length, etc.
+
+    If `start_timestamp_use` is a string, we will infer timestamps by not
+    just the frame index but by stat-ing the video and using this
+    stat attribute.
+
+    If `start_timestamp_use` is an integer, we'll use that nanostamp
+    offset instead.
+
+    If `asset_cache_dir` is given, we return the factory already pickled
+    there for this `video_uri` (if any), else we save the new factory there.
+    If `limit` > 0, we expose at most the first `limit` frames.
+
+    Use this factory-factory function to produce a factory for your images,
+    and then:
+     * Use `create_sd_table()` to directly get a `StampedDatumTable` with your
+        wrapped `CameraImage` instances.
+     * Register the returned class to a PSegs `UnionFactory` as part of a
+        larger collection of segments.
+
+    TODO:
+    for f in `ls /outer_root/media/mai-tank/vids_to_sfm_temp/` ; do echo $f ; \
+      mkdir -p /outer_root/media/970evo_2/vids_to_sfm_temp_expanded2/${f}_expanded ; \
+        cd /outer_root/media/970evo_2/vids_to_sfm_temp_expanded2/${f}_expanded && \
+          ffmpeg -i /outer_root/media/mai-tank/vids_to_sfm_temp/${f} -qscale:v 2 -framerate 5 -vf scale=-1:1024 ffmpeg_extracted_${f}_1024_%9d.jpg
+          cd - ; \
+       done
+    """
+
+    import imageio
+
+    F = cls.maybe_load_factory(asset_cache_dir=asset_cache_dir)
+    if F is not None and F.VIDEO_URI == str(video_uri):
+      return F # NB: never reuse a factory that was cached for another video
+
+    if segment_id is None:
+      from urllib.parse import urlparse
+      res = urlparse(str(video_uri))
+      path = res.path
+      fname = Path(path).name
+      segment_id = fname
+
+    r = imageio.get_reader(video_uri)
+    n_frames = r.get_meta_data()['nframes']
+    if n_frames == float('inf'):
+      # For some python / imageio versions, you have to use this API:
+      n_frames = r.count_frames()
+      if n_frames == float('inf'):
+        raise ValueError(
+          "Don't currently support infinite streams: %s %s" % (
+            r.get_meta_data(), video_uri))
+    if limit > 0:
+      n_frames = min(n_frames, limit) # Don't claim more frames than exist
+    fps = r.get_meta_data()['fps']
+    h, w = r.get_data(0).shape[:2]
+    r.close() # Done probing; `read_frame()` opens a fresh reader per frame
+
+    if isinstance(start_timestamp_use, six.string_types):
+      res = Path(video_uri).lstat()
+      start_time_sec = getattr(res, start_timestamp_use)
+      start_time = int(1e9 * start_time_sec)
+    else:
+      start_time = int(start_timestamp_use)
+
+    class MyAdhocVideosSDTFactory(cls):
+      VIDEO_URI = str(video_uri)
+      START_NANOSTAMP = start_time
+      FRAMES_PER_SECOND = fps
+      N_FRAMES = n_frames
+      HEIGHT = h
+      WIDTH = w
+
+      DATASET = dataset
+      SPLIT = split
+      SEGMENT_ID = segment_id
+      TOPIC = topic
+
+      ASSET_CACHE_DIR = Path(asset_cache_dir) if asset_cache_dir else None
+
+    if asset_cache_dir:
+      MyAdhocVideosSDTFactory.maybe_save_factory()
+
+    return MyAdhocVideosSDTFactory
+
+
+  @classmethod
+  def create_factories_for_videos(
+        cls,
+        root_search_dir,
+        video_extensions=('.mov', '.mp4'),
+        spark=None,
+        **create_factory_kwargs):
+    """Recursively find videos under `root_search_dir` (files whose lower-cased
+    names end with one of `video_extensions`) and return a list with one
+    `create_factory_for_video(path, **create_factory_kwargs)` result per video
+    (`[]` if there are no videos).  Factories are created in parallel via Spark."""
+    from oarphpy.util.misc import is_stupid_mac_file
+    video_paths = [
+      p
+      for p in Path(root_search_dir).rglob('*') # `rglob()` already recurses
+      if (
+        not p.is_dir() and
+        not is_stupid_mac_file(p) and
+        p.name.lower().endswith(tuple(video_extensions))
+      )
+    ]
+    if not video_paths:
+      return [] # NB: else we'd ask Spark below for `numSlices=0` partitions
+    with Spark.sess(spark) as spark:
+      util.log.info(
+        f"AdhocVideosSDTFactory: Creating factories for "
+        f"{len(video_paths)} videos ...")
+      path_rdd = spark.sparkContext.parallelize(
+                        video_paths, numSlices=len(video_paths))
+      def create_factory(p):
+        return cls.create_factory_for_video(p, **create_factory_kwargs)
+      sdt_factories = path_rdd.map(create_factory).collect()
+      util.log.info("... done.")
+    return sdt_factories
+
+  @classmethod
+  def get_segment_uri(cls):
+    """Return the `datum.URI` naming this factory's (single) segment."""
+    uri_fields = dict(
+      dataset=cls.DATASET, split=cls.SPLIT, segment_id=cls.SEGMENT_ID)
+    return datum.URI(**uri_fields)
+
+  @classmethod
+  def get_image_uris(cls): # One URI per frame; `extra` says how to read it
+    segment_uri = cls.get_segment_uri()
+    frame_uris = []
+    for frame_idx in range(cls.N_FRAMES):
+      nanostamp = int(cls.START_NANOSTAMP + frame_idx * (1e9 / cls.FRAMES_PER_SECOND))
+      frame_uri = segment_uri.replaced(topic=cls.TOPIC, timestamp=nanostamp)
+      frame_uri.extra['AdhocVideosSDTFactory.video_uri'] = str(cls.VIDEO_URI)
+      frame_uri.extra['AdhocVideosSDTFactory.frame_index'] = str(frame_idx)
+      frame_uris.append(frame_uri)
+    return frame_uris
+
+  @classmethod
+  def create_stamped_datum(cls, uri):
+    """Build the `StampedDatum` for a `uri` from `get_image_uris()`; the
+    `CameraImage` decodes its frame lazily.  If `ASSET_CACHE_DIR` is set,
+    the decoded frame gets cached as a PNG under `ASSET_CACHE_DIR/frames`."""
+    src_video = uri.extra['AdhocVideosSDTFactory.video_uri']
+    idx = int(uri.extra['AdhocVideosSDTFactory.frame_index'])
+
+    if cls.ASSET_CACHE_DIR is not None:
+      png_name = '.'.join([uri.topic, str(uri.timestamp)]) + '.png'
+      frames_dir = cls.ASSET_CACHE_DIR / 'frames'
+      if not frames_dir.exists():
+        frames_dir.mkdir(parents=True, exist_ok=True)
+      png_path = frames_dir / png_name
+      load_frame = lambda: lazy_load_cached_frame(src_video, idx, png_path)
+    else:
+      load_frame = lambda: read_frame(src_video, idx)
+
+    camera_image = datum.CameraImage.create_world_frame_ci(
+                      sensor_name=uri.topic,
+                      width=cls.WIDTH,
+                      height=cls.HEIGHT,
+                      timestamp=uri.timestamp,
+                      image_factory=load_frame,
+                      extra=dict(uri.extra))
+
+    stamped_datum = datum.StampedDatum(uri=uri, camera_image=camera_image)
+    return stamped_datum
+  
+  @classmethod
+  def create_sd_table(cls, spark=None):
+    """Create and return a `StampedDatumTable` for just this factory's video,
+    working within a `Spark.sess(spark)` context."""
+    with Spark.sess(spark) as sess:
+      return cls.get_segment_sd_table(cls.get_segment_uri(), spark=sess)
+
+  @classmethod
+  def maybe_load_factory(cls, asset_cache_dir=''):
+    """Return the factory class pickled under `asset_cache_dir` (or, if that
+    is empty, under `cls.ASSET_CACHE_DIR`); return `None` if there is no
+    cache dir or no pickle there."""
+    cache_dir = asset_cache_dir or cls.ASSET_CACHE_DIR
+    if cache_dir is None:
+      return None
+
+    pkl_path = Path(cache_dir) / 'psegs_AdhocVideosSDTFactory_df.pkl'
+    if pkl_path.exists():
+      # NB: unpickling can run arbitrary code; only use trusted cache dirs
+      import cloudpickle
+      with open(pkl_path, 'rb') as f:
+        return cloudpickle.load(f)
+
+    return None
+  
+  @classmethod
+  def maybe_save_factory(cls):
+    """Pickle this factory class into `ASSET_CACHE_DIR` (creating that dir)
+    and return True; or just return False if `ASSET_CACHE_DIR` is unset."""
+    cache_dir = cls.ASSET_CACHE_DIR
+    if cache_dir is None:
+      return False
+
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    import cloudpickle
+    with open(cache_dir / 'psegs_AdhocVideosSDTFactory_df.pkl', 'wb') as f:
+      cloudpickle.dump(cls, f, protocol=3) # Support older python
+    return True
+
+  @classmethod
+  def maybe_build_cache(cls, spark=None):
+    """Save this factory and decode every frame (which fills the frame cache
+    under `ASSET_CACHE_DIR`).  Return True if the cache was built, or False
+    (doing nothing) if `ASSET_CACHE_DIR` is unset."""
+    if cls.ASSET_CACHE_DIR is None:
+      return False
+
+    util.log.info(f"Building cache for {cls.__name__} ...")
+    cls.maybe_save_factory()
+    with Spark.sess(spark) as spark:
+      datum_rdd = cls.create_sd_table(spark=spark).to_datum_rdd(spark=spark)
+
+      # Try to favor longer-lived python processes
+      from oarphpy.spark import cluster_cpu_count
+      n_cpus = min(cluster_cpu_count(spark), 8)
+      util.log.info('fixme too many cpus doesnt work with ffmpeg')
+      datum_rdd = datum_rdd.repartition(n_cpus)
+
+      util.log.info(
+        f"... exploding video {cls.VIDEO_URI} into {cls.N_FRAMES} images ...")
+      def get_num_pixels(sd):
+        if not sd.camera_image:
+          return 0
+        h, w = sd.camera_image.image.shape[:2] # Reading `.image` decodes it
+        return h * w
+      total_pixels = datum_rdd.map(get_num_pixels).sum()
+      util.log.info(
+        f"... exploded {1e-9 * total_pixels} total Gigapixels.")
+    return True # Was an implicit `None`, i.e. falsy just like "nothing done"
+
+  ## StampedDatumTableFactory Impl
+
+  @classmethod
+  def _get_all_segment_uris(cls): # This factory has exactly one segment
+    return [cls.get_segment_uri()]
+
+  @classmethod
+  def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):
+    """Return a list holding one RDD of this video's datums, or `[]` if
+    `only_segments` is given and none of those matches this segment."""
+    if existing_uri_df is not None:
+      util.log.info(
+        f"Note: resume mode unsupported, got existing_uri_df {existing_uri_df}")
+
+    if only_segments and not any(
+            wanted.soft_matches_segment_of(cls.get_segment_uri())
+            for wanted in only_segments):
+      return []
+
+    # One URI per frame; each URI gets mapped to its datum in the RDD
+    frame_uris = cls.get_image_uris()
+    frame_uri_rdd = spark.sparkContext.parallelize(frame_uris)
+    util.log.info(f"Creating datums for {len(frame_uris)} frames ...")
+
+    frame_datum_rdd = frame_uri_rdd.map(cls.create_stamped_datum)
+
+    return [frame_datum_rdd]
+
+
+# class DiskCachedFramesVideoSegmentFactory_load_image(object):
+#   __slots__ = ['_path']
+#   def __init__(self, path):
+#     self._path = path
+#   def __call__(self):
+#     import imageio
+#     return imageio.imread(self._path)
+# def DiskCachedFramesVideoSegmentFactory_load_image(path):
+#   import imageio
+#   return imageio.imread(path)
+
+class DiskCachedFramesVideoSegmentFactory(StampedDatumTableFactory):
+  """This `StampedDatumTableFactory` wraps a single video whose frames get
+  exploded to image files (see `explode_frames()`) and read from those files."""
+
+  ## Classloader / writer and persisted state
+
+  # Base URI for all datums
+  BASE_URI = None
+
+  # Extracted video metadata
+  VIDEO_METADATA = None
+
+  # Cache pre-computed `DiskCachedFramesVideoSegmentFactory` here; avoids
+  # having to re-read videos for metadata.  You probably want these
+  # cached on the same disk (e.g. a local disk) that `IMAGE_CACHE_CLS` uses.
+  CLS_CACHE_DIR = None
+
+  # Cache the actual frame image paths after a call to `explode_frames()`
+  EXPLODED_FRAME_PATHS = None
+
+  EXPLODE_PARAMS = None # Passed to `ffmpeg_explode()` by `explode_frames()`
+
+  IMAGE_CACHE_CLS = None # Cache client class; `explode_frames()` asks an instance for a dir
+
+  AUTO_EXPLODE = True # If set, `_create_datum_rdds()` explodes frames when needed
+
+  @classmethod
+  def _maybe_load_F(cls, uri, cls_cache_dir=None): # Cached factory for `uri`, or None
+    pkl_path = cls._cached_cls_path_for_uri(uri, cls_cache_dir=cls_cache_dir)
+    if pkl_path.exists():
+      import cloudpickle # NB: unpickling runs code; only use trusted cache dirs
+      with open(pkl_path, 'rb') as f:
+        return cloudpickle.load(f)
+    return None
+  
+  @classmethod
+  def _save_F(cls, F):
+    """Pickle factory class `F` to its cache path, creating parent dirs."""
+    pkl_path = F._cached_cls_path_for_uri(F.BASE_URI, cls_cache_dir=F.CLS_CACHE_DIR)
+    pkl_path.parent.mkdir(parents=True, exist_ok=True)
+    import cloudpickle
+    with open(pkl_path, 'wb') as out:
+      cloudpickle.dump(F, out, protocol=3) # Support older python
+
+  @classmethod
+  def _needs_explode(cls):
+    """Return True unless frame paths are recorded and all exist on disk."""
+    frame_paths = cls.EXPLODED_FRAME_PATHS
+    all_exist = frame_paths and all(Path(p).exists() for p in frame_paths)
+    return not all_exist
+    
+  ## User API / Factory-Factory API
+
+  DEFAULT_BASE_URI = datum.URI( # Copied (never used as-is) when `base_uri` is not given
+                  dataset='anon',
+                  split='anon',
+                  # NB: leave segment_id blank to deduce it from video_uri
+                  topic='video_camera')
+
+  DEFAULT_EXPODE_PARAMS = VideoExplodeParams() # (sic) used when `explode_params` is not given
+
+  @classmethod
+  def create_factory_for_video(
+        cls,
+        video_uri,
+        base_uri=None,
+        explode_params=None,
+        auto_explode=True,
+        start_timestamp_lstat_attr='st_mtime',
+        start_time_nanostamp=None,
+        cls_cache_dir=None,
+        img_cache_cls=None,
+        force_recompute_cls=False,
+        do_cache_factory=True):
+    """Create and return a `StampedDatumTableFactory` class instance for the
+    given `video_uri`.  Use a pre-computed and cached class instance if
+    available (unless `force_recompute_cls`); save the results of this function
+    call to the cache only if `do_cache_factory`.  We will read part of the
+    video to assess image dimensions, length, etc.  Use `explode_frames()` to
+    also decode the entire video and fill the frame caches.
+
+    Video metadata (including the start time) comes from
+    `VideoMeta.create_for_video()`, which is given `start_timestamp_lstat_attr`;
+    if `start_time_nanostamp` is given, it replaces that start time.
+
+    `base_uri` is copied (never modified): its topic gets a suffix encoding
+    `explode_params`, and a blank `segment_id` is deduced from `video_uri`.
+    """
+    # Copy so that we never mutate the caller's `base_uri` (or the default)
+    cls_base_uri = copy.deepcopy(base_uri or cls.DEFAULT_BASE_URI)
+
+    explode_params = explode_params or copy.deepcopy(cls.DEFAULT_EXPODE_PARAMS)
+    topic_suffix = (
+      f"|max_hw_{explode_params.max_hw}"
+      f"|ext_{explode_params.image_file_extension}"
+    )
+    if not cls_base_uri.topic:
+      cls_base_uri.topic = 'video_camera'
+    cls_base_uri.topic = cls_base_uri.topic + topic_suffix
+
+    if not cls_base_uri.segment_id:
+      cls_base_uri.segment_id = cls._default_segment_id_for_video_uri(video_uri)
+
+    if not force_recompute_cls:
+      F = cls._maybe_load_F(cls_base_uri, cls_cache_dir=cls_cache_dir)
+      if F is not None:
+        util.log.debug(
+          f"Using cached {F.__name__} for {str(cls_base_uri)}")
+        return F
+
+    util.log.info(
+      "DiskCachedFramesVideoSegmentFactory: Creating video meta for %s" % video_uri)
+    video_meta = VideoMeta.create_for_video(
+      video_uri, lstat_attr=start_timestamp_lstat_attr)
+    if start_time_nanostamp is not None:
+      video_meta.start_time_nanostamp = start_time_nanostamp
+
+    img_cache_cls = img_cache_cls or cls.DEFAULT_IMAGE_CACHE_CLS
+
+    class MyVideoSDTFactory(cls):
+      BASE_URI = cls_base_uri
+      VIDEO_METADATA = video_meta
+      CLS_CACHE_DIR = cls_cache_dir
+      EXPLODE_PARAMS = explode_params
+      IMAGE_CACHE_CLS = img_cache_cls
+      AUTO_EXPLODE = auto_explode
+
+    if do_cache_factory:
+      util.log.info(
+        f"Saving cached {cls.__name__} for {str(cls_base_uri)}")
+      cls._save_F(MyVideoSDTFactory)
+
+    return MyVideoSDTFactory
+
+  @classmethod
+  def get_segment_uri(cls):
+    """Return a copy of this segment's URI; its `extra` includes the video
+    path if available, to help --list-and-exit."""
+    segment_uri = copy.deepcopy(cls.BASE_URI.to_segment_uri())
+    if cls.VIDEO_METADATA is not None:
+      segment_uri.extra[cls.__name__ + '.video_uri'] = str(cls.VIDEO_METADATA.video_uri)
+    return segment_uri
+  
+
+  DEFAULT_IMAGE_CACHE_CLS = AssetDiskCache # NOTE(review): stub; its `new_dirpath()` raises NotImplementedError -- confirm
+
+  @classmethod
+  def explode_frames(
+        cls,
+        force_recompute=False,
+        do_cache_factory=True,
+        img_cache_now_time=None):
+    """Explode the video's frames to image files (using `ffmpeg_explode()`)
+    in a directory obtained from an `IMAGE_CACHE_CLS` instance, and return a
+    subclass of `cls` that has `EXPLODED_FRAME_PATHS` set.  Return `cls`
+    itself if its frames are already exploded (unless `force_recompute`).
+    If `do_cache_factory`, also save the new class to the class cache."""
+    if not force_recompute and not cls._needs_explode():
+      util.log.debug(
+        f"Factory \n{str(cls.BASE_URI)}\n already has "
+                    f"{len(cls.EXPLODED_FRAME_PATHS or [])} exploded frames.")
+      return cls
+
+    img_cache = cls.IMAGE_CACHE_CLS()
+    cache_root = Path('DiskCachedFramesVideoSegmentFactory_root')
+    cache_dirkey = str(cache_root / cls._uri_dirkey_for_uri(cls.BASE_URI))
+    dest_root = img_cache.new_dirpath(cache_dirkey, t=img_cache_now_time)
+
+    util.log.info(
+      f"Factory \n{str(cls.BASE_URI)}\n exploding frames to \n{dest_root} ...")
+    frame_paths = ffmpeg_explode(
+      cls.EXPLODE_PARAMS, cls.VIDEO_METADATA.video_uri, dest_root)
+    util.log.info("... explode complete!")
+
+    class MyExplodedVideoSDTFactory(cls):
+      EXPLODED_FRAME_PATHS = frame_paths
+
+    if do_cache_factory:
+      util.log.info(
+        f"Saving updated cached {MyExplodedVideoSDTFactory.__name__}"
+        f" for {str(MyExplodedVideoSDTFactory.BASE_URI)}")
+      MyExplodedVideoSDTFactory._save_F(MyExplodedVideoSDTFactory)
+
+    return MyExplodedVideoSDTFactory
+    
+  @classmethod
+  def create_sd_table(cls, spark=None):
+    """Return the `StampedDatumTable` for just this factory's video, working
+    within a `Spark.sess(spark)` context."""
+    with Spark.sess(spark) as sess:
+      segment_uri = cls.get_segment_uri()
+      return cls.get_segment_sd_table(segment_uri, spark=sess)
+
+  ## StampedDatumTableFactory Impl
+
+  @classmethod
+  def _get_all_segment_uris(cls): # This factory has exactly one segment
+    return [cls.get_segment_uri()]
+
+  @classmethod
+  def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):
+    """Return a list holding one RDD of this video's datums, or `[]` if
+    `only_segments` is given and none of those matches this segment.  If
+    `AUTO_EXPLODE`, first explode the video's frames when that is needed."""
+    if existing_uri_df is not None:
+      util.log.info(
+        "Note: resume mode unsupported, got an existing_uri_df")
+
+    if only_segments:
+      has_match = any(
+              suri.soft_matches_segment_of(cls.get_segment_uri())
+              for suri in only_segments)
+      if not has_match:
+        return []
+
+    # NB: use the exploded factory directly instead of recursing into its
+    # `_create_datum_rdds()`: if the explode yields no (existing) frames, a
+    # recursive call would want to explode again, and so on without end.
+    F = cls
+    if cls.AUTO_EXPLODE and cls._needs_explode():
+      util.log.info(
+        f"Auto-exploding cached images for {cls.__name__}"
+        f" for {str(cls.BASE_URI)} ...")
+      F = cls.explode_frames()
+      util.log.info("... done auto-exploding!")
+
+    # Generate URIs ...
+    uris = F._get_image_uris()
+    uri_rdd = spark.sparkContext.parallelize(uris)
+    util.log.info(f"Creating datums for {len(uris)} frames ...")
+    datum_rdd = uri_rdd.map(F._create_stamped_datum)
+
+    return [datum_rdd]
+
+
+  ## Utils
+
+  @classmethod
+  def _default_segment_id_for_video_uri(cls, video_uri):
+    """Return `<file name>_<hash>`: the name of the file in `video_uri` plus
+    the first 10 hex digits of the MD5 of the whole URI string."""
+    import hashlib
+    from urllib.parse import urlparse
+
+    uri_str = str(video_uri)
+    file_name = Path(urlparse(uri_str).path).name
+    digest = hashlib.md5(uri_str.encode('utf-8')).hexdigest()
+
+    return '_'.join((file_name, digest[:10]))
+
+  @classmethod
+  def _uri_dirkey_for_uri(cls, uri): # Dir key: dataset/split/segment_id/topic
+    parts = (uri.dataset, uri.split, uri.segment_id, uri.topic)
+    return str(Path(*parts))
+
+  @classmethod
+  def _cached_cls_path_for_uri(cls, uri, cls_cache_dir=None):
+    """Return the path of the pickled factory class for `uri`.  The cache
+    root is `cls_cache_dir`, else `cls.CLS_CACHE_DIR`, else a default
+    directory under `psegs.conf.C.DATA_ROOT`."""
+    cache_root = cls_cache_dir or cls.CLS_CACHE_DIR
+    if not cache_root:
+      from psegs.conf import C
+      cache_root = C.DATA_ROOT / 'DiskCachedFramesVideoSegmentFactory_cache'
+    cache_root = Path(cache_root)
+
+    util.log.debug(f"{cls.__name__} using cls_cache_dir {cache_root} ...")
+
+    return cache_root.joinpath(
+      cls._uri_dirkey_for_uri(uri),
+      'DiskCachedFramesVideoSegmentFactory_cls.cpkl')
+  
+  @classmethod
+  def _get_uri_extra(cls):
+    """Return video metadata as a dict of strings for URI `extra`s; the keys
+    start with 'DiskCachedFramesVideoSegmentFactory.' & the URI is quoted."""
+    from urllib.parse import quote_plus
+    meta = cls.VIDEO_METADATA
+    fields = [
+      ('video_uri', quote_plus(str(meta.video_uri))),
+      ('start_time_nanostamp', meta.start_time_nanostamp),
+      ('frames_per_second', meta.frames_per_second),
+      ('n_frames', meta.n_frames),
+      ('is_10bit_hdr', meta.is_10bit_hdr),
+    ]
+    return {'DiskCachedFramesVideoSegmentFactory.' + k: str(v) for k, v in fields}
+
+  # @classmethod
+  # def _ffmpeg_asplode_frames(
+  #         cls,
+  #         video_meta,
+  #         dest_root,
+  #         max_hw=-1,
+  #         file_extension='png',
+  #         jpeg_quality=2):
+    
+  #   import math
+  #   from oarphpy import util as oputil
+    
+  #   video_path = Path(video_meta.video_uri).resolve()
+
+  #   rescale_arg = ''
+  #   if max_hw >= 0:
+  #     rescale_arg = (
+  #       f"-vf 'scale=if(gte(iw\,ih)\,min({max_hw}\,iw)\,-2):if(lt(iw\,ih)\,min({max_hw}\,ih)\,-2)' "
+  #     )
+  #   qscale_arg = ''
+  #   if file_extension == 'jpg':
+  #     qscale_arg = f" -qscale {jpeg_quality} "
+
+  #   zfill = int(math.log10(video_meta.n_frames)) + 1
+
+  #   FFMPEG_CMD = f"""
+  #     cd {dest_root} && \
+  #     ffmpeg \
+  #       -y \
+  #       -noautorotate \
+  #       -vframes {video_meta.n_frames} \
+  #       -i {video_path} \
+  #       {rescale_arg} \
+  #       -vsync 0 \
+  #       {qscale_arg} \
+  #         DiskCachedFramesVideoSegmentFactory_frame_%0{zfill}d.{file_extension}
+  #   """
+  #   oputil.run_cmd(FFMPEG_CMD)
+
+  #   paths = sorted(
+  #     Path(p)
+  #     for p in oputil.all_files_recursive(
+  #       dest_root, 
+  #       pattern='DiskCachedFramesVideoSegmentFactory_frame_*'))
+  #   return paths
+
+  @classmethod
+  def _get_image_uris(cls):
+    """Return one URI per exploded frame (none if `EXPLODED_FRAME_PATHS` is
+    unset); `extra` has video metadata plus the frame's path and index."""
+    meta = cls.VIDEO_METADATA
+    exploded_paths = cls.EXPLODED_FRAME_PATHS or []
+    n_exploded = str(len(exploded_paths))
+    def frame_uri(idx, path):
+      t = int(
+        meta.start_time_nanostamp + idx * (1e9 / float(meta.frames_per_second)))
+      uri = cls.BASE_URI.replaced(timestamp=t)
+      uri.extra.update(cls._get_uri_extra())
+      uri.extra['DiskCachedFramesVideoSegmentFactory.frame_path'] = str(path)
+      uri.extra['DiskCachedFramesVideoSegmentFactory.frame_index'] = str(idx)
+      uri.extra['DiskCachedFramesVideoSegmentFactory.n_frames_exploded'] = n_exploded
+      return uri
+    return [frame_uri(i, p) for i, p in enumerate(exploded_paths)]
+
+  @classmethod
+  def _create_stamped_datum(cls, uri): # Build the datum for one URI from `_get_image_uris()`
+    assert cls.EXPLODED_FRAME_PATHS, \
+      (f"User must call explode_frames() before realizing a "
+       f"StampedDatumTable, {cls} {cls.VIDEO_METADATA}")
+
+    frame_idx = uri.extra['DiskCachedFramesVideoSegmentFactory.frame_index']
+    frame_idx = int(frame_idx)
+    n_frames_exploded = uri.extra['DiskCachedFramesVideoSegmentFactory.n_frames_exploded']
+    n_frames_exploded = int(n_frames_exploded)
+
+    frame_path = uri.extra['DiskCachedFramesVideoSegmentFactory.frame_path']
+
+    assert Path(frame_path).exists(), frame_path
+    extra = dict(uri.extra) # Copy, so that `uri.extra` itself is left as-is
+    extra['DiskCachedFramesVideoSegmentFactory.frame_path'] = str(frame_path)
+
+    # image_factory = (
+    #   lambda: DiskCachedFramesVideoSegmentFactory_load_image(frame_path))
+    # image_factory = DiskCachedFramesVideoSegmentFactory_load_image(frame_path)
+
+    def _load_image(path=None):
+      import imageio
+      return imageio.imread(path)
+    image_factory = lambda: _load_image(path=frame_path) # Reads the file only when called
+
+    vm = cls.VIDEO_METADATA
+    # NB: read the size of the exploded image file itself; presumably because
+    # exploding may rescale frames (see `EXPLODE_PARAMS.max_hw`), in which
+    # case the size in the video metadata would not apply -- TODO confirm.
+    w, h = get_image_wh(frame_path)
+
+    # All we know for sure is the frame index, so include a refined estimated
+    # frame timestamp and context used for that estimate.  This estimate could
+    # be WRONG tho if there are lots of dropped frames and the video metadata
+    # is actually correct.
+    extra['DiskCachedFramesVideoSegmentFactory.VideoMeta.start_time_nanostamp'] = str(vm.start_time_nanostamp)
+    extra['DiskCachedFramesVideoSegmentFactory.VideoMeta.n_frames'] = str(vm.n_frames)
+    extra['DiskCachedFramesVideoSegmentFactory.VideoMeta.frames_per_second'] = str(vm.frames_per_second)
+    extra['DiskCachedFramesVideoSegmentFactory.VideoMeta.end_time_nanostamp'] = str(vm.end_time_nanostamp)
+
+    duration_ns = vm.end_time_nanostamp - vm.start_time_nanostamp
+    ns_per_frame = duration_ns / float(n_frames_exploded)
+    estimated_frame_nanostamp = frame_idx * ns_per_frame # NOTE(review): an offset from the start, not an absolute stamp? confirm
+    extra['DiskCachedFramesVideoSegmentFactory.estimated_frame_nanostamp'] = str(estimated_frame_nanostamp)
+
+    ci = datum.CameraImage.create_world_frame_ci(
+          sensor_name=uri.topic,
+          width=w,
+          height=h,
+          timestamp=uri.timestamp,
+          image_factory=image_factory,
+          extra=extra)
+
+    return datum.StampedDatum(uri=uri, camera_image=ci)
+
+  
+
+
+
+###############################################################################
+## StampedDatumTableFactory for an Image Collection
+
+def load_image(path):
+  """Read and return the image at `path`.  Defined at package-level for
+  easier serialization."""
+  import imageio; return imageio.imread(path)
+
+
+def video_to_pngs(video_uri, out_dir, preserve_mtime=True):
+  """Decode every frame of `video_uri` to `out_dir / frame_<index>.png`.
+  NOTE(review): `preserve_mtime` is accepted but currently has no effect."""
+  import imageio
+  from tqdm import tqdm
+
+  out_dir = Path(out_dir)
+  out_dir.mkdir(parents=True, exist_ok=True)
+
+  with imageio.get_reader(video_uri) as r: # Always close the reader when done
+    n_frames = r.count_frames()
+    n_zfill = len(str(n_frames)) # Digit count; unlike log10(), OK for 0 frames
+    for i in tqdm(range(n_frames)):
+      im = r.get_data(i)
+      dest = out_dir / f'frame_{str(i).zfill(n_zfill)}.png'
+      imageio.imwrite(dest, im, compress_level=1) # Fastest
+
+
+class AdhocImagePathsSDTFactory(StampedDatumTableFactory):
+  """This `StampedDatumTableFactory` wraps a single collection of image paths
+  and exposes them as a segment of PSegs `CameraImage`s.  See
+  `create_factory_for_images()` below for quickstart.
+  """
+
+  IMAGE_PATHS = [] # Paths of the images in this segment
+  IMAGE_TIMESTAMPS = None # Optional nanostamps, indexed like `IMAGE_PATHS`
+    # If None, the timestamp defaults to (index in IMAGE_PATHS) + 1
+
+  DATASET = 'anon' # `dataset` of this segment's URI
+  SPLIT = 'anon' # `split` of this segment's URI
+  SEGMENT_ID = 'anon_segment' # `segment_id` of this segment's URI
+  TOPIC = 'camera_adhoc' # `topic` of every image URI
+
+  # For smaller datasets, use a smaller number of partitions based upon the
+  # size of `IMAGE_PATHS`
+  SPARK_AUTO_REPARTITION = True
+
+  @classmethod
+  def create_factory_for_images(
+        cls,
+        images_dir=None,
+        image_exts=('.jpg', '.png'),
+        image_paths=None,
+        dataset='anon',
+        split='anon',
+        topic='camera_adhoc',
+        segment_id=None,
+        timestamp_use='st_mtime',
+        limit=-1):
+    """Create and return a `StampedDatumTableFactory` class instance
+    for the given set of images.  Provide either a directory of images
+    `images_dir` and chosen file extensions (case-insensitive) `image_exts`
+    OR a list of paths `image_paths`.  If `limit` > 0, keep the first `limit`.
+
+    If `timestamp_use` is not null, we will infer timestamps by stat-ing the
+    files and using the given stat attribute.  We may induce "fake" timestamps
+    (off by a few nanoseconds) if many files have the same timestamp.
+    Alternatively, you can provide a list of nanostamps `timestamp_use` to use.
+
+    Use this factory-factory function to produce a factory for your images,
+    and then:
+     * Use `create_sd_table()` to directly get a `StampedDatumTable` with your
+        wrapped `CameraImage` instances.
+     * Register the returned class to a PSegs `UnionFactory` as part of a
+        larger collection of segments.
+    """
+
+    assert image_paths is not None or images_dir is not None
+
+    if images_dir is not None:
+      images_dir = Path(images_dir)
+      image_paths = sorted(
+        p for p in images_dir.iterdir() if p.suffix.lower() in image_exts)
+      if segment_id is None:
+        segment_id = images_dir.name
+    else:
+      image_paths = [Path(p) for p in image_paths]
+
+    if segment_id is None:
+      if images_dir is not None:
+        segment_id = images_dir.name
+      else:
+        assert len(image_paths) > 0
+        segment_id = image_paths[0].parent.name
+
+    if limit > 0:
+      image_paths = image_paths[:limit]
+
+    image_timestamps = None
+    if isinstance(timestamp_use, six.string_types):
+      def get_nanostamp(path):
+        res = path.lstat()
+        t_sec = getattr(res, timestamp_use)
+        return int(t_sec * 1e9)
+
+      image_timestamps = [
+        get_nanostamp(p) for p in image_paths
+      ]
+
+      # Ensure we don't induce any timestamp collisions.  All images are
+      # distinct, so they should have distinct timestamps.  Bump a repeated
+      # stamp by a few nanos until it collides with no stamp assigned so far.
+      used_t = set()
+      next_t = {} # Original stamp -> next candidate to try for a repeat
+      distinct_t = []
+      for t in image_timestamps:
+        candidate = next_t.get(t, t)
+        while candidate in used_t:
+          candidate += 1 # Add a few nanos to make distinct
+        used_t.add(candidate)
+        next_t[t] = candidate + 1
+        distinct_t.append(candidate)
+      image_timestamps = distinct_t
+
+    elif timestamp_use is not None:
+      image_timestamps = list(timestamp_use)
+
+    class MyAdhocImagePathsSDTFactory(cls):
+      IMAGE_PATHS = image_paths
+      IMAGE_TIMESTAMPS = image_timestamps
+      DATASET = dataset
+      SPLIT = split
+      SEGMENT_ID = segment_id
+      TOPIC = topic
+
+    return MyAdhocImagePathsSDTFactory
+
+  @classmethod
+  def get_segment_uri(cls):
+    return datum.URI(
+            dataset=cls.DATASET,
+            split=cls.SPLIT,
+            segment_id=cls.SEGMENT_ID)
+
+  @classmethod
+  def get_image_uris(cls):
+    base_uri = cls.get_segment_uri()
+    uris = []
+    for i, p in enumerate(cls.IMAGE_PATHS):
+      if cls.IMAGE_TIMESTAMPS is None:
+        t = i + 1
+      else:
+        t = cls.IMAGE_TIMESTAMPS[i]
+
+      uri = base_uri.replaced(topic=cls.TOPIC, timestamp=t)
+      uri.extra['AdhocImagePathsSDTFactory.image_path'] = str(p)
+      uris.append(uri)
+    return uris
+
+  @classmethod
+  def create_stamped_datum(cls, uri):
+    img_path = Path(uri.extra['AdhocImagePathsSDTFactory.image_path'])
+    w, h = get_image_wh(img_path)
+    
+    ci = datum.CameraImage.create_world_frame_ci(
+          sensor_name=uri.topic,
+          width=w,
+          height=h,
+          timestamp=uri.timestamp,
+          image_factory=lambda: load_image(img_path),
+          extra=dict(uri.extra))
+
+    return datum.StampedDatum(uri=uri, camera_image=ci)
+  
+  @classmethod
+  def create_sd_table(cls, spark=None):
+    with Spark.sess(spark) as spark:
+      seg_uri = cls.get_segment_uri()
+      sdt = cls.get_segment_sd_table(seg_uri, spark=spark)
+      return sdt
+
+
+  ## StampedDatumTableFactory Impl
+
+  @classmethod
+  def _get_all_segment_uris(cls):
+    return [cls.get_segment_uri()]
+
+  @classmethod
+  def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):
+    if existing_uri_df is not None:
+      util.log.info(
+        f"Note: resume mode unsupported, got existing_uri_df {existing_uri_df}")
+    
+    if only_segments:
+      has_match = any(
+              suri.soft_matches_segment_of(cls.get_segment_uri())
+              for suri in only_segments)
+      if not has_match:
+        return []
+
+    # Generate URIs ...
+    uris = cls.get_image_uris()
+    num_slices = None
+    if cls.SPARK_AUTO_REPARTITION:
+      from oarphpy.spark import cluster_cpu_count
+      import math
+      n_cpus = cluster_cpu_count(spark)
+      if len(uris) < n_cpus:
+        num_slices = max(1, int(math.log(len(uris) or 1)))
+
+    # ... now create RDD
+    uri_rdd = spark.sparkContext.parallelize(uris, numSlices=num_slices)
+    util.log.info(f"Creating datums for {len(uris)} images ...")
+
+    datum_rdd = uri_rdd.map(cls.create_stamped_datum)
+    return [datum_rdd]
+
+
+###############################################################################
+## Adhoc Directory Tree -> UnionFactory of Adhoc{ImagePaths,Video}SDTFactories
+
+"""
+video file -> segment
+directory of images -> assume want sub-segment
+
+future: directory of images for dedicated segment (e.g. camera after iphone)
+
+"""
+
+
+
+if __name__ == '__main__':
+  F = AdhocVideosSDTFactory.create_factory_for_video(
+        '/outer_root/media/970-evo-plus-raid0/iphone_vids_to_sfm/lidar_hero10/image_capture_continuous/GX010018.MP4',
+        asset_cache_dir='/outer_root/media/970-evo-plus-raid0/iphone_vids_to_sfm/vids_to_sfm/lidar_hero10_winter_stinsin_GX010018.MP4_cache')
+  F.maybe_build_cache()
+
+  # video_to_pngs(
+  #   '/outer_root/media/970-evo-plus-raid0/iphone_vids_to_sfm/vids_to_sfm/dubs-gym-bluetiful-subie-lidar-comparison.MOV',
+  #   '/outer_root/media/970-evo-plus-raid0/hloc_out/anon.anon.dubs-gym-bluetiful-subie-lidar-comparison.MOV/images/')
diff --git a/psegs/datasets/colmap.py b/psegs/datasets/colmap.py
new file mode 100644
index 0000000..08d04bd
--- /dev/null
+++ b/psegs/datasets/colmap.py
@@ -0,0 +1,914 @@
+# Copyright 2022 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
+# This module helps convert COLMAP reconstructions to PSegs segments.
+# pycolmap is a soft dependency for this module.
+# To enable: `pip3 install "pycolmap>=0.1.0"`
+
+from psegs import util
+try:
+  import pycolmap
+except ImportError:
+  util.log.error("This module requires pycolmap>=0.1.0")
+
+
+import copy
+import json
+import itertools
+from pathlib import Path
+from tqdm.auto import tqdm
+
+import cv2
+import numpy as np
+
+from psegs import datum
+from psegs.table.sd_table_factory import StampedDatumTableFactory
+
+
+def _find_image_record(image_name, recon, err_msg=''):
+  # pycolmap does not provide a map, so we need to do a linear find
+  iinfo = None
+  iid = -1
+  for ciid, cinfo in recon.images.items():
+    if cinfo.name == image_name:
+      iinfo = cinfo
+      iid = ciid
+  if iinfo is None:
+    err_msg = err_msg or f"Could not find image_name {image_name}"
+    raise KeyError(err_msg)
+  return iinfo, iid
+
+
+def colmap_recon_create_world_cloud(
+        recon_dir,
+        sensor_name='colmap_sparse'):
+  """Given a COLMAP reconstruction assets directory `recon_dir`, extract the
+  sparse model 3D point cloud as a single `PointCloud` in the world frame.
+  """
+
+  def _get_cloud(recon_dir):
+
+    recon = pycolmap.Reconstruction(recon_dir)
+    ptid_to_info = recon.points3D
+
+    xyzrgbErrViz = np.zeros((len(ptid_to_info), 8), dtype='float')
+    for i, (ptid, info) in enumerate(sorted(ptid_to_info.items())):
+      xyzrgbErrViz[i, :3] = info.xyz
+      xyzrgbErrViz[i, 3:6] = info.color
+      xyzrgbErrViz[i, 6] = info.error
+      xyzrgbErrViz[i, 7] = info.track.length()
+    return xyzrgbErrViz
+  
+  # The points are in the world frame; provide identity transform(s)
+  pc = datum.PointCloud.create_world_frame_cloud(
+          sensor_name=sensor_name,
+          cloud_factory=lambda: _get_cloud(recon_dir),
+          cloud_colnames=[
+            'x', 'y', 'z',
+            'r', 'g', 'b',
+            'colmap_err', 'num_views_visible',
+          ])
+  return pc
+
+
+def colmap_get_intrinsics(camera):
+  if len(camera.params) < 4:
+    # Probably SIMPLE_PINHOLE
+    # FMI https://github.com/colmap/colmap/blob/9f3a75ae9c72188244f2403eb085e51ecf4397a8/scripts/python/visualize_model.py#L88
+    fx, cx, cy = camera.params[:4]  
+    fy = fx
+  else:
+    fx, fy, cx, cy = camera.params[:4]
+  
+  K = np.array([
+        [fx,  0, cx],
+        [0,  fy, cy],
+        [0,   0,  1],
+  ])
+  
+  camera_model = None
+  if hasattr(camera, 'model'):
+    camera_model = camera.model
+  elif hasattr(camera, 'model_name'):
+    # pycolmap >= 0.4.0
+    camera_model = camera.model_name
+  assert camera_model is not None
+
+  distortion_kv = {}
+  distortion_model = f'colmap_camera.model={camera_model}'
+  if camera_model == 'OPENCV':
+    distortion_model = 'OPENCV'
+    distortion_kv = {
+      'k1': float(camera.params[4]),
+      'k2': float(camera.params[5]),
+      'p1': float(camera.params[6]),
+      'p2': float(camera.params[7]),
+    }
+  elif camera_model == 'FULL_OPENCV':
+    distortion_model = 'FULL_OPENCV'
+    distortion_kv = {
+      'k1': float(camera.params[4]),
+      'k2': float(camera.params[5]),
+      'p1': float(camera.params[6]),
+      'p2': float(camera.params[7]),
+      'k3': float(camera.params[8]),
+      'k4': float(camera.params[9]),
+      'k5': float(camera.params[10]),
+      'k6': float(camera.params[11]),
+    }
+  elif camera_model == 'OPENCV_FISHEYE':
+    distortion_model = 'OPENCV_FISHEYE'
+    distortion_kv = {
+      'k1': float(camera.params[4]),
+      'k2': float(camera.params[5]),
+      'k3': float(camera.params[6]),
+      'k4': float(camera.params[7]),
+    }
+
+  h = camera.height
+  w = camera.width
+
+  return K, h, w, camera_model, distortion_model, distortion_kv
+
+def colmap_recon_create_camera_image(
+            image_name,
+            recon_dir,
+            src_images_dir,
+            sensor_name='colmap_sparse',
+            timestamp=0,
+            create_depth_image=False):
+  """Given a COLMAP reconstruction assets directory `recon_dir`, create
+  and return a single `CameraImage` from the COLMAP-computed camera pose
+  (and perhaps the visible COLMAP 3D keypoints only if `create_depth_image`).
+
+  `image_name` must be the (file) name of the image in the COLMAP
+  reconstruction, and `src_images_dir` is the input image directory (which
+  COLMAP typically requires be called "images") given to COLMAP.
+  """
+
+  recon = pycolmap.Reconstruction(recon_dir)
+  
+  # Find the image record
+  iinfo, iid = _find_image_record(image_name, recon, err_msg=f"Could not find {image_name} in {recon_dir}")
+  # iid = -1
+  # for ciid, cinfo in recon.images.items():
+  #   if cinfo.name == image_name:
+  #     iinfo = cinfo
+  #     iid = ciid
+  # assert iinfo is not None, 
+
+  cameras = recon.cameras
+  camera = cameras[iinfo.camera_id]
+  
+  ret = colmap_get_intrinsics(camera)
+  K, h, w, colmap_camera_model, distortion_model, distortion_kv = ret
+
+  extra = {
+    'colmap.image_id': str(iid),
+    'colmap.image_name': image_name,
+    'colmap.camera_params_raw_json': json.dumps(list(camera.params)),
+    'colmap.camera_model_name': colmap_camera_model,
+  }
+
+  R = iinfo.rotation_matrix()
+  T = iinfo.tvec
+  ego_to_sensor = datum.Transform(
+            src_frame='ego',
+            dest_frame=sensor_name)
+  ego_pose = datum.Transform(
+                rotation=R,
+                translation=T,
+                src_frame='world',
+                dest_frame='ego')
+                  # COLMAP provides world-to-camera transforms
+
+  if create_depth_image:
+    ptid_to_info = recon.points3D
+    p2ds = iinfo.get_valid_points2D()
+
+    # FIXME pycolmap `p2ds` segfaults in list comprehensions in python 3.10
+    # xyz_world = []
+    # errors = []
+    # n_visible = []
+    # uv = []
+    # for i in range(len(p2ds)):
+    #   p2d = p2ds[i]
+    #   xyz_world.append(ptid_to_info[p2d.point3D_id].xyz)
+    #   errors.append(ptid_to_info[p2d.point3D_id].error)
+    #   n_visible.append(ptid_to_info[p2d.point3D_id].track.length())
+    #   uv.append(p2d.xy)
+    # xyz_world = np.array(xyz_world)
+    # errors = np.array(errors)
+    # n_visible = np.array(n_visible)
+    # uv = np.array(uv)
+    
+
+    # for i in range(len(p2ds)):
+    #   print(([p2d.point3D_id for p2d in p2ds[:i]], i))
+    # breakpoint()
+    # print([p2d.point3D_id for p2d in p2ds])
+    xyz_world = np.array(
+      [ptid_to_info[p2d.point3D_id].xyz for p2d in p2ds]
+    )
+    
+    xyz_in_camera = (iinfo.rotation_matrix() @ xyz_world.T).T + iinfo.tvec
+    dist = np.linalg.norm(xyz_in_camera, axis=-1)
+    uv = np.array([p2d.xy for p2d in p2ds])
+    errors = np.array(
+      [ptid_to_info[p2d.point3D_id].error for p2d in p2ds]
+    )
+    n_visible = np.array(
+      [ptid_to_info[p2d.point3D_id].track.length() for p2d in p2ds]
+    )
+        
+    # Sometimes COLMAP includes points that are outside the image...
+    # TODO where do these come from? should not be due to distortion
+    idx = np.where(
+        (uv[:, 0] >= 0) &
+        (uv[:, 0] < w) &
+        (uv[:, 1] >= 0) &
+        (uv[:, 1] < h)
+    )
+    dist = dist[idx]
+    errors = errors[idx]
+    n_visible = n_visible[idx]
+    uv = uv[idx]
+
+    uu, vv = uv[:, 0].astype(int), uv[:, 1].astype(int)
+      # TODO: bilinear interpolation ? 
+      # the triangulation is already pretty noisy tho
+
+    dev = np.zeros((h, w, 3), dtype=np.float32)
+    dev[vv, uu, 0] = dist
+    dev[vv, uu, 1] = errors
+    dev[vv, uu, 2] = n_visible
+    channel_names = ['depth', 'colmap_err', 'num_views_visible']
+
+    image_factory = lambda: dev
+
+    dci = datum.CameraImage(
+              sensor_name=sensor_name,
+              image_factory=image_factory,
+              channel_names=channel_names,
+              height=h,
+              width=w,
+              timestamp=timestamp,
+              ego_pose=ego_pose,
+              ego_to_sensor=ego_to_sensor,
+              K=K,
+              distortion_model=distortion_model,
+              distortion_kv=distortion_kv,
+              extra=extra)
+    return dci
+  
+  else:
+
+    image_path = src_images_dir / iinfo.name
+    assert image_path.exists(), image_path
+
+    def _load_image(path):
+      import imageio
+      return imageio.imread(path)
+    image_factory = lambda: _load_image(image_path)
+    channel_names = ['r', 'g', 'b']
+
+    ci = datum.CameraImage(
+                sensor_name=sensor_name,
+                image_factory=image_factory,
+                channel_names=channel_names,
+                height=h,
+                width=w,
+                timestamp=timestamp,
+                ego_pose=ego_pose,
+                ego_to_sensor=ego_to_sensor,
+                K=K,
+                distortion_model=distortion_model,
+                distortion_kv=distortion_kv,
+                extra=extra)
+    return ci
+
+
+def colmap_get_image_name_to_covis_names(recon):
+  """Create and return a dict of `image.name` -> all other `image.name`s with
+  at least one co-visible 3D point (i.e. a matched point).  Returns an
+  undirected graph; covisibility is bijective.
+  """  
+  
+  points3d = recon.points3D
+  image_id_to_name = dict(
+    (image.image_id, image.name) for image in recon.images.values())
+  image_name_to_covis_names = dict(
+    (name, set())
+    for name in image_id_to_name.values())
+  
+  iter_images = tqdm(recon.images.values(), desc="Collect covisible images")
+  for image in iter_images:
+    name = image.name
+        
+    # Get all tracked neighbors... this could be slow...
+    for imp in image.points2D:
+      if imp.has_point3D():
+        ptid = imp.point3D_id
+        p3d = points3d[ptid]
+        for te in p3d.track.elements:
+          track_image_id = te.image_id
+          track_image_name = image_id_to_name[track_image_id]
+          if track_image_name != name:
+            
+            # Ensure the covisibility graph is *undirected*
+            image_name_to_covis_names[name].add(track_image_name)
+            image_name_to_covis_names[track_image_name].add(name)
+
+  # Reformat
+  image_name_to_covis_names = dict(
+    (name, sorted(neighbs))
+    for name, neighbs in image_name_to_covis_names.items())
+  return image_name_to_covis_names
+
+
+def colmap_recon_create_matched_pair(
+        image1_name,
+        image2_name,
+        recon_dir,
+        matcher_name='colmap_sparse',
+        timestamp=0,
+        include_point3d_colors_uint8=True,
+        include_point3d_world_xyz=True,
+        include_point3d_extras=True,
+        img1=None,
+        img2=None,
+        src_images_dir=None,
+        camera_image_kwargs={}):
+  """Given a COLMAP reconstruction assets directory `recon_dir`, extract the
+  matched pair for given image names.  (Note that the image pair might not
+  have any matches; see `colmap_get_image_name_to_covis_names()` to help
+  restrict to only image pairs with covisible points).
+
+  Optionally uses pre-filled `img1` and `img2`, else attempts to load them.
+  """
+
+  assert image1_name != image2_name, image2_name
+
+  matches_colnames = [
+          # Core required
+          'x1', 'y1', 'x2', 'y2'
+  ]
+  if include_point3d_colors_uint8:
+    matches_colnames += ['r', 'g', 'b']
+  if include_point3d_world_xyz:
+    matches_colnames += ['world_x', 'world_y', 'world_z']
+  if include_point3d_extras:
+    matches_colnames += ['error', 'track_length', 'colmap_p3id']
+
+  def _get_matches(
+          recon_dir, image1_name, image2_name,
+          include_point3d_colors_uint8=True,
+          include_point3d_world_xyz=True,
+          include_point3d_extras=True):
+    recon = pycolmap.Reconstruction(recon_dir)
+    ii1nfo, iid1 = _find_image_record(image1_name, recon, 
+                                err_msg=
+                                  f"Could not find {image1_name} (_get_matches) in {recon_dir}")
+    ii2nfo, iid2 = _find_image_record(image2_name, recon, 
+                                err_msg=
+                                  f"Could not find {image2_name} (_get_matches) in {recon_dir}")
+    i1_p3id_to_p2d = dict(
+      (p.point3D_id, p) for p in ii1nfo.points2D if p.has_point3D())
+    i2_p3id_to_p2d = dict(
+      (p.point3D_id, p) for p in ii2nfo.points2D if p.has_point3D())
+    covis_p3ids = set(i1_p3id_to_p2d.keys()) & set(i2_p3id_to_p2d.keys())
+
+    n_cols = (
+      4 + (3 if include_point3d_colors_uint8 else 0) 
+      + (3 if include_point3d_world_xyz else 0) 
+      + (3 if include_point3d_extras else 0)
+    )
+    matches = np.zeros((len(covis_p3ids), n_cols), dtype='float')
+    for i, p3id in enumerate(covis_p3ids):
+      i1_p2d = i1_p3id_to_p2d[p3id]
+      i2_p2d = i2_p3id_to_p2d[p3id]
+      p3d = recon.points3D[p3id]
+      c = 0
+
+      x1 = i1_p2d.x
+      y1 = i1_p2d.y
+      x2 = i2_p2d.x
+      y2 = i2_p2d.y
+      matches[i, c:c+4] = x1, y1, x2, y2
+      c += 4
+
+      if include_point3d_colors_uint8:
+        # Yes it's RGB with colors in [0, 255]
+        # https://github.com/colmap/colmap/blob/a7b50e4d70888cb2c7e5a35fc44a6a1e1f82e69a/src/colmap/scene/point3d.h#L57
+        r = p3d.color[0]
+        g = p3d.color[1]
+        b = p3d.color[2]
+        matches[i, c:c+3] = r, g, b
+        c+=3
+      
+      if include_point3d_world_xyz:
+        wx = p3d.x
+        wy = p3d.y
+        wz = p3d.z
+        matches[i, c:c+3] = wx, wy, wz
+        c+=3
+
+      if include_point3d_extras:
+        error = p3d.error
+        track_length = p3d.track.length()
+        colmap_p3id = p3id
+        matches[i, c:c+3] = error, track_length, colmap_p3id
+        c+=3
+
+    return matches
+
+  extra = {
+    'colmap.image1_name': image1_name,
+    'colmap.image2_name': image2_name,
+  }
+
+  should_fill_images = (img1 is None and img2 is None)
+  if should_fill_images:
+    assert src_images_dir is not None, f"Programming error, need {src_images_dir}"
+
+    img1 = colmap_recon_create_camera_image(
+              image1_name,
+              recon_dir,
+              src_images_dir,
+              **camera_image_kwargs)
+
+    img2 = colmap_recon_create_camera_image(
+              image2_name,
+              recon_dir,
+              src_images_dir,
+              **camera_image_kwargs)
+
+    extra['colmap.image1_id'] = img1.extra['colmap.image_id']
+    extra['colmap.image2_id'] = img2.extra['colmap.image_id']
+  else:
+    recon = pycolmap.Reconstruction(recon_dir)
+    ii1nfo, iid1 = _find_image_record(image1_name, recon, 
+                      err_msg=
+                        f"Could not find {image1_name} (cmp) in {recon_dir}")
+    ii2nfo, iid2 = _find_image_record(image2_name, recon, 
+                      err_msg=
+                        f"Could not find {image2_name} (cmp) in {recon_dir}")
+    extra['colmap.image1_id'] = str(iid1)
+    extra['colmap.image2_id'] = str(iid2)
+
+  extra.update({
+    'colmap.image1_name': image1_name,
+    'colmap.image2_name': image2_name,
+  })
+
+  mp = datum.MatchedPair(
+        matcher_name=matcher_name,
+        timestamp=timestamp,
+        img1=img1,
+        img2=img2,
+        matches_factory=lambda: _get_matches(
+          recon_dir, image1_name, image2_name,
+          include_point3d_colors_uint8=include_point3d_colors_uint8,
+          include_point3d_world_xyz=include_point3d_world_xyz,
+          include_point3d_extras=include_point3d_extras),
+        matches_colnames=matches_colnames,
+        extra=extra,
+  )
+  return mp
+
+
+def load_array(path):
+  """Listed as a package-level function to improve clarity / portability."""
+  import numpy as np
+  return np.load(path)
+
+
+class COLMAP_SDTFactory(StampedDatumTableFactory):
+  """This `StampedDatumTableFactory` helps convert a single COLMAP 
+  reconstruction into a single `StampedDatumTable` one-to-one.  While
+  most `StampedDatumTableFactory` classes help transform multiple segments,
+  this factory is agnostic to how the user stores multiple COLMAP scene
+  reconstructions and simply helps map between single scenes.
+
+  For simple use with single scenes:
+   * If your input is a PSegs segment, use
+     `create_input_images_and_psegs_assets_for_colmap()` to prepare your
+     data for input to COLMAP.
+   * Run COLMAP as desired.
+   * Use `create_sd_table_for_reconstruction()` to read back the COLMAP
+     reconstruction as a PSegs segment.
+
+  To use en masse (e.g. multiple scenes and multiple factories):
+   * Subclass and configure the `*_DIR` members below for a *single*
+     COLMAP reconstruction.  If your input is a PSegs segment, you should run
+     `create_imgpath_to_uri_and_images()` before running COLMAP for each scene
+     to save a PSegs uri <-> COLMAP image name mapping on disk.
+  """
+
+  COLMAP_RECON_DIR = Path('my_colmap_sparse')
+  COLMAP_IMAGES_DIR = Path('my_colmap_images')
+  PSEGS_ASSETS_DIR = Path('my_psegs_assets')
+
+  INCLUDE_DEPTH_IMAGES = True
+  INCLUDE_WORLD_CLOUD = True
+  INCLUDE_MATCHED_PAIRS = True
+  USE_NP_CACHED_ASSETS = True
+
+  MP_MAX_PAIRS = -1
+  MP_MAX_PAIRS_SEED = 1337
+  MP_INCLUDE_POINT3D_COLORS_UINT8 = True
+  MP_INCLUDE_POINT3D_WORLD_XYS = True
+  MP_INCLUDE_POINT3D_EXTRAS = True
+  MP_INCLUDE_CAMERA_IMAGES = True
+
+  CI_RECON_TOPIC_SUFFIX = '|colmap_sparse'
+  DCI_RECON_TOPIC_SUFFIX = '|depth'
+  WORLD_CLOUD_TOPIC = 'fused_world_cloud|colmap_sparse'
+  MP_TOPIC_SUFFIX = '|matches'
+
+
+  ## Support
+
+  @classmethod
+  def get_input_image_paths(cls):
+    return sorted(
+      pp for pp in cls.COLMAP_IMAGES_DIR.iterdir() if not pp.is_dir()
+    )
+
+  @classmethod
+  def psegs_imgpath_to_uri_path(cls):
+    return cls.PSEGS_ASSETS_DIR / 'psegs_imgpath_to_uri.json'
+
+  @classmethod
+  def psegs_npy_cache_dir(cls):
+    return cls.PSEGS_ASSETS_DIR / 'npy_cached'
+
+  @classmethod
+  def create_imgpath_to_uri_and_images(
+          cls,
+          sd_table,
+          only_topics=None,
+          resize_image_max_height=-1,
+          spark=None):
+    
+    # Select the datums to export
+    datum_rdd = sd_table.get_datum_rdd_matching(
+                    only_types=['camera_image'],
+                    only_topics=only_topics)
+    
+    # Try to favor fewer, longer-lived python processes
+    from oarphpy.spark import cluster_cpu_count
+    from psegs.spark import Spark
+    with Spark.sess(spark) as spark:
+      n_cpus = cluster_cpu_count(spark)
+    datum_rdd = datum_rdd.repartition(n_cpus).cache()
+
+    util.log.info(f"Selected {datum_rdd.count()} input images ...")
+
+    cls.COLMAP_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
+    def save_image(stamped_datum):
+      import imageio
+      ci = stamped_datum.camera_image
+      if not ci.has_rgb(): # TODO: can we support grey?
+        return False
+      fname = (
+        str(stamped_datum.uri.topic) + "." + 
+        str(stamped_datum.uri.timestamp) + ".png")
+      dest = cls.COLMAP_IMAGES_DIR / fname
+      image = ci.image
+
+      h, w = image.shape[:2]
+      if (resize_image_max_height >= 0 and h > resize_image_max_height):
+        scale = float(resize_image_max_height) / h
+        th = int(scale * h)
+        tw = int(scale * w)
+        image = cv2.resize(image, (tw, th))
+
+      imageio.imsave(dest, image)
+      return str(stamped_datum.uri), str(dest)
+    
+    uri_paths = datum_rdd.map(save_image).filter(lambda x: x).collect()
+    util.log.info(f"... saved {len(uri_paths)} input images ...")
+    data_path = cls.psegs_imgpath_to_uri_path()
+    with open(data_path, 'w') as f:
+      json.dump(uri_paths, f, indent=2)
+    util.log.info(f"... saved PSegs uri<->image mapping to {data_path}")
+  
+  @classmethod
+  def get_imgpath_to_uri(cls):
+    data_path = cls.psegs_imgpath_to_uri_path()
+    if data_path.exists():
+      with open(data_path) as f:
+        uri_paths = json.load(f)
+      return dict(
+        (p, datum.URI.from_str(suri))
+        for suri, p in sorted(uri_paths))
+    else:
+      # Create an anonymous segment if no prior PSegs data available
+      base_uri = datum.URI(
+                  dataset='anon',
+                  split='anon',
+                  segment_id='anon_colmap_recon')
+      return dict(
+        (path,
+         base_uri.replaced(
+           timestamp=int(1e9 * t),
+           topic='camera|input'))
+        for t, path in enumerate(cls.get_input_image_paths()))
+
+  @classmethod
+  def get_segment_uri(cls):
+    """COLMAP Reconstructions act on only single segments"""
+    p_to_uri = cls.get_imgpath_to_uri()
+    if p_to_uri:
+      for uri in p_to_uri.values():
+        return uri.to_segment_uri()
+    else:
+      return None
+  
+  @classmethod
+  def get_colmap_recon_uris(cls):
+    if not cls.COLMAP_RECON_DIR.exists():
+      return []
+
+    imgpath_to_uri = cls.get_imgpath_to_uri()
+    
+    # Find registered images
+    recon = pycolmap.Reconstruction(cls.COLMAP_RECON_DIR)
+    registered_image_names = set(iinfo.name for iinfo in recon.images.values())
+
+    all_uris = []
+    _ci_uris = []
+    for imgpath, input_uri in imgpath_to_uri.items():
+      imgpath = Path(imgpath)
+      if imgpath.name in registered_image_names:
+        ci_uri = input_uri.replaced(
+            topic=input_uri.topic + cls.CI_RECON_TOPIC_SUFFIX)
+        ci_uri.extra['colmap.image_name'] = imgpath.name
+        ci_uri.extra['colmap.input_uri'] = input_uri.to_urlsafe_str()
+        all_uris.append(ci_uri)
+        _ci_uris.append(ci_uri)
+        if cls.INCLUDE_DEPTH_IMAGES:
+          dci_uri = ci_uri.replaced(
+            topic=ci_uri.topic + cls.DCI_RECON_TOPIC_SUFFIX)
+          all_uris.append(dci_uri)
+
+    if cls.INCLUDE_MATCHED_PAIRS:
+      ci_image_name_to_uri = dict(
+        (ci_uri.extra['colmap.image_name'], ci_uri) for ci_uri in _ci_uris)
+      image_name_to_covis_names = colmap_get_image_name_to_covis_names(recon)
+      # Build only one matched pair per distinct pair of images
+      image_pair_names = set(itertools.chain.from_iterable(
+        ((image_name1, in2) for in2 in image_name2s)
+        for image_name1, image_name2s in image_name_to_covis_names.items()))
+      if cls.MP_MAX_PAIRS > 0 and len(image_pair_names) > cls.MP_MAX_PAIRS:
+        import random
+        util.log.info(
+          "Sub-sampling matched pairs %s -> %s" % (
+            len(image_pair_names), cls.MP_MAX_PAIRS))
+        r = random.Random(cls.MP_MAX_PAIRS_SEED)
+        image_pair_names = r.sample(sorted(image_pair_names), cls.MP_MAX_PAIRS)
+      for im1_name, im2_name in image_pair_names:
+        ci1_uri = ci_image_name_to_uri[im1_name]
+        ci2_uri = ci_image_name_to_uri[im2_name]
+
+        c1_topic_base = ci1_uri.topic.replace(cls.CI_RECON_TOPIC_SUFFIX, '')
+        c2_topic_base = ci2_uri.topic.replace(cls.CI_RECON_TOPIC_SUFFIX, '')
+        mp_topic_base = '|'.join(sorted((c1_topic_base, c2_topic_base)))
+
+        mp_uri = copy.deepcopy(ci1_uri)
+        mp_uri = mp_uri.replaced(
+            timestamp=int(0.5 * abs(ci1_uri.timestamp + ci2_uri.timestamp)),
+            topic=(
+              # E.g. 'camera-input|camera-input|colmap_sparse|matches'
+              mp_topic_base + cls.CI_RECON_TOPIC_SUFFIX + cls.MP_TOPIC_SUFFIX
+              ))
+        mp_uri.extra['colmap.image1_name'] = im1_name
+        mp_uri.extra['colmap.image2_name'] = im2_name
+        mp_uri.extra['colmap.image1_uri'] = ci1_uri.to_urlsafe_str()
+        mp_uri.extra['colmap.image2_uri'] = ci2_uri.to_urlsafe_str()
+
+        all_uris.append(mp_uri)
+
+    if cls.INCLUDE_WORLD_CLOUD:
+      seg_uri = cls.get_segment_uri()
+      if seg_uri:
+        all_uris.append(
+          seg_uri.replaced(topic=cls.WORLD_CLOUD_TOPIC))
+    
+    return all_uris
+  
+  @classmethod
+  def _create_point_cloud(cls, uri):
+    from oarphpy.spark import CloudpickeledCallable
+
+    pc = colmap_recon_create_world_cloud(
+            cls.COLMAP_RECON_DIR,
+            sensor_name='colmap_sparse')
+
+    if cls.USE_NP_CACHED_ASSETS:
+      cloud_npy_fname = f"{uri.topic}_{uri.timestamp}_cloud.npy"
+      cloud_npy_path = cls.psegs_npy_cache_dir() / cloud_npy_fname
+      if not cloud_npy_path.exists():
+        with open(cloud_npy_path, 'wb') as f:
+          np.save(f, pc.get_cloud()) # Compression doesn't help
+        pc.cloud = None
+      pc.cloud_factory = CloudpickeledCallable(
+        lambda: load_array(cloud_npy_path))
+ 
+    return datum.StampedDatum(uri=uri, point_cloud=pc)
+
+  @classmethod
+  def _create_camera_image(cls, uri):
+    ci = colmap_recon_create_camera_image(
+              uri.extra['colmap.image_name'],
+              cls.COLMAP_RECON_DIR,
+              cls.COLMAP_IMAGES_DIR,
+              sensor_name=uri.topic,
+              timestamp=uri.timestamp,
+              create_depth_image=False)
+    return datum.StampedDatum(uri=uri, camera_image=ci)
+
+  @classmethod
+  def _create_depth_camera_image(cls, uri):
+    from oarphpy.spark import CloudpickeledCallable
+
+    dci = colmap_recon_create_camera_image(
+              uri.extra['colmap.image_name'],
+              cls.COLMAP_RECON_DIR,
+              cls.COLMAP_IMAGES_DIR,
+              sensor_name=uri.topic,
+              timestamp=uri.timestamp,
+              create_depth_image=True)
+
+    ci_uri = copy.deepcopy(uri)
+    ci_uri.topic = ci_uri.topic.replace(cls.DCI_RECON_TOPIC_SUFFIX, '')
+    dci.extra['psegs.depth.rgb_uri'] = str(ci_uri)
+
+    if cls.USE_NP_CACHED_ASSETS:
+      # Make sure the filename is definitely distinct
+      colmap_image_id = dci.extra['colmap.image_id']
+      depth_npy_fname = (
+        f"{uri.topic}_{uri.timestamp}_image_id.{colmap_image_id}_depth.npy")
+      depth_npy_path = cls.psegs_npy_cache_dir() / depth_npy_fname
+      if not depth_npy_path.exists():
+        with open(depth_npy_path, 'wb') as f:
+          np.savez_compressed(f, image=dci.image)
+            # Lots of zeros; compression helps
+      dci.image_factory = CloudpickeledCallable(
+        lambda: load_array(depth_npy_path)['image'])
+      
+    return datum.StampedDatum(uri=uri, camera_image=dci)
+
+  @classmethod
+  def _create_matched_pair(cls, uri):
+    """Build the StampedDatum for the COLMAP matched pair at `uri`.
+
+    If `cls.MP_INCLUDE_CAMERA_IMAGES` is set, the two camera images named in
+    the URI extras are created and handed to
+    `colmap_recon_create_matched_pair()`.  When `cls.USE_NP_CACHED_ASSETS`
+    is set, the matches array is written once to the numpy asset cache dir
+    and the matches factory is replaced by one that reads the cached file
+    via `load_array()`.
+    """
+    from oarphpy.spark import CloudpickeledCallable
+
+    if cls.MP_INCLUDE_CAMERA_IMAGES:
+      uri1 = datum.URI.from_str(uri.extra['colmap.image1_uri'])
+      img1 = cls._create_camera_image(uri1).camera_image
+      uri2 = datum.URI.from_str(uri.extra['colmap.image2_uri'])
+      img2 = cls._create_camera_image(uri2).camera_image
+    else:
+      img1 = img2 = None
+
+    pair = colmap_recon_create_matched_pair(
+      uri.extra['colmap.image1_name'],
+      uri.extra['colmap.image2_name'],
+      cls.COLMAP_RECON_DIR,
+      matcher_name=uri.topic,
+      timestamp=uri.timestamp,
+      include_point3d_colors_uint8=cls.MP_INCLUDE_POINT3D_COLORS_UINT8,
+      include_point3d_world_xyz=cls.MP_INCLUDE_POINT3D_WORLD_XYS,
+      include_point3d_extras=cls.MP_INCLUDE_POINT3D_EXTRAS,
+      img1=img1,
+      img2=img2,
+    )
+    # Carry the image URIs over so the pair can be joined back to its images
+    for key in ('colmap.image1_uri', 'colmap.image2_uri'):
+      pair.extra[key] = uri.extra[key]
+
+    if cls.USE_NP_CACHED_ASSETS:
+      # Topic + timestamp + both COLMAP image ids: make sure the filename is
+      # definitely distinct
+      id1 = pair.extra['colmap.image1_id']
+      id2 = pair.extra['colmap.image2_id']
+      cache_path = cls.psegs_npy_cache_dir() / (
+        f"{uri.topic}_{uri.timestamp}_iid1.{id1}_iid2.{id2}_matches.npy")
+      if not cache_path.exists():
+        # Use compression since we use it for other npy assets
+        with open(cache_path, 'wb') as f:
+          np.savez_compressed(f, matches=pair.get_matches())
+      pair.matches_factory = CloudpickeledCallable(
+        lambda: load_array(cache_path)['matches'])
+
+    return datum.StampedDatum(uri=uri, matched_pair=pair)
+
+  @classmethod
+  def create_stamped_datum(cls, uri):
+    """Create the StampedDatum for `uri`, dispatching on `uri.topic`.
+
+    Raises:
+      ValueError: if the topic matches none of the known topics / suffixes.
+    """
+    topic = uri.topic
+    if topic == cls.WORLD_CLOUD_TOPIC:
+      return cls._create_point_cloud(uri)
+    # NB: suffix checks run in this fixed order: depth image, camera image,
+    # then matched pair.
+    if topic.endswith(cls.DCI_RECON_TOPIC_SUFFIX):
+      return cls._create_depth_camera_image(uri)
+    if topic.endswith(cls.CI_RECON_TOPIC_SUFFIX):
+      return cls._create_camera_image(uri)
+    if topic.endswith(cls.MP_TOPIC_SUFFIX):
+      return cls._create_matched_pair(uri)
+    raise ValueError(f"Don't know what to do with {uri}")
+
+  @classmethod
+  def create_input_images_and_psegs_assets_for_colmap(
+        cls,
+        sd_table,
+        colmap_input_images_dir,
+        psegs_assets_dir):
+    """Run `create_imgpath_to_uri_and_images(sd_table)` on a throw-away
+    subclass of `cls` that is bound to the given COLMAP input images dir
+    and PSegs assets dir."""
+    images_dir = Path(colmap_input_images_dir)
+    assets_dir = Path(psegs_assets_dir)
+
+    class MyCOLMAP_SDTFactory(cls):
+      # COLMAP_RECON_DIR not needed
+      COLMAP_IMAGES_DIR = images_dir
+      PSEGS_ASSETS_DIR = assets_dir
+
+    MyCOLMAP_SDTFactory.create_imgpath_to_uri_and_images(sd_table)
+
+  @classmethod
+  def get_reconstruction_sd_table(cls, spark=None):
+    """Return the StampedDatum table for this factory's segment, or None
+    when `get_segment_uri()` yields nothing."""
+    seg_uri = cls.get_segment_uri()
+    if seg_uri:
+      return cls.get_segment_sd_table(segment_uri=seg_uri, spark=spark)
+    return None
+
+  @classmethod
+  def create_sd_table_for_reconstruction(
+        cls,
+        colmap_recon_dir,
+        colmap_input_images_dir,
+        psegs_assets_dir,
+        spark=None):
+    """Return `get_reconstruction_sd_table(spark=spark)` of a throw-away
+    subclass of `cls` that is bound to the given COLMAP reconstruction dir,
+    COLMAP input images dir, and PSegs assets dir."""
+    recon_dir = Path(colmap_recon_dir)
+    images_dir = Path(colmap_input_images_dir)
+    assets_dir = Path(psegs_assets_dir)
+
+    class MyCOLMAP_SDTFactory(cls):
+      COLMAP_RECON_DIR = recon_dir
+      COLMAP_IMAGES_DIR = images_dir
+      PSEGS_ASSETS_DIR = assets_dir
+
+    return MyCOLMAP_SDTFactory.get_reconstruction_sd_table(spark=spark)
+      
+
+  ## StampedDatumTableFactory Impl
+
+  @classmethod
+  def _get_all_segment_uris(cls):
+    """Return a list holding this factory's single segment URI, or an empty
+    list when `get_segment_uri()` returns None."""
+    suri = cls.get_segment_uri()
+    return [] if suri is None else [suri]
+
+  @classmethod
+  def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):
+    """Create the RDD(s) of StampedDatums for this COLMAP reconstruction.
+
+    Args:
+      spark: Spark session; its `sparkContext` parallelizes the URIs.
+      existing_uri_df: resume mode is unsupported; a non-None value is only
+        logged.
+      only_segments: optional iterable of segment URIs; if none of them
+        soft-matches this factory's segment, no RDDs are created.
+
+    Returns:
+      A list with one RDD of StampedDatums, or an empty list when
+      `only_segments` excludes this segment.
+    """
+    from psegs import util
+
+    if existing_uri_df is not None:
+      util.log.info(
+        f"Note: resume mode unsupported, got existing_uri_df {existing_uri_df}")
+    if only_segments is not None:
+      this_seg_uri = cls.get_segment_uri()
+      has_match = any(
+              suri.soft_matches_segment_of(this_seg_uri)
+              for suri in only_segments)
+      if not has_match:
+        return []
+
+    # Generate URIs ...
+    colmap_uris = cls.get_colmap_recon_uris()
+    uri_rdd = spark.sparkContext.parallelize(colmap_uris)
+    util.log.info(f"Creating datums for {len(colmap_uris)} URIs ...")
+
+    if cls.USE_NP_CACHED_ASSETS:
+      cache_dir = cls.psegs_npy_cache_dir()
+      util.log.info(
+        f"... using numpy asset cache at {cache_dir} ...")
+      # TODO: use psegs disk cache
+      if not cache_dir.exists():
+        util.log.info("... will populate numpy asset cache on read ...")
+        # Initial cache population may be memory intensive, so use one
+        # partition per URI.  The URI count is already known locally (no
+        # Spark job needed to count), and Spark requires a positive number
+        # of partitions even when there are no URIs.
+        uri_rdd = uri_rdd.repartition(max(1, len(colmap_uris)))
+        cache_dir.mkdir(parents=True, exist_ok=True)
+
+    datum_rdd = uri_rdd.map(cls.create_stamped_datum)
+
+    return [datum_rdd]
diff --git a/psegs/datasets/idsutil.py b/psegs/datasets/idsutil.py
new file mode 100644
index 0000000..0bcbda5
--- /dev/null
+++ b/psegs/datasets/idsutil.py
@@ -0,0 +1,70 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import textwrap
+import rich
+
+
+def is_ipython():
+  """Return True only when `IPKernelApp` appears in the config of the
+  active IPython shell; any failure (IPython not importable, no active
+  shell, ...) yields False."""
+  try:
+    from IPython import get_ipython
+    return 'IPKernelApp' in get_ipython().config
+  except Exception:
+    return False
+
+
+class IDatasetUtil(object):
+  """Base class of per-dataset utility hooks.  Every hook here returns
+  False; `show_md()` is a shared helper for showing markdown to the user."""
+
+  @classmethod
+  def emplace(cls):
+    """Emplacing a dataset means downloading it and any other dependent
+    fixtures, including test fixtures (see e.g. PSegs Extensions in the root
+    README).  This method attempts to do that initial set-up for a given
+    dataset.
+
+    In many cases, dataset availability and licensing will require manual
+    effort from the user.  This method will explain necessary action (via
+    the terminal or notebook) and attempt to help interactively.
+
+    This method should be re-entrant (multiple attempts to emplace should be
+    safe) and will return True only when all emplacing has succeeded.
+    """
+    return False
+
+  @classmethod
+  def test(cls):
+    """Hook with no base behavior; this implementation returns False."""
+    return False
+
+  @classmethod
+  def build_table(cls):
+    """Hook with no base behavior; this implementation returns False."""
+    return False
+
+  @classmethod
+  def show_md(cls, txt):
+    """Dedent the markdown string `txt` and show it: via IPython's display
+    when `is_ipython()` is true, otherwise via a `rich` console."""
+    md = textwrap.dedent(txt)
+
+    if not is_ipython():
+      from rich.console import Console
+      from rich.markdown import Markdown as rich_Markdown
+      console = Console()
+      console.print()
+      console.print(rich_Markdown(md), soft_wrap=True, width=80)
+      console.print()
+      return
+
+    from IPython.display import display
+    from IPython.display import Markdown as IPython_Markdown
+    display(IPython_Markdown(md))
+
diff --git a/psegs/datasets/ios_lidar.py b/psegs/datasets/ios_lidar.py
new file mode 100644
index 0000000..5471f64
--- /dev/null
+++ b/psegs/datasets/ios_lidar.py
@@ -0,0 +1,977 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
+## 3DScannerApp Data Parsing
+# The utilities below help parse "raw" data exported from 3DScannerApp:
+# https://www.3dscannerapp.com/
+#
+# To create data:
+#  * Download the app and do a scan
+#  * Share your scan as "All Data" or connect your device to a computer and
+#      download the files over USB
+#  * The downloaded directory has files like:
+#      frame_XXXXX.json - pose of the device at frame N
+#      frame_XXXXX.jpg  - image captured at frame N
+#      info.json - has GPS data and other context
+#      export.obj - raw fused mesh (unclear if this is from 
+#         Apple's fusion or not)
+# 
+#    If the capture was recorded in "low res" mode, the directory additionally
+#    has files like:
+#      depth_XXXXX.png - raw depth info as a 16-bit png (depth in millimeters)
+#      conf_XXXXX.png - sensor confidence for depth (Apple's ARConfidenceLevel)
+#
+# Parsing code references:
+#  * ARBodyPoseRecorder from the developer of 3DScannerApp:
+#     https://github.com/laanlabs/ARBodyPoseRecorder/blob/9e7a37cdfdb44bc223f7b983481841696a763782/ARBodyPoseRecorder/ViewController.swift#L233
+#  * rtabmap ( http://introlab.github.io/rtabmap/ ) code that appears to
+#     parse 3DScannerApp output:
+#     https://docs.ros.org/en/api/rtabmap/html/CameraImages_8cpp_source.html
+
+import copy
+import json
+import os
+from pathlib import Path
+
+import numpy as np
+from tqdm.auto import tqdm
+
+from psegs import datum
+from psegs import util
+from psegs.conf import C
+from psegs.datasets.idsutil import IDatasetUtil
+from psegs.table.sd_table_factory import StampedDatumTableFactory
+
+
+"""
+Apple camera frame:
+ +x is right
+"""
+
+
+def threeDScannerApp_get_ego_pose(json_data):
+  """Return the device pose stored in a 3DScannerApp frame json as an
+  ego -> world `datum.Transform`.
+
+  Args:
+    json_data: parsed contents of a frame json; must have a
+      `cameraPoseARFrame` entry of 16 numbers, read here as a row-major
+      4x4 matrix.
+
+  Returns:
+    A `datum.Transform` with `src_frame='ego'` and `dest_frame='world'`
+    built from the top 3x4 block of that matrix.
+  """
+  # The device ego pose is in a GPS-based coordinate frame where:
+  #  +y is "up" based upon *gravity*
+  #  +x is GPS East
+  #  +z is GPS South
+  # https://developer.apple.com/documentation/arkit/arconfiguration/worldalignment/gravityandheading
+  T_raw = json_data['cameraPoseARFrame'] # A row-major 4x4 matrix
+  T_arr_raw = np.array(T_raw).reshape([4, 4])
+
+  # NB: rtabmap (see the parsing references at the top of this file)
+  # additionally rotates from the OpenGL coordinate frame to
+  # +x = forward, +y = left, +z = up.  No such rotation is applied here;
+  # the recorded pose is passed through unchanged.
+  pose = T_arr_raw[:3, :4]
+  return datum.Transform.from_transformation_matrix(
+            pose,
+            src_frame='ego',
+            dest_frame='world')
+  
+
+def threeDScannerApp_get_K(json_data):
+  """Build the 3x3 camera matrix K from the flat `intrinsics` list of a
+  3DScannerApp frame json: focal lengths are read from indices 0 and 4,
+  the principal point from indices 2 and 5."""
+  flat = json_data['intrinsics']
+  focal_x, focal_y = flat[0], flat[4]
+  center_x, center_y = flat[2], flat[5]
+  return np.array([
+    [focal_x, 0, center_x],
+    [0, focal_y, center_y],
+    [0, 0, 1],
+  ])
+
+
+def threeDScannerApp_frame_id_from_fname(path):
+  """Return the frame id string from a file name like `frame_00012.json`
+  or `depth_00012.png`, i.e. the part between the single underscore and
+  the first dot.  Asserts that the name has exactly one underscore-separated
+  prefix."""
+  stem = os.path.basename(path).split('.')[0]
+  parts = stem.split('_')
+  assert len(parts) == 2, parts
+  return parts[1]
+
+
+def threeDScannerApp_create_frame_to_timestamp(scene_dir):
+  """Create and return a map of frame id -> unix nanostamp for every
+  `frame_*.json` under `scene_dir`, including frames that do not have
+  images (e.g. in low-res capture mode).  Returns an empty map when
+  `frame_00000.jpg` (or its recorded mtime) or `frame_00000.json` is missing.
+
+  Background: 3D Scanner App only provides 'timestamps' in the form of
+  CACurrentMediaTime (which is mach_absolute_time or *system uptime*).
+  We want more accurate unix nanostamps in order to:
+    (1) be consistent with the rest of PSegs
+    (2) join 3D Scanner App data with other data sources / sensors external
+          to the iPhone
+
+  3D Scanner App does name scenes (by default) using a second-resolution
+  timestamp, but that's in the local timezone, and the name (i.e. folder
+  name) is not hard to change / break.
+
+  It appears that the very first image recorded (i.e. frame_00000.jpg) has
+  the same unix mtime as the timestamp used for the default scene name.
+  Unfortunately, this timestamp only has 1-second resolution, but without
+  further information, we assume that the mtime of frame_00000.jpg
+  corresponds to the uptime recorded in frame_00000.json (the info / pose
+  file for the 0th frame).  Every other frame is stamped with that base
+  time plus the difference of its recorded uptime to frame 0's.
+
+  One caveat: the mtimes of the image files can sometimes change if the files
+  are copied or moved between hosts / filesystems.  (Note that the zip file
+  export of 3D Scanner App appears to preserve timestamps).  To preserve
+  timestamps in a json file, we run this command in the root of any
+  scene directory:
+
+  python -c "import os; import json; print(json.dumps(dict((p, os.path.getmtime(p)) for p in os.listdir('.')), indent=2))" > psegs_mtime.json 
+
+  When `psegs_mtime.json` exists and has an entry for frame_00000.jpg, that
+  entry is preferred over the file's current mtime.
+  """
+
+  from oarphpy import util as oputil
+
+  scene_dir = Path(scene_dir)
+
+  def _load_json(path):
+    with open(path, 'r') as f:
+      return json.load(f)
+
+  # Prefer the mtime preserved in psegs_mtime.json, if any
+  base_stamp = None
+  saved_mtimes_path = scene_dir / 'psegs_mtime.json'
+  if saved_mtimes_path.exists():
+    base_stamp = _load_json(saved_mtimes_path).get('frame_00000.jpg')
+
+  # Otherwise fall back to the image file's current mtime
+  if base_stamp is None:
+    first_img_path = scene_dir / 'frame_00000.jpg'
+    if not first_img_path.exists():
+      return {}
+    base_stamp = os.path.getmtime(first_img_path)
+
+  first_info_path = scene_dir / 'frame_00000.json'
+  if not first_info_path.exists():
+    return {}
+  base_nanostamp = int(1e9 * base_stamp)
+
+  # `time` is CACurrentMediaTime, which is mach_absolute_time, which is
+  # *system uptime*. Has microsecond resolution, due to being provided
+  # here as a time in seconds (float format)?
+  base_uptime_sec = _load_json(first_info_path)['time']
+
+  # Now infer timestamps for all frames
+  info_paths = sorted(
+    oputil.all_files_recursive(str(scene_dir), pattern='frame_*.json'))
+
+  def _to_nanostamp(path):
+    offset_sec = _load_json(path)['time'] - base_uptime_sec
+    return int(1e9 * offset_sec) + base_nanostamp
+
+  return {
+    threeDScannerApp_frame_id_from_fname(p): _to_nanostamp(p)
+    for p in info_paths
+  }
+
+
+def threeDScannerApp_create_camera_image(
+        frame_json_path,
+        sensor_name='camera|front',
+        timestamp=None):
+  """Create a `datum.CameraImage` for one 3DScannerApp frame.
+
+  Args:
+    frame_json_path: path to the frame's json file; must exist.  Image,
+      depth, and confidence files are looked up in the same directory.
+    sensor_name: sensor name for the image.  If it contains `'depth'`, a
+      two-channel (depth in meters, confidence) image is built from
+      `depth_<frame id>.png` and `conf_<frame id>.png`, and the intrinsics
+      are rescaled from the RGB resolution to the depth resolution.
+      Otherwise an RGB image is built from the `.jpg` next to the json.
+    timestamp: timestamp to record; if None, the json's `time` entry
+      (seconds of system uptime) times 1e9 is used as a fallback.
+
+  Returns:
+    The `datum.CameraImage`; pixel data is read by its `image_factory`
+    callable.  All frame json entries except the skipped ones are stored
+    json-encoded in `extra` under a `threeDScannerApp.` prefix.
+  """
+
+  frame_json_path = str(frame_json_path)
+  assert os.path.exists(frame_json_path), frame_json_path
+
+  scan_dir = Path(os.path.dirname(frame_json_path))
+  frame_id = threeDScannerApp_frame_id_from_fname(frame_json_path)
+
+  with open(frame_json_path, 'r') as f:
+    json_data = json.load(f)
+  
+  ego_pose = threeDScannerApp_get_ego_pose(json_data)
+  K = threeDScannerApp_get_K(json_data)
+
+  if timestamp is None:
+    # CACurrentMediaTime, which is mach_absolute_time, which is
+    # *system uptime*.  We use this as a fallback unless the caller
+    # has resolved timestamps for the whole scene.
+    timestamp = int(json_data['time'] * 1e9)
+
+  REQUIRED_KEYS = (
+    'averageAngularVelocity',
+    'averageVelocity',
+    'exposureDuration',
+    'frame_index',
+  )
+  # These entries are consumed above (pose, intrinsics, fallback timestamp),
+  # so they are kept out of `extra`.  NB: each entry needs its own comma;
+  # adjacent string literals silently concatenate into a single key.
+  SKIP_KEYS = (
+    'cameraPoseARFrame',
+    'intrinsics',
+    'time',
+  )
+  
+  extra = dict(
+            ('threeDScannerApp.' + k, json.dumps(v))
+            for k, v in json_data.items()
+            if k not in SKIP_KEYS)
+  assert set(REQUIRED_KEYS) - set(json_data.keys()) == set(), \
+    "Have %s wanted %s" % (extra.keys(), REQUIRED_KEYS)
+
+  extra['threeDScannerApp.frame_json_name'] = os.path.basename(frame_json_path)
+  extra['threeDScannerApp.frame_id'] = str(frame_id)
+  extra['threeDScannerApp.scan_dir'] = str(os.path.basename(scan_dir))
+
+  # https://docs.ros.org/en/api/rtabmap/html/classrtabmap_1_1CameraModel.html#a0853af9d0117565311da4ffc3965f8d2
+  # https://developer.apple.com/documentation/arkit/arcamera/2866108-transform
+  #   Apple camera frame is:
+  #     +x is right when device is in landscape; along the device long edge
+  #     +y is up when device is in landscape
+  #     +z is out of the device screen
+  ego_to_sensor = datum.Transform(
+            rotation=np.array([
+              [ 1.,   0.,   0.],
+              [ 0.,  -1.,   0.],
+              [ 0.,   0.,  -1.],
+            ]),
+            src_frame=sensor_name,
+            dest_frame='ego')
+  
+  if 'depth' in sensor_name:
+  
+    depth_path = scan_dir / f'depth_{frame_id}.png'
+    assert os.path.exists(depth_path), depth_path
+  
+    conf_path = scan_dir / f'conf_{frame_id}.png'
+    assert os.path.exists(conf_path), conf_path
+
+    # Get dimensions from the conf image, which is a smaller file
+    from psegs.util import misc
+    with open(conf_path, 'rb') as f:
+      w, h = misc.get_png_wh(f.read(1024))
+    
+    # The intrinsics are for the RGB camera, which has a bigger image sensor.
+    # We need to know the size of that image in order to adjust the
+    # intrinsics for the depth sensor.  Frame 0's image is used for this
+    # since not every frame has an RGB image.
+    rbg_path = scan_dir / 'frame_00000.jpg'
+    assert os.path.exists(rbg_path), rbg_path
+    
+    from oarphpy import util as oputil
+    with open(rbg_path, 'rb') as f:
+      rgb_w, rgb_h = oputil.get_jpeg_size(f.read(1024))
+  
+    scale_x = float(rgb_w) / w
+    scale_y = float(rgb_h) / h
+    K[0, 0] /= scale_x
+    K[0, 2] /= scale_x
+    K[1, 1] /= scale_y
+    K[1, 2] /= scale_y
+
+    def _get_depth_conf_image(depth_path, conf_path):
+      import imageio
+      import numpy as np
+      depth = imageio.imread(depth_path)
+      
+      # millimeters -> meters
+      depth = depth.astype(np.float32) * .001
+      depth = depth.reshape([depth.shape[0], depth.shape[1], 1])
+
+      conf = imageio.imread(conf_path)
+      conf = conf.reshape([conf.shape[0], conf.shape[1], 1])
+      depth_image = np.concatenate([depth, conf], axis=2)
+      return depth_image
+    
+    image_factory = lambda: _get_depth_conf_image(depth_path, conf_path)
+    channel_names = ['depth', 'confidence']
+
+    extra['threeDScannerApp.depth_path'] = os.path.basename(depth_path)
+    extra['threeDScannerApp.conf_path'] = os.path.basename(conf_path)
+
+  else:
+    frame_img_path = frame_json_path.replace('.json', '.jpg')
+    assert os.path.exists(frame_img_path), frame_img_path
+
+    from oarphpy import util as oputil
+    with open(frame_img_path, 'rb') as f:
+      w, h = oputil.get_jpeg_size(f.read(1024))
+
+    def _load_image(path):
+      import imageio
+      return imageio.imread(path)
+    image_factory = lambda: _load_image(frame_img_path)
+    channel_names = ['r', 'g', 'b']
+
+    extra['threeDScannerApp.img_path'] = os.path.basename(frame_img_path)
+
+  ci = datum.CameraImage(
+                sensor_name=sensor_name,
+                image_factory=image_factory,
+                channel_names=channel_names,
+                height=h,
+                width=w,
+                timestamp=timestamp,
+                ego_pose=ego_pose,
+                ego_to_sensor=ego_to_sensor,
+                K=K,
+                extra=extra)
+
+  return ci
+
+
+def threeDScannerApp_create_point_cloud_from_mesh(
+        mesh_path,
+        sensor_name='lidar|mesh'):
+  """Create a `datum.PointCloud` whose cloud is the vertex array of the
+  mesh file at `mesh_path` (which must exist).  The mesh is read with
+  open3d only when the cloud factory is invoked."""
+
+  assert os.path.exists(mesh_path), mesh_path
+
+  mesh_dir = Path(os.path.dirname(mesh_path))
+
+  def _load_vertices(path):
+    import open3d as o3d
+    import numpy as np
+    mesh = o3d.io.read_triangle_mesh(str(path))
+    return np.asarray(mesh.vertices)
+
+  # The meshes are in the world frame; provide identity transform(s)
+  # (i.e. transforms constructed with frame names only)
+  return datum.PointCloud(
+          sensor_name=sensor_name,
+          cloud_factory=lambda: _load_vertices(mesh_path),
+          ego_to_sensor=datum.Transform(
+            src_frame='ego',
+            dest_frame=sensor_name),
+          ego_pose=datum.Transform(src_frame='ego', dest_frame='world'),
+          extra={
+            'threeDScannerApp.mesh_path': os.path.basename(mesh_path),
+            'threeDScannerApp.scan_dir': os.path.basename(mesh_dir),
+          })
+
+
+def threeDScannerApp_get_segment_id(scan_dir='', info_path=''):
+  """Return the segment id for a 3DScannerApp scan.
+
+  Args:
+    scan_dir: the scan directory; only used to locate `info.json` when
+      `info_path` is not given.
+    info_path: path to the scan's `info.json`.
+
+  Returns:
+    The `title` recorded in `info.json` when that file exists and has one;
+    otherwise the name of the directory that contains (or would contain)
+    `info.json`.
+  """
+  if not info_path:
+    info_path = str(Path(scan_dir) / 'info.json')
+  seg_dir = os.path.dirname(info_path)
+  dir_name = os.path.split(seg_dir)[-1]
+
+  # NB: `info_path` is never empty at this point, so test for the file
+  # itself; a scan without info.json falls back to its directory name
+  # instead of raising.
+  if not os.path.isfile(info_path):
+    return dir_name
+
+  with open(info_path, 'r') as f:
+    info = json.load(f)
+  return info.get('title', dir_name)
+
+
+def threeDScannerApp_get_uris_from_scan_dir(scan_dir):
+  """Return the list of `datum.URI`s for all datums found in a 3DScannerApp
+  scan directory.
+
+  URIs are created for:
+   * each `*.obj` mesh (topic `lidar|mesh|<mesh name>`), stamped with the
+       earliest frame timestamp
+   * each frame json (topic `ego_pose`)
+   * each frame json with a sibling `.jpg` (topic `camera|front`)
+   * each frame with both `depth_<id>.png` and `conf_<id>.png`
+       (topic `camera|front|depth`)
+
+  Returns an empty list if no frame timestamps can be resolved for the scan
+  (see `threeDScannerApp_create_frame_to_timestamp()`).
+  """
+  from oarphpy import util as oputil
+
+  uris = []
+  segment_id = threeDScannerApp_get_segment_id(scan_dir=scan_dir)
+
+  frame_to_t = threeDScannerApp_create_frame_to_timestamp(scan_dir)
+  if not frame_to_t:
+    # The timestamp helper returns an empty map when frame 0 is missing;
+    # without timestamps, no datum in this scan can be stamped.
+    util.log.info(
+      "No frame timestamps resolved for %s; skipping scan" % (scan_dir,))
+    return uris
+
+  # Meshes have no frame of their own; pin them to the start of the scan
+  start_t = min(frame_to_t.values())
+  first_frame_id = min(frame_to_t.keys())
+
+  mesh_paths = oputil.all_files_recursive(scan_dir, pattern='*.obj')
+  for mesh_path in mesh_paths:
+    fname = os.path.basename(mesh_path)
+    mesh_uri = datum.URI(
+                  segment_id=segment_id,
+                  topic='lidar|mesh|' + fname.replace('.obj', ''),
+                  timestamp=start_t,
+                  extra={
+                    'threeDScannerApp.scan_dir': scan_dir,
+                    'threeDScannerApp.frame_id': first_frame_id,
+                    'threeDScannerApp.mesh_path': fname,
+                  })
+    uris.append(mesh_uri)
+
+  # Sometimes the frame json info data gets lost.  Without that data,
+  # we can't deduce timestamps nor transforms.  So just ignore dropped
+  # frames.
+  finfo_paths = oputil.all_files_recursive(scan_dir, pattern='frame*.json')
+  for finfo_path in finfo_paths:
+    frame_id = threeDScannerApp_frame_id_from_fname(finfo_path)
+    t = frame_to_t[frame_id]
+
+    xform_uri = datum.URI(
+                  segment_id=segment_id,
+                  topic='ego_pose',
+                  timestamp=t,
+                  extra={
+                    'threeDScannerApp.scan_dir': scan_dir,
+                    'threeDScannerApp.frame_id': frame_id,
+                    'threeDScannerApp.json_path': os.path.basename(finfo_path),
+                  })
+    uris.append(xform_uri)
+
+    img_path = finfo_path.replace('.json', '.jpg')
+    if os.path.exists(img_path):
+      # NB: for 'low-res' capture mode, Depth gets recorded at ~6Hz but images
+      # only at ~2Hz.  Also, sometimes images just don't get recorded
+      # (dropped frames)
+      ci_uri = datum.URI(
+                  segment_id=segment_id,
+                  topic='camera|front',
+                  timestamp=t,
+                  extra={
+                    'threeDScannerApp.scan_dir': scan_dir,
+                    'threeDScannerApp.frame_id': frame_id,
+                    'threeDScannerApp.img_path': os.path.basename(img_path),
+                    'threeDScannerApp.json_path': os.path.basename(finfo_path),
+                  })
+      uris.append(ci_uri)
+    
+    depth_path = Path(scan_dir) / f'depth_{frame_id}.png'
+    conf_path = Path(scan_dir) / f'conf_{frame_id}.png'
+    if depth_path.exists() and conf_path.exists():
+      # NB: raw depth only available when app is in 'low-res' mode
+      pc_uri = datum.URI(
+                  segment_id=segment_id,
+                  topic='camera|front|depth',
+                  timestamp=t,
+                  extra={
+                    'threeDScannerApp.scan_dir': scan_dir,
+                    'threeDScannerApp.frame_id': frame_id,
+                    'threeDScannerApp.depth_path': os.path.basename(depth_path),
+                    'threeDScannerApp.conf_path': os.path.basename(conf_path),
+                    'threeDScannerApp.json_path': os.path.basename(finfo_path),
+                  })
+      uris.append(pc_uri)
+    
+  return uris
+
+
+def threeDScannerApp_create_stamped_datum(uri):
+  """Create the `datum.StampedDatum` for a 3DScannerApp `uri`, dispatching
+  on `uri.topic`: `camera*` -> camera image (depth images also get a
+  `psegs.depth.rgb_uri` extra), `lidar|mesh*` -> point cloud, and
+  `ego_pose` -> transform.
+
+  Raises:
+    ValueError: if the URI lacks the `threeDScannerApp.scan_dir` extra or
+      has an unknown topic.
+  """
+  if 'threeDScannerApp.scan_dir' not in uri.extra:
+    raise ValueError(uri)
+  scan_dir = Path(uri.extra['threeDScannerApp.scan_dir'])
+  topic = uri.topic
+
+  if topic.startswith('camera'):
+    ci = threeDScannerApp_create_camera_image(
+            scan_dir / uri.extra['threeDScannerApp.json_path'],
+            sensor_name=topic,
+            timestamp=uri.timestamp)
+    if 'depth' in topic:
+      # Point the depth image at the URI of its RGB counterpart
+      rgb_uri = copy.deepcopy(uri)
+      rgb_uri.topic = rgb_uri.topic.replace('|depth', '')
+      ci.extra['psegs.depth.rgb_uri'] = str(rgb_uri)
+    return datum.StampedDatum(uri=uri, camera_image=ci)
+
+  if topic.startswith('lidar|mesh'):
+    pc = threeDScannerApp_create_point_cloud_from_mesh(
+      scan_dir / uri.extra['threeDScannerApp.mesh_path'],
+      sensor_name=topic)
+    pc.timestamp = uri.timestamp
+    pc.sensor_name = topic
+    return datum.StampedDatum(uri=uri, point_cloud=pc)
+
+  if topic == 'ego_pose':
+    with open(scan_dir / uri.extra['threeDScannerApp.json_path'], 'r') as f:
+      frame_info = json.load(f)
+    xform = threeDScannerApp_get_ego_pose(frame_info)
+    return datum.StampedDatum(uri=uri, transform=xform)
+
+  raise ValueError(uri)
+
+
+###############################################################################
+### Single-Scene Research Utils
+
+def threeDScannerApp_convert_raw_to_opend3d_rgbd(input_dir, output_dataset_dir):
+  """Convert a raw 3DScannerApp scan in `input_dir` to an RGBD dataset
+  layout under `output_dataset_dir`: `image/`, `depth/`, and `debug/`
+  sub-directories (filled by `threeDScannerApp_convert_raw_to_sync_rgbd()`)
+  plus an `intrinsic.json` with the image size and camera intrinsics."""
+  from oarphpy import util as oputil
+  import imageio
+
+  threeDScannerApp_convert_raw_to_sync_rgbd(
+    input_dir,
+    os.path.join(output_dataset_dir, 'image'),
+    output_dir_depth=os.path.join(output_dataset_dir, 'depth'),
+    output_dir_debug=os.path.join(output_dataset_dir, 'debug'))
+
+  # Pick a frame to get the image size
+  rgb_paths = oputil.all_files_recursive(input_dir, pattern='frame*.jpg')
+  h, w = imageio.imread(rgb_paths[0]).shape[:2]
+
+  # Take the intrinsics from the first frame json that has them
+  K = None
+  for path in oputil.all_files_recursive(input_dir, pattern='frame*.json'):
+    with open(path, 'r') as f:
+      frame_info = json.load(f)
+    if 'intrinsics' in frame_info:
+      K = threeDScannerApp_get_K(frame_info)
+      break
+
+  assert K is not None
+  f_x, c_x = K[0][0], K[0][2]
+  f_y, c_y = K[1][1], K[1][2]
+
+  # See example https://github.com/isl-org/Open3D/blob/a27456cc9f4cd43744e87c3e65a9bf196c0e5526/examples/python/reconstruction_system/sensors/realsense_recorder.py#L69
+  calib = {
+    'width': w,
+    'height': h,
+    # Column-major !
+    'intrinsic_matrix': [f_x, 0, 0, 0, f_y, 0, c_x, c_y, 1],
+  }
+
+  output_intrinsics_path = os.path.join(output_dataset_dir, 'intrinsic.json')
+  with open(output_intrinsics_path, 'w') as f:
+    json.dump(calib, f, indent=2)
+  util.log.info("Saved intrinsics to %s" % output_intrinsics_path)
+
+
+def threeDScannerApp_convert_raw_to_sync_rgbd(
+      input_dir,
+      output_dir,
+      scale_depth_to_match_visible=True,
+      out_id_zfill=8,
+      rgb_prefix='image_',
+      depth_prefix='depth_',
+      ignore_depth_below_ARConfidenceLevel=1, # ARConfidenceLevel.medium
+      include_debug=True,
+      include_raw_xform=True,
+      output_dir_depth=None,
+      output_dir_debug=None,
+      parallel=-1):
+  """Convert a raw 3DScannerApp scan into synchronized RGB / depth files.
+
+  Frames that have both a `frame_*.jpg` and a `depth_*.png` are matched by
+  frame index and renumbered consecutively (in frame index order) from 0.
+
+  Args:
+    input_dir: scan directory; searched recursively for inputs.
+    output_dir: destination of the RGB copies (and of depth / debug output
+      unless overridden below).
+    scale_depth_to_match_visible: resize depth and confidence to the
+      resolution of the first RGB image.
+    out_id_zfill: zero-padded width of the output ids.
+    rgb_prefix: file name prefix for RGB output.
+    depth_prefix: file name prefix for depth output.
+    ignore_depth_below_ARConfidenceLevel: depth pixels whose confidence is
+      below this value are set to 0.
+    include_debug: also save a `debug_<id>.jpg` with depth drawn into the
+      RGB image.
+    include_raw_xform: when the frame json exists, also save the ego pose
+      matrix to `<rgb dest>.xform.npz`.
+    output_dir_depth: destination of depth output; default `output_dir`.
+    output_dir_debug: destination of debug output; default `output_dir`.
+    parallel: None to convert serially in this process; otherwise convert
+      via Spark using this many slices (negative means one per CPU).
+
+  Returns:
+    A list of `(rgb_dest, depth_dest)` path pairs, one per converted frame.
+  """
+  
+  ## Get Input
+  from oarphpy import util as oputil
+  input_rgb_paths = oputil.all_files_recursive(
+                          input_dir, pattern='frame*.jpg')
+  input_depth_paths = oputil.all_files_recursive(
+                          input_dir, pattern='depth*.png')
+  assert input_rgb_paths
+  assert input_depth_paths
+
+  if output_dir_depth is None:
+    output_dir_depth = output_dir
+  if output_dir_debug is None:
+    output_dir_debug = output_dir
+  oputil.mkdir(str(output_dir))
+  oputil.mkdir(str(output_dir_depth))
+  oputil.mkdir(str(output_dir_debug))
+
+  ## Get Input Dimensions
+  import imageio
+  sample_img = imageio.imread(input_rgb_paths[0])
+  rgb_hw = sample_img.shape[:2]
+  util.log.info("Have RGB of resolution %s" % (rgb_hw,))
+
+  sample_depth = imageio.imread(input_depth_paths[0])
+  depth_hw = sample_depth.shape[:2]
+  util.log.info("Have depth of resolution %s" % (depth_hw,))
+
+  ## Define what we need to do
+  def convert(in_rgb, in_depth, out_id):
+    import shutil
+    import cv2
+    import imageio
+
+    out_id_str = str(out_id).zfill(out_id_zfill)
+
+    rgb_suffix = in_rgb.split('.')[-1]
+    rgb_suffix = '.' + rgb_suffix
+    rgb_dest = os.path.join(output_dir, rgb_prefix + out_id_str + rgb_suffix)
+    shutil.copyfile(in_rgb, rgb_dest)
+    util.log.info("%s -> %s" % (in_rgb, rgb_dest))
+
+    depth = imageio.imread(in_depth)
+    # Only rewrite the file name: a directory in the path may itself
+    # contain 'depth_'
+    conf_path = os.path.join(
+                    os.path.dirname(in_depth),
+                    os.path.basename(in_depth).replace('depth_', 'conf_'))
+    confidence = imageio.imread(conf_path)
+
+    if scale_depth_to_match_visible:
+      w, h = rgb_hw[1], rgb_hw[0]
+      depth = cv2.resize(depth, (w, h))
+      confidence = cv2.resize(confidence, (w, h))
+
+    # Zero out depth with low confidence
+    depth[ confidence < ignore_depth_below_ARConfidenceLevel ] = 0
+
+    depth_dest = os.path.join(
+                    output_dir_depth, depth_prefix + out_id_str + '.png')
+    imageio.imwrite(depth_dest, depth)
+    util.log.info("%s -> %s" % (in_depth, depth_dest))
+
+    if include_debug:
+      from psegs.util import plotting as pspl
+      
+      # millimeters -> meters
+      depth = depth.astype(np.float32) * .001
+      
+      debug = imageio.imread(in_rgb)
+      pspl.draw_depth_in_image(debug, depth, period_meters=.1)
+
+      debug_dest = os.path.join(
+                    output_dir_debug, 'debug_' + out_id_str + '.jpg')  
+      imageio.imwrite(debug_dest, debug)
+      util.log.info("Saved debug %s" % debug_dest)
+    
+    if include_raw_xform:
+      # Only swap the extension: a directory in the path may itself
+      # contain '.jpg'
+      frame_json_path = os.path.splitext(in_rgb)[0] + '.json'
+      if os.path.exists(frame_json_path):
+        with open(frame_json_path, 'r') as f:
+          json_data = json.load(f)
+        
+        ego_pose = threeDScannerApp_get_ego_pose(json_data)
+        xform = ego_pose.get_transformation_matrix(homogeneous=True)
+        # NOTE(review): the file is named .npz but is written with
+        # `np.save()`; the name is kept as-is for existing readers.
+        xform_dest = rgb_dest + '.xform.npz'
+        with open(xform_dest, 'wb') as f:
+          np.save(f, xform)
+
+    return rgb_dest, depth_dest
+
+  ## Set up conversion jobs
+  def get_frame_idx(path):
+    fname = os.path.basename(path)
+    return int(fname.split('.')[0].split('_')[1])
+
+  frame_to_img = dict((get_frame_idx(p), p) for p in input_rgb_paths)
+  frame_to_d = dict((get_frame_idx(p), p) for p in input_depth_paths)
+  
+  matched_frames = set(frame_to_img.keys()) & set(frame_to_d.keys())
+  util.log.info("Have %s frames to convert ..." % len(matched_frames))
+
+  frame_out_id = [
+    (frame_id, out_id)
+    for out_id, frame_id in enumerate(sorted(matched_frames))
+  ]
+  jobs = [
+    (frame_to_img[f], frame_to_d[f], out_id)
+    for f, out_id in frame_out_id
+  ]
+
+  ## Run conversion!
+  out_path_pairs = []
+  if parallel is None:
+    for j in jobs:
+      result = convert(*j)
+      out_path_pairs.append(result)
+  else:
+    from psegs.spark import Spark  
+    
+    with Spark.sess() as spark:
+      if parallel < 0:
+        import multiprocessing
+        parallel = multiprocessing.cpu_count()
+      job_rdd = spark.sparkContext.parallelize(jobs, numSlices=parallel)
+      out_path_pairs = job_rdd.map(lambda j: convert(*j)).collect()
+  
+  util.log.info("... converted %s." % len(jobs))
+  return out_path_pairs
+
+
+
+###############################################################################
+### iOS Lidar Fixtures & Other Constants
+
+class Fixtures(object):
+  """Paths and segment discovery for PSegs iOS Lidar (3DScannerApp) scans."""
+  # To use your own segments, override threeDScannerApp_data_root() and / or
+  # provide absolute paths to info.json files (the latter is much faster
+  # when there are hundreds of segments).
+  INFO_JSON_PATHS = []
+
+  ### Extension Data ##########################################################
+  ### See https://github.com/pwais/psegs-ios-lidar-ext
+
+  EXT_DATA_ROOT = C.EXT_DATA_ROOT / 'psegs-ios-lidar-ext'
+
+  DATASET = 'psegs-ios-lidar-ext'
+  SPLIT = 'threeDScannerApp_data'
+
+  @classmethod
+  def threeDScannerApp_data_root(cls):
+    """A directory with 3DScannerApp scan sub-directories.  Subclasses
+    may override this to provide their own scans."""
+    return cls.EXT_DATA_ROOT / 'threeDScannerApp_data'
+
+  @classmethod
+  def threeDScannerApp_test_data_root(cls):
+    return cls.EXT_DATA_ROOT / 'threeDScannerApp_data_test_fixtures'
+
+  @classmethod
+  def get_threeDScannerApp_segment_uris(cls):
+    """Create and return one segment URI per scan"""
+    from oarphpy import util as oputil
+
+    if not (cls.threeDScannerApp_data_root().exists() or cls.INFO_JSON_PATHS):
+      return []
+    # Each `info.json` marks one scan dir; explicit INFO_JSON_PATHS are appended.
+    all_info_paths = oputil.all_files_recursive(
+                        str(cls.threeDScannerApp_data_root()),
+                        pattern='info.json')
+    all_info_paths.extend(cls.INFO_JSON_PATHS)
+    uris = []
+    for info_path in all_info_paths:
+      scan_dir = os.path.dirname(info_path)
+      segment_id = threeDScannerApp_get_segment_id(info_path=info_path)
+      uri = datum.URI(
+              dataset=cls.DATASET,
+              split=cls.SPLIT,
+              segment_id=segment_id,
+              extra={'threeDScannerApp.scan_dir': scan_dir})
+      uris.append(uri)
+    return uris
+
+  # @classmethod
+  # def index_root(cls):
+  #   """A r/w place to cache any temp / index data"""
+  #   return C.PS_TEMP / 'psegs_ios_lidar'
+
+  @classmethod
+  def get_all_seg_uris(cls):
+    seg_uris = []
+    seg_uris += cls.get_threeDScannerApp_segment_uris()
+      # Room for other recording sources ...
+    return seg_uris
+
+  ### Testing #################################################################
+
+  TEST_FIXTURES_ROOT = Path('/tmp/psegs_ios_lidar_test_fixtures')
+  
+
+  ### DSUtil Auto-download ####################################################
+
+  @classmethod
+  def maybe_emplace_psegs_ios_lidar_ext(cls):
+    from oarphpy import util as oputil
+    # Clone the extension repo into EXT_DATA_ROOT only when that dir is absent.
+    if not cls.EXT_DATA_ROOT.exists():
+      util.log.info("Emplacing PSegs iOS Lidar Extension data ...")
+      oputil.mkdir(str(cls.EXT_DATA_ROOT))
+
+      util.log.info("... downloading PSegs iOS Lidar Extension data ...")
+      oputil.run_cmd(
+        "git clone https://github.com/pwais/psegs-ios-lidar-ext %s" % \
+          cls.EXT_DATA_ROOT)
+    # NOTE: the disabled block below needs `index_root()` (commented out above).
+    # if not cls.TEST_FIXTURES_ROOT.exists():
+    #   from oarphpy import util as oputil
+    #   util.log.info("Emplacing PSegs iOS Lidar Extension data ...")
+    #   oputil.mkdir(str(cls.index_root()))
+    #   oputil.mkdir(str(cls.TEST_FIXTURES_ROOT))
+    #   ext_root = cls.index_root() / 'ext_tmp'
+    #   if not ext_root.exists():
+    #     util.log.info("... downloading PSegs iOS Lidar Extension data ...")
+    #     oputil.run_cmd(
+    #       "git clone https://github.com/pwais/psegs-ios-lidar-ext %s" % \
+    #         ext_root)
+
+    #   util.log.info("... emplacing PSegs iOS Lidar Extension data ...")
+    #   def move(src, dest):
+    #     oputil.mkdir(dest.parent)
+    #     oputil.run_cmd("mv %s %s" % (src, dest))
+    #   move(
+    #     ext_root / 'threeDScannerApp_data',
+    #     cls.EXTERNAL_FIXTURES_ROOT / 'threeDScannerApp_data')
+    
+    #   util.log.info("... emplace success!")
+    #   util.log.info("(You can remove %s if needed)" % ext_root)
+
+
+class IOSLidarSDTFactory(StampedDatumTableFactory):
+  """Builds StampedDatum RDDs from the 3DScannerApp scans listed by FIXTURES."""
+  FIXTURES = Fixtures
+
+  ## Subclass API
+
+  @classmethod
+  def _get_all_segment_uris(cls):
+    return sorted(cls.FIXTURES.get_all_seg_uris())
+
+  @classmethod
+  def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):
+    """Return datum RDDs for segments, minus datums in `existing_uri_df`."""
+    from oarphpy import util as oputil
+    ## First get the data dirs for the segments we need ...
+    seg_uris = cls.FIXTURES.get_all_seg_uris()
+    if only_segments:
+      util.log.info(
+        f"IOSLidarSDTFactory Filtering to only {len(only_segments)} segments")
+      seg_uris = [
+          u for u in seg_uris
+          if any(
+              suri.soft_matches_segment_of(u)
+              for suri in only_segments)
+      ]
+    
+    ## ... generate URIs for those segments (clamp: numSlices must be >= 1) ...
+    seg_uri_rdd = spark.sparkContext.parallelize(
+                    seg_uris, numSlices=max(1, len(seg_uris)))
+    uri_rdd = seg_uri_rdd.flatMap(cls.get_uris_for_seg_uri)
+
+    ## ... filter if necessary ...
+    if existing_uri_df is not None:
+      def to_datum_id(obj):
+          return (
+            obj.dataset,
+            obj.split,
+            obj.segment_id,
+            obj.topic,
+            obj.timestamp)
+
+      key_uri_rdd = uri_rdd.map(lambda u: (to_datum_id(u), u))
+      existing_keys_nulls = existing_uri_df.rdd.map(to_datum_id).map(
+                                  lambda t: (t, None))
+      uri_rdd = key_uri_rdd.subtractByKey(existing_keys_nulls).map(
+                                      lambda kv: kv[1])
+
+    ## ... now build Datum RDDs ...
+    URIS_PER_CHUNK = (os.cpu_count() or 1) * 128
+    uris = uri_rdd.collect()
+    assert len(uris) > 0, \
+      f"Broken scan(s) ? No URIS for segments {seg_uris}"
+    util.log.info(
+      f"... IOSLidarSDTFactory creating datums for {len(uris)} URIs.")
+
+    datum_rdds = []
+    for chunk in oputil.ichunked(uris, URIS_PER_CHUNK):
+      chunk_uri_rdd = spark.sparkContext.parallelize(chunk)
+      datum_rdd = chunk_uri_rdd.map(cls.create_stamped_datum)
+      datum_rdds.append(datum_rdd)
+    return datum_rdds
+  
+
+  ## Datum Construction Support
+
+  @classmethod
+  def get_bad_seg_dirs(cls):
+    """Some captures are bad / cannot be parsed / are incomplete.
+    Return a list of the scan dirs of those segments."""
+    # A segment is bad when no datum URIs can be derived from its scan dir.
+    return [
+      seg_uri.extra['threeDScannerApp.scan_dir']
+      for seg_uri in cls.FIXTURES.get_all_seg_uris()
+      if not cls.get_uris_for_seg_uri(seg_uri)
+    ]
+
+  @classmethod
+  def get_uris_for_seg_uri(cls, seg_uri):
+    # For now, we don't need to sniff the seg_dir type, we only
+    # support threeDScannerApp format.  In the future, we'll need
+    # to condition on seg_dir type.
+    
+    scan_dir = seg_uri.extra['threeDScannerApp.scan_dir']
+    datum_uris = threeDScannerApp_get_uris_from_scan_dir(scan_dir)
+    datum_uris = [
+      duri.replaced(
+            dataset=seg_uri.dataset,
+            split=seg_uri.split,
+            segment_id=seg_uri.segment_id)
+      for duri in datum_uris
+    ]
+    return datum_uris
+
+  @classmethod
+  def create_stamped_datum(cls, uri):
+    # For now, we don't need to sniff the uri, we only
+    # support threeDScannerApp format.  In the future, we'll need
+    # to condition on uri type.
+    return threeDScannerApp_create_stamped_datum(uri)
+
+
+
+###############################################################################
+### IDatasetUtil Impl
+
+class DSUtil(IDatasetUtil):
+  """`IDatasetUtil` hooks (emplace / test / build_table) for iOS Lidar data."""
+  FIXTURES = Fixtures
+
+  @classmethod
+  def emplace(cls):
+    cls.FIXTURES.maybe_emplace_psegs_ios_lidar_ext()
+    return True
+
+  @classmethod
+  def test(cls):
+    from oarphpy import util as oputil
+    oputil.run_cmd("cd %s && pytest -s -vvv -k test_ios_lidar" % C.PS_ROOT)
+    return True
+
+  @classmethod
+  def build_table(cls):
+    # No-op for now; the table build is disabled: IOSLidarSDTFactory.build()
+    return True
diff --git a/psegs/datasets/kitti.py b/psegs/datasets/kitti.py
new file mode 100644
index 0000000..9898756
--- /dev/null
+++ b/psegs/datasets/kitti.py
@@ -0,0 +1,1531 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import os
+from collections import defaultdict
+from pathlib import Path
+
+import attr
+import numpy as np
+from oarphpy import util as oputil
+
+from psegs import util
+from psegs import datum
+from psegs.conf import C
+from psegs.datasets.idsutil import IDatasetUtil
+from psegs.datum.transform import Transform
+from psegs.table.sd_table_factory import StampedDatumTableFactory
+from psegs.util import misc
+
+
+
+###############################################################################
+### KITTI Fixtures & Other Constants
+
+class Fixtures(object):
+  """Paths to KITTI archives, PSegs KITTI extension data, and test fixtures."""
+  ROOT = C.EXT_DATA_ROOT / 'kitti_archives'
+
+  OBJECT_BENCHMARK_FNAMES = (
+    'data_object_label_2.zip',
+    'data_object_image_2.zip',
+    'data_object_image_3.zip',
+    'data_object_prev_2.zip',
+    'data_object_prev_3.zip',
+    'data_object_velodyne.zip',
+    'data_object_calib.zip',
+  )
+
+  TRACKING_BENCHMARK_FNAMES = (
+    'data_tracking_label_2.zip',
+    'data_tracking_image_2.zip',
+    'data_tracking_image_3.zip',
+    'data_tracking_velodyne.zip',
+    'data_tracking_oxts.zip',
+    'data_tracking_calib.zip',
+  )
+
+  @classmethod
+  def zip_path(cls, zipname):
+    return cls.ROOT / zipname
+
+
+  ### Extension Data ##########################################################
+  ### See https://github.com/pwais/psegs-kitti-ext
+
+  EXT_DATA_ROOT = C.EXT_DATA_ROOT / 'psegs-kitti-ext'
+
+  @classmethod
+  def bench_to_raw_path(cls):
+    return cls.EXT_DATA_ROOT / 'bench_to_raw_df'
+
+  @classmethod
+  def index_root(cls):
+    """A r/w place to cache any temp / index data"""
+    return C.PS_TEMP / 'kitti'
+
+
+  ### Testing #################################################################
+
+  TEST_FIXTURES_ROOT = Path('/tmp/psegs_kitti_test_fixtures')
+
+  EXTERNAL_FIXTURES_ROOT = C.EXTERNAL_TEST_FIXTURES_ROOT / 'kitti'
+
+  OBJ_TEST_FRAMES = ('002480', '002481', '002482')
+
+  @classmethod
+  def object_fixture_dir(cls):
+    fixture_dir = cls.TEST_FIXTURES_ROOT / 'object'
+    if util.missing_or_empty(fixture_dir):
+      util.log.info(
+        "Putting Object Benchmark test fixtures in %s" % fixture_dir)
+      oputil.cleandir(fixture_dir)
+      
+      ## Extract all data for these frames
+      util.unarchive_entries(
+        cls.zip_path('data_object_image_2.zip'),
+        ['training/image_2/%s.png' % f for f in cls.OBJ_TEST_FRAMES],
+        fixture_dir)
+      util.unarchive_entries(
+        cls.zip_path('data_object_image_3.zip'),
+        ['training/image_3/%s.png' % f for f in cls.OBJ_TEST_FRAMES],
+        fixture_dir)
+      util.unarchive_entries(
+        cls.zip_path('data_object_velodyne.zip'),
+        ['training/velodyne/%s.bin' % f for f in cls.OBJ_TEST_FRAMES],
+        fixture_dir)
+      util.unarchive_entries(
+        cls.zip_path('data_object_calib.zip'),
+        ['training/calib/%s.txt' % f for f in cls.OBJ_TEST_FRAMES],
+        fixture_dir)
+      util.unarchive_entries(
+        cls.zip_path('data_object_label_2.zip'),
+        ['training/label_2/%s.txt' % f for f in cls.OBJ_TEST_FRAMES],
+        fixture_dir)
+    
+    return fixture_dir
+  
+
+  TRACKING_TEST_FRAMES = (
+    '0009/000214',
+    '0009/000215',
+    '0015/000017',
+    '0019/001055',
+  )
+
+  @classmethod
+  def tracking_fixture_dir(cls):
+    fixture_dir = cls.TEST_FIXTURES_ROOT / 'tracking'
+    if util.missing_or_empty(fixture_dir):
+      util.log.info(
+        "Putting Tracking Benchmark test fixtures in %s" % fixture_dir)
+      oputil.cleandir(fixture_dir)
+      
+      ## Extract all data for these frames
+      util.unarchive_entries(
+        cls.zip_path('data_tracking_image_2.zip'),
+        ['training/image_02/%s.png' % f for f in cls.TRACKING_TEST_FRAMES],
+        fixture_dir)
+      util.unarchive_entries(
+        cls.zip_path('data_tracking_image_3.zip'),
+        ['training/image_03/%s.png' % f for f in cls.TRACKING_TEST_FRAMES],
+        fixture_dir)
+      util.unarchive_entries(
+        cls.zip_path('data_tracking_velodyne.zip'),
+        ['training/velodyne/%s.bin' % f for f in cls.TRACKING_TEST_FRAMES],
+        fixture_dir)
+      
+      segs = [f.split('/')[0] for f in cls.TRACKING_TEST_FRAMES]
+      util.unarchive_entries(
+        cls.zip_path('data_tracking_calib.zip'),
+        ['training/calib/%s.txt' % seg for seg in segs],
+        fixture_dir)
+      util.unarchive_entries(
+        cls.zip_path('data_tracking_label_2.zip'),
+        ['training/label_02/%s.txt' % seg for seg in segs],
+        fixture_dir)
+    
+    return fixture_dir
+  
+
+  ### DSUtil Auto-download ####################################################
+
+  @classmethod
+  def maybe_emplace_psegs_kitti_ext(cls):
+    """Fetch psegs-kitti-ext and move any missing assets into place."""
+    if (cls.bench_to_raw_path().exists() and 
+          cls.EXTERNAL_FIXTURES_ROOT.exists()):
+      return
+    
+    util.log.info("Downloading latest PSegs KITTI Extension data ...")
+    oputil.mkdir(str(cls.index_root()))
+    psegs_kitti_ext_root = cls.index_root() / 'psegs_kitti_ext_tmp'
+    if not psegs_kitti_ext_root.exists():
+      oputil.run_cmd(
+        "git clone https://github.com/pwais/psegs-kitti-ext %s" % \
+          psegs_kitti_ext_root)
+
+    util.log.info("... emplacing PSegs KITTI Extension data ...")
+    def move(src, dest):
+      if dest.exists(): # Already emplaced; `mv` into an existing dir nests src
+        return
+      oputil.mkdir(dest.parent)
+      oputil.run_cmd("mv %s %s" % (src, dest))
+    move(
+      psegs_kitti_ext_root / 'assets' / 'bench_to_raw_df',
+      cls.bench_to_raw_path())
+    move(
+      psegs_kitti_ext_root / 'ps_external_test_fixtures',
+      cls.EXTERNAL_FIXTURES_ROOT)
+    util.log.info("... emplace success!")
+    util.log.info("(You can remove %s if needed)" % psegs_kitti_ext_root)
+
+
+
+###############################################################################
+### KITTI Parsing Utils
+
+
+def load_transforms_from_oxts(oxts_str):
+  """Parse Tracking Benchmark oxts files and return ego-to-world transforms.
+  We ignore most of the oxts information.
+
+  Based upon `pykitti <https://github.com/utiasSTARS/pykitti/blob/19d29b665ac4787a10306bbbbf8831181b38eb38/pykitti/utils.py#L107>`_
+
+  See Also:
+    * `KITTI OXTS Docs <https://github.com/pratikac/kitti/blob/eba7ba0f36917f72055060e9e59f344b72456cb9/readme.raw.txt#L105>`_
+
+  Args:
+    oxts_str (str): The string contents of a single Tracking Benchmark oxts
+      file. These are in `data_tracking_oxts.zip`.
+  
+  Returns:
+    Dict[int, :class:`~psegs.datum.transform.Transform`]: A map of frame
+      number to the ego-to-world transform of the car at that frame.
+  """
+  from scipy.spatial.transform import Rotation as R
+
+  lines = [l for l in oxts_str.split('\n') if l]
+  # One non-empty line per frame; frame numbers are 0-based line indices.
+  EARTH_RADIUS_METERS = 6378137.
+  scale = None
+
+  frame_to_xform = {}
+  for frame_num, line in enumerate(lines):
+    toks = line.split(' ')
+    lat = float(toks[0])
+    lon = float(toks[1])
+    alt = float(toks[2])
+    roll = float(toks[3])
+    pitch = float(toks[4])
+    yaw = float(toks[5])
+    # The Mercator scale is fixed by the latitude of the first frame.
+    if scale is None:
+      scale = math.cos(lat * math.pi / 180)
+    
+    # Mercator projection
+    tx = scale * lon * math.pi * EARTH_RADIUS_METERS / 180
+    ty = scale * EARTH_RADIUS_METERS * (
+            math.log(math.tan((90 + lat) * math.pi / 360)))
+    tz = alt
+
+    # Extrinsic 'xyz' (radians): R = Rz(yaw) @ Ry(pitch) @ Rx(roll), as pykitti
+    rot = R.from_euler('xyz', [roll, pitch, yaw]).as_matrix()
+    # NOTE(review): labeled world -> oxts; docstring says ego-to-world. Confirm.
+    frame_to_xform[frame_num] = datum.Transform(
+                                        rotation=rot,
+                                        translation=[tx, ty, tz],
+                                        src_frame='world',
+                                        dest_frame='oxts')
+  return frame_to_xform
+
+
+def parse_tracking_label_cuboids(label_str):
+  """Parse Tracking Benchmark labels for **an entire Tracking sequence**
+  and return maps of frame number to lists of `Cuboid` and `BBox2D` instances.
+
+  The label format for the Tracking Benchmark is identical to that for
+  the Object Benchmark except that each line of a Tracking label string
+  is prefixed with the following two values:
+   * frame: An integer starting from 0 indicating the frame number;
+      each frame has synchronized lidar and camera images.
+   * track_id: An id distinct to the tracked object in the sequence
+
+  See `parse_object_label_cuboids()` below.
+
+  Args:
+    label_str (str): The string contents of a Tracking Benchmark label file.
+
+  Returns:
+    Dict[int, List[:class:`~psegs.datum.cuboid.Cuboid`]]: A map of frame id
+      to labels decoded as cuboids
+    Dict[int, List[:class:`~psegs.datum.bbox2d.BBox2D`]]: A map of frame id
+      to labels decoded as bboxes
+  """
+  # Each line is `<frame> <track_id> <Object Benchmark label columns ...>`.
+  lines = [l for l in label_str.split('\n') if l]
+  frame_to_cuboids = defaultdict(list)
+  frame_to_bboxes = defaultdict(list)
+  for line in lines:
+    toks = line.split(' ')
+    frame_num = int(toks[0])
+    track_id = str(toks[1])
+    cuboids, bboxes = parse_object_label_cuboids(' '.join(toks[2:]))
+    extra = {
+      'kitti.track_id': str(track_id),
+      'kitti.frame_num': str(frame_num),
+    }
+    for c in cuboids:
+      c.track_id = track_id
+      c.extra.update(**extra)
+    for b in bboxes:
+      b.extra.update(**extra)
+    frame_to_cuboids[frame_num].extend(cuboids)
+    frame_to_bboxes[frame_num].extend(bboxes)
+  return frame_to_cuboids, frame_to_bboxes
+
+
+def parse_object_label_cuboids(label_str):
+  """Parse Object Benchmark labels and return a list of `Cuboid` and `BBox2D`
+  instances.
+
+  Notes:
+    Due to KITTI label format and the unavailability of calibration in this
+    helper, the `Cuboid` instance returned has `obj_from_ego` from the
+    **camera** frame, and not the ego / lidar frame.  Furthermore,
+    the `length_meters`, `width_meters`, and `height_meters` attributes are
+    assigned for camera frame semantics.
+
+  See also:
+    * The `KITTI robot frame reference <http://www.cvlibs.net/datasets/kitti/setup.php>`_
+    * `Label description <https://github.com/bostondiditeam/kitti/blob/71d51b8a66c9226369797d437315c3ca2b56f312/resources/devkit_object/readme.txt#L31>`_
+    * `The KITTI Object Benchmark devkit <https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_object.zip>`_
+    * `Google Lingvo parsing code <https://github.com/tensorflow/lingvo/blob/96eaa85c648c45585ca76493bba5991212bac38a/lingvo/tasks/car/tools/kitti_data.py#L44>`_
+    * `SECOND (PointPillars) <https://github.com/traveller59/second.pytorch/blob/e42e4a0e17262ab7d180ee96a0a36427f2c20a44/second/data/kitti_dataset.py#L38>`_
+
+  Args:
+    label_str (str): The string contents of an Object Benchmark label file.
+
+  Returns:
+    List[:class:`~psegs.datum.cuboid.Cuboid`]: labels decoded as cuboids
+    List[:class:`~psegs.datum.bbox2d.BBox2D`]: labels decoded as bboxes
+  """
+  from scipy.spatial.transform import Rotation as R
+
+  lines = [l for l in label_str.split('\n') if l]
+
+  cuboids = []
+  bboxes = []
+  for line in lines:
+    toks = line.split(' ')
+    assert len(toks) in (15, 16), "Invalid line %s" % line
+
+    # The last column is score, which is optional (or lacking in label files)
+    if len(toks) == 15:
+      toks.append(-1.)
+
+    # Context
+    category_name = str(toks[0])
+    truncated = float(toks[1])
+    occluded = int(toks[2])
+    alpha = float(toks[3])
+      # The yaw from the camera principal ray to the obj; approximately
+      # the yaw about the car's Z-axis.
+    
+    # BBox in camera frame
+    left = float(toks[4])
+    top = float(toks[5])
+    right = float(toks[6])
+    bottom = float(toks[7])
+
+    # Cuboid in left color camera frame
+    # KITTI orders the three size columns as (height, width, length), all in
+    # meters, followed by the (x, y, z) location in camera coordinates.  The
+    # translation below subtracts kheight / 2 from the y location, i.e. the
+    # label location is treated as the bottom-center of the box (the camera
+    # y-axis points down).
+    #
+    kheight = float(toks[8])
+    kwidth = float(toks[9])
+    klength = float(toks[10])
+    bottom_x = float(toks[11])
+    bottom_y = float(toks[12])
+    bottom_z = float(toks[13])
+    rotation_y = float(toks[14])
+      # The yaw of the object versus the camera's y-axis, which points down
+      # (i.e. approximately antiparallel with the car's z-axis).
+    score = float(toks[15])
+
+
+    extra = {
+      'kitti.truncated': str(truncated),
+      'kitti.occluded': str(occluded),
+      'kitti.score': str(score),
+      'kitti.cam_relative_yaw': str(alpha),
+    }
+
+    bbox = datum.BBox2D(
+                  x=left,
+                  y=top,
+                  width=right - left + 1,
+                  height=bottom - top + 1,
+                  category_name=category_name,
+                  extra=extra)
+    bbox.quantize()
+    bboxes.append(bbox)
+    
+
+
+    # Rotation about the y-axis, which in KITTI camera frame is yaw, where
+    # clockwise is to the right of the car.
+    # https://github.com/xinshuoweng/AB3DMOT/blob/4009ba5855bda9a347d9f0a8bd72f351e3b00daf/kitti_utils.py#L313
+    #
+    # c = math.cos(rotation_y)
+    # s = math.sin(rotation_y)
+    # rot = np.array([[c,  0,  s],
+    #                 [0,  1,  0],
+    #                 [-s, 0,  c]])
+
+
+    cuboids.append(
+      datum.Cuboid(
+        category_name=category_name,
+
+        # See https://github.com/pratikac/kitti/blob/master/readme.tracking.txt#L84
+        # length_meters=klength,
+        # width_meters=kheight,
+        # height_meters=kwidth,
+        length_meters=klength,
+        width_meters=kwidth,
+        height_meters=kheight,
+        # length_meters=z_size,
+        # width_meters=y_size,
+        # height_meters=x_size,
+        obj_from_ego=datum.Transform(
+          # rotation=rot,#R.from_euler('zxy', [rotation_y-math.pi, -math.pi/2, 0]).as_matrix(),
+          # rotation=R.from_euler('yzx', [rotation_y, 0, 0]).as_matrix(),
+          rotation=R.from_euler('zyx', [-rotation_y, 0, math.pi/2]).as_matrix(),
+            # In addition to including the yaw label `rotation_y`, we apply
+            # a pi/2 roll to account for the camera/lidar z-axis swap.
+          # translation=[bottom_x, bottom_y - .5 * height_meters, bottom_z],
+          # translation=[bottom_x, bottom_y - .5 * y_size, bottom_z],
+          translation=[bottom_x, bottom_y - .5 * kheight, bottom_z],
+          src_frame='camera|left',
+          dest_frame='obj'),
+        extra=extra))
+
+  return cuboids, bboxes
+
+
+@attr.s(eq=False)
+class Calibration(object):
+  """Camera / lidar calibration for the KITTI Benchmarks.  (TODO: more docs.)
+  
+  Note that this class is designed to interface with the calibration data
+  provided in the Benchmark datasets.  This calibration data is a *subset* of
+  that available in the Raw Sync data (e.g. Benchmarks only have camera `P`
+  Projective matrices, but Raw Sync data has explicit stereo baseline
+  transforms).  Why?  While the Benchmark **training** data overlaps with
+  the Raw Sync data, for the **test** split there is NO PUBLIC Raw Sync data
+  available (not even calibration params), hence we stick to the data provided
+  in the Benchmark releases.
+
+  We don't use pykitti directly because:
+    * It has odd dependencies, and not all are included in its setup.py
+    * It's not compatible with the calibration data in the KITTI Benchmark zips
+    * The pykitti code confounds file objects with other parsing / data
+      structures
+    * The pykitti code doesn't have much support for the Benchmarks; most
+      support is for the Raw Sync data.
+    
+  See also:
+    * `Google / Waymo's KITTI parsing code <https://github.com/tensorflow/lingvo/blob/96eaa85c648c45585ca76493bba5991212bac38a/lingvo/tasks/car/tools/kitti_data.py>`_
+    * `kitti-object-eval-python <https://github.com/traveller59/kitti-object-eval-python/blob/9f385f8fd40c195a6370ae3682889d8d5dddf42b/kitti_common.py#L75>`_
+    * `pykitti <https://github.com/utiasSTARS/pykitti/blob/d3e1bb81676e831886726cc5ed79ce1f049aef2c/pykitti/tracking.py#L125>`_
+
+  """
+
+  ### Camera Intrinsics (Rectified)
+
+  # NB: We ignore the grey cameras (numbered 0 and 1) because the Benchmarks
+  # do not contain images for them.
+
+  P2 = attr.ib(type=np.ndarray, default=np.zeros((3, 4)))
+  """3-by-4 Projective Matrix for Camera 2 (left color stereo)"""
+
+  P3 = attr.ib(type=np.ndarray, default=np.zeros((3, 4)))
+  """3-by-4 Projective Matrix for Camera 3 (right color stereo)"""
+
+
+  ## Derived Attributes
+
+  K2 = attr.ib(type=np.ndarray, default=np.zeros((3, 3)))
+  """3-by-3 Camera Matrix for Camera 2 (left color stereo).
+  Derived from `P2`"""
+
+  K3 = attr.ib(type=np.ndarray, default=np.zeros((3, 3)))
+  """3-by-3 Camera Matrix for Camera 3 (right color stereo).
+  Derived from `P3`"""
+
+  T2 = attr.ib(type=np.ndarray, default=np.zeros((1, 3)))
+  """3-by-1 Translation vector from Camera 2 center from Lidar frame.
+  We estimate this vector from `P2`.  See `velo_to_cam_2_rect` below."""
+
+  T3 = attr.ib(type=np.ndarray, default=np.zeros((1, 3)))
+  """3-by-1 Translation vector from Camera 3 center from Lidar frame.
+  We estimate this vector from `P3`.  See `velo_to_cam_3_rect` below."""
+
+
+  ### Raw Extrinsics
+
+  R0_rect = attr.ib(type=datum.Transform, default=datum.Transform())
+  """A rotation-only transform for projecting lidar points into the *rectified*
+  camera frame.  Neglecting this transform will result in a skew between
+  projected points and the center of rectified images.  Called `R0_rect` in
+  Benchmark calibration data."""
+
+  velo_to_cam_unrectified = attr.ib(
+    type=datum.Transform, default=datum.Transform())
+  """Raw transform from velodyne to left color camera (camera 2) unrectified
+  frame.  Called `Tr_velo_to_cam` in Benchmark calibration data."""
+
+  imu_to_velo = attr.ib(type=datum.Transform, default=datum.Transform())
+  """Raw transform from IMU to velodyne frame.  Called `Tr_imu_to_velo` in
+  Benchmark calibration data."""
+
+  ### Derived Extrinsics
+
+  velo_to_cam_2_rect = attr.ib(type=datum.Transform, default=datum.Transform())
+  """Transform from velodyne to left color camera rectified frame.  Use this
+  transform with PSegs versus `velo_to_cam_unrectified`.
+  
+  In PSegs, we project points from lidar to camera using:
+    pxpyd = K * [R|T] * xyz
+  where pxpyd is a pixel (x, y, depth) value, K is the camera matrix,
+  and [R|T] transforms from lidar to camera frame. However, KITTI only provides
+  the projective matrix P and a transform [R|T] to the **left** camera frame.
+  KITTI says to project points using:
+    pxpyd = P * R0 * Tr_velo_to_cam * xyz
+  We pick apart K and [R|T] from P for each camera for compatibility with
+  PSegs.
+  """
+
+  velo_to_cam_3_rect = attr.ib(type=datum.Transform, default=datum.Transform())
+  """Transform from velodyne to right color camera rectified frame.  Use this
+  transform with PSegs versus `velo_to_cam_unrectified`."""
+
+
+  def __eq__(self, other):
+    return misc.attrs_eq(self, other) # `@attr.s(eq=False)`: we define eq here
+
+  @staticmethod
+  def derive_T_from_P(P):
+    """KITTI provides only the camera Projective Matrix `P` for the Benchmarks;
+    the KITTI authors compute `P` from the intrinsic Calibration Matrix `K`
+    and other extrinsic calibration.  In this utility we extract `K` and a
+    compatible transform [R|T] for projecting 3d points into the camera image.
+
+    Problem: we want to extract K and [R|T] from the given P matrix.
+    Reference: `Hartley & Zisserman "Multiple View Geometry" (2nd ed.) pg. 163
+    <http://cvrs.whu.edu.cn/downloads/ebooks/Multiple%20View%20Geometry%20in%20Computer%20Vision%20(Second%20Edition).pdf>`_
+
+    We can obtain K and R using an RQ decomposition on P.  For example:
+      `K, RT = scipy.linalg.rq(P[:3, :3])`  
+    However, we note that the P matrices given in the Benchmark data tend to
+    have the structure
+    ::
+            | a 0 b e |
+            | 0 c d f |
+            | 0 0 1 g |
+    where the left-hand block mimics the structure of K.  Unsurprisingly, a
+    RQ decomposition finds that R is the 3x3 identity and suggests:
+    ::
+        K = | a 0 b | 
+            | 0 c d | 
+            | 0 0 1 | 
+    To use this decomposition, though, we'd have to deduce T from K.  If we 
+    indeed assume that R = I, and further accept the raw values of P as
+    intrinsics for K, we can solve for T as follows:
+    ::
+      P = K [R|T] = | fx  0 cx | | 1 0 0  Tx | 
+                    |  0 fy cy | | 0 1 0  Ty |
+                    |  0  0  1 | | 0 0 1  Tz |
+
+                  = | fx  0 cx | | 1 0 0  (1/fx) * (P[0,3] - cx * P[2,3]) | 
+                    |  0 fy cy | | 0 1 0  (1/fy) * (P[1,3] - cy * P[2,3]) |
+                    |  0  0  1 | | 0 0 1  P[2,3]                          |
+
+    From the above, we recover a T like:
+    ::
+      T_left_cam =     [ 0.05984926, -0.00035793,  0.0027459]^T
+      T_right_cam =    [-0.47286266,  0.00239497,  0.0027299]^T
+      Tr_velo_to_cam = [-0.00406977, -0.07631618, -0.2717806]^T
+
+    So, `T_left_cam` is ~6cm long, and `T_right_cam` is ~47.3cm long;
+    these figures tend to agree with the KITTI vehicle reference diagram:
+    http://www.cvlibs.net/datasets/kitti/setup.php
+
+    What's also notable is that the **raw** KITTI `Tr_velo_to_cam` transform
+    (which has a translation norm of about 28cm) appears to be a transform to
+    the left *grey* camera (camera 0) and not the left *color* camera,
+    which is what we want.
+
+    Qualitatively, the deduced `T` values appear to give good lidar-to-camera
+    projections; see the test `test_kitti_object_lidar_camera_projection()`.
+
+    Args:
+      P (np.ndarray): A 3x4 projective matrix.
+    
+    Returns:
+      T (np.ndarray): A derived 3x1 translation vector.  Where a focal
+        length is 0 (e.g. an all-zeros `P`), that component of T is 0.
+    """
+    fx = P[0, 0]
+    fy = P[1, 1]
+    cx = P[0, 2]
+    cy = P[1, 2]
+    Tx = (1/fx) * (P[0,3] - cx * P[2,3]) if fx else 0.
+    Ty = (1/fy) * (P[1,3] - cy * P[2,3]) if fy else 0.
+    Tz = P[2, 3]
+    # Zero focal lengths are guarded above: 1/0 on numpy floats gives inf / NaN
+    return np.array([[Tx, Ty, Tz]]).T
+
+  def __attrs_post_init__(self):
+    # As noted above in `derive_T_from_P()`, we interpret raw intrinsics from
+    # the provided P matrices and deduce T
+    self.K2 = self.P2[:3, :3]
+    self.K3 = self.P3[:3, :3]
+    self.T2 = Calibration.derive_T_from_P(self.P2)
+    self.T3 = Calibration.derive_T_from_P(self.P3)
+    # Lidar -> rectified left *grey* camera frame; see `derive_T_from_P()`.
+    vel_to_cam_left_grey = self.R0_rect @ self.velo_to_cam_unrectified
+    # Then shift by the per-camera translation deduced from P2 / P3.
+    RT_left_color = datum.Transform(translation=self.T2)
+    self.velo_to_cam_2_rect = RT_left_color @ vel_to_cam_left_grey
+
+    # Bless this transform; explicitly set frame
+    self.velo_to_cam_2_rect.src_frame = 'ego' # For KITTI, lidar is ego
+    self.velo_to_cam_2_rect.dest_frame = 'camera|left'
+
+    RT_right_color = datum.Transform(translation=self.T3)
+    self.velo_to_cam_3_rect = RT_right_color @ vel_to_cam_left_grey
+
+    # Bless this transform; explicitly set frame
+    self.velo_to_cam_3_rect.src_frame = 'ego' # For KITTI, lidar is ego
+    self.velo_to_cam_3_rect.dest_frame = 'camera|right'
+
+  @classmethod
+  def from_kitti_str(cls, calib_txt):
+    """Create and return a `Calibration` instance from the given calibration
+    file string `calib_txt`.  This string may originate from the calibration
+    text files embedded in the `data_object_calib.zip` or
+    `data_tracking_calib.zip` zips.  Note that these calibration files are
+    different than those provided with the KITTI Raw Sync data.
+
+    Args:
+      calib_txt (str): Calibration file contents; one `KEY: v0 v1 ...` (or
+        `KEY v0 v1 ...`) record per line.  Blank and whitespace-only lines
+        are ignored.
+
+    Returns:
+      Calibration: built from the `P2`, `P3`, rectification, lidar-to-camera
+        and imu-to-lidar records.
+
+    Raises:
+      KeyError: if a required record is missing.
+      ValueError: if a record value is not a float or has the wrong size.
+    """
+
+    # Parse raw data. Based upon pykitti.  We don't use pykitti directly due to
+    # its dependency issues and the way it confounds file objects with parsing
+    # code and data structures.
+    # https://github.com/utiasSTARS/pykitti/blob/d3e1bb81676e831886726cc5ed79ce1f049aef2c/pykitti/utils.py#L68
+    data = {}
+    for line in calib_txt.splitlines():
+      # P0: 7.115377000000e+02 0.000000000000e+00 -> P0: np.array([...])
+      # OR P0 7.115377000000e+02 0.000000000000e+00 -> P0: np.array([...])
+      toks = line.split()
+      if not toks:
+        # Whitespace-only line (stray spaces, a bare '\r', ...): there is no
+        # key to read, and indexing `toks[0]` would raise IndexError.
+        continue
+      key = toks[0].replace(':', '')
+      data[key] = np.array([float(t) for t in toks[1:]])
+
+    kwargs = {}
+
+    # Load camera projective matrices
+    CAMERAS = ('P2', 'P3') # Ignore grey cameras!
+    for cam in CAMERAS:
+      kwargs[cam] = np.reshape(data[cam], (3, 4))
+
+    ## Decide on keys
+    def _key_or_fallback(object_key, tracking_key):
+      # Object and Tracking use different keys.  Default to Object, fall back
+      # to Tracking.
+      return object_key if object_key in data else tracking_key
+
+    R0_rect_key = _key_or_fallback('R0_rect', 'R_rect')
+    Tr_velo_to_cam_key = _key_or_fallback('Tr_velo_to_cam', 'Tr_velo_cam')
+    Tr_imu_to_velo_key = _key_or_fallback('Tr_imu_to_velo', 'Tr_imu_velo')
+
+    ## Load extrinsics
+    kwargs['R0_rect'] = datum.Transform(
+                          rotation=np.reshape(data[R0_rect_key], (3, 3)),
+                          src_frame='camera|left_raw',
+                          dest_frame='camera|left_sensor')
+
+    kwargs['velo_to_cam_unrectified'] = (
+      datum.Transform.from_transformation_matrix(
+        np.reshape(data[Tr_velo_to_cam_key], (3, 4)),
+        src_frame='lidar', dest_frame='camera|left_grey_raw'))
+
+    kwargs['imu_to_velo'] = datum.Transform.from_transformation_matrix(
+      np.reshape(data[Tr_imu_to_velo_key], (3, 4)),
+      src_frame='oxts', dest_frame='lidar')
+
+    return cls(**kwargs)
+
+
+###############################################################################
+### StampedDatum Table
+
+def _is_image_or_scan_or_oxt(path):
+  """Return True if archive entry `path` is a data file we ingest: a `.png`
+  image, a `.bin` scan, or a `.txt` file whose path mentions `oxt` or
+  `label`.  Any path ending in `dataformat.txt` is always rejected."""
+  if path.endswith('dataformat.txt'):
+    return False
+  if path.endswith(('.png', '.bin')):
+    return True
+  return path.endswith('.txt') and ('oxt' in path or 'label' in path)
+
+def _rdd_of_all_archive_datafiles(spark, archive_paths):
+  """Return a single RDD of `(archive file name, entry name)` pairs, one per
+  ingestible data file (see `_is_image_or_scan_or_oxt()`) found in the
+  archives at `archive_paths`."""
+  from oarphpy import spark as S
+
+  def _to_task(fw):
+    return (os.path.basename(fw.archive.archive_path), fw.name)
+
+  def _datafiles_rdd(path):
+    entries = S.archive_rdd(spark, str(path))
+    wanted = entries.filter(lambda fw: _is_image_or_scan_or_oxt(fw.name))
+    return wanted.map(_to_task)
+
+  per_archive_rdds = [_datafiles_rdd(path) for path in archive_paths]
+  return spark.sparkContext.union(per_archive_rdds)
+
+
+class BenchmarkToRawMapper(object):
+  """This utility leverages artifacts from the 
+  [PSegs-KITTI-Ext](https://github.com/pwais/psegs-kitti-ext) project to 
+  look up contextual info from the KITTI Raw Data using Benchmark data.
+  """
+
+  ### Public API
+
+  # Cache derived index files in the fixtures index_root directory. Saves users
+  # a minute or two that it takes to sift through about 250k rows of the
+  # bench_to_raw table.
+  FIXTURES = Fixtures
+
+  @classmethod
+  def setup(cls, spark=None):
+    """Build the benchmark-file -> raw-context index and pickle it to
+    `_bench_file_to_context_path()`.  If the bench-to-raw table does not
+    exist on disk, an empty index is saved instead.
+
+    Args:
+      spark: Optional existing Spark session, handed to `Spark.sess()`.
+    """
+    util.log.info("Creating BenchmarkToRawMapper index ...")
+
+    bench_file_to_context = {}
+    bench_to_raw_path = cls.FIXTURES.bench_to_raw_path()
+    if os.path.exists(bench_to_raw_path):
+      from psegs.spark import Spark
+      with Spark.sess(spark) as sess:
+        bench_to_raw_df = sess.read.parquet(str(bench_to_raw_path))
+        bench_file_to_context = \
+          cls._create_bench_file_to_context(bench_to_raw_df)
+
+    # Save index
+    import pickle
+    from oarphpy import util as oputil
+    index_path = cls._bench_file_to_context_path()
+    oputil.mkdir(str(index_path.parent))
+    with open(index_path, 'wb') as f:
+      pickle.dump(bench_file_to_context, f, protocol=pickle.HIGHEST_PROTOCOL)
+      n_entries = len(bench_file_to_context)
+      util.log.info(
+        "Saved %s entries of BenchmarkToRawMapper index to %s ." % (
+          n_entries, f.name))
+
+  def __init__(self):
+    """Load the pickled index written by `setup()`; asserts that the index
+    file exists."""
+    import pickle
+
+    index_path = self._bench_file_to_context_path()
+    assert os.path.exists(index_path), \
+      "User needs to run setup() first"
+
+    # NOTE: the pickle is one we wrote ourselves in `setup()`.
+    with open(index_path, 'rb') as f:
+      self._bench_file_to_context = pickle.load(f)
+
+  def get_extra(self, uri):
+    """Return the raw-data context for `uri` as a new `str -> str` dict.
+
+    Entries with falsy values are dropped and the rest are stringified; an
+    empty dict is returned when `uri` has no entry in the index.
+    """
+    context = self._bench_file_to_context.get(self._bench_file_key(uri), {})
+    extra = {k: str(v) for k, v in context.items() if v}
+
+    # Labels map to images, but don't claim the label is an image file
+    if 'labels' in uri.topic:
+      for image_only_key in ('kitti.raw.sha-1', 'kitti.raw.filename'):
+        extra.pop(image_only_key, None)
+
+    return extra
+
+  def fill_timestamp(self, uri):
+    """Fill the real timestamp of `uri` for the *train* split of the Object and
+    Tracking Benchmarks; these timestamps are derived from the Raw Sync Data
+    release of KITTI.  For the *test* split, we interpolate a plausible
+    timestamp using the frame number and observation that training
+    frames [are consistently sampled at 10Hz](https://github.com/pwais/psegs-kitti-ext/#sensor-sample-rates-are-consistently-10hz)
+
+    Sets `uri.timestamp` in place and returns nothing.
+    """
+
+    if uri.split == 'train':
+      raw_nanostamp = int(self.get_extra(uri).get('kitti.raw.timestamp', 0))
+      if raw_nanostamp > 0:
+        uri.timestamp = raw_nanostamp
+        return
+
+    ## Fallback for test split and/or absence of backing bench to raw data
+    # For Tracking Benchmark, kitti.frame is distinct per tracking segment and
+    # indexes frames starting at 0.
+    # For Object Benchmark, kitti.frame is just a split-global index starting
+    # at 0.
+    # We'll make synthetic timestamps start at unix time 1 in order to
+    # distinguish them from null (zero-value) timestamps, which can be
+    # recognized as erroneous.
+    BASE_NS = int(1e9)
+    frame_index = int(uri.extra['kitti.frame'])
+    frame_offset_ns = int(frame_index * 1e8) # 10Hz -> 1e8 ns per frame
+    uri.timestamp = BASE_NS + frame_offset_ns
+
+
+  ### Support
+
+  @classmethod
+  def _bench_file_to_context_path(cls):
+    """Return the path of the pickled index file under the fixtures'
+    `index_root()`."""
+    index_root = cls.FIXTURES.index_root()
+    return index_root / 'bench_file_to_context.pkl'
+
+  @classmethod
+  def _create_bench_file_to_context(cls, bench_to_raw_df):
+    """Build the index mapping `_bench_file_key()` keys to raw-data context.
+
+    Args:
+      bench_to_raw_df: Spark DataFrame of the bench-to-raw table; the columns
+        read here are `benchmark`, `topic`, `frame`, `segment`, `b_filename`,
+        `nanostamp`, `segment_category`, `r_filename` and `r_digest`.
+
+    Returns:
+      dict: key -> dict of `kitti.raw.*` context entries.  Rows that cannot
+        be keyed (OXTS rows of segments with no Tracking Benchmark
+        counterpart) are omitted.
+    """
+    # We need to reconstruct the Tracking Benchmark segment <-> Raw Segment
+    # mapping in order to deduce timestamps for oxts; oxts are single files in
+    # benchmarks but multiple files in raw, so `bench_to_raw_df` does not
+    # map benchmark oxts to raw oxts.  For simplicity, we recover this
+    # mapping using just the first velodyne file for each segment.
+    df = bench_to_raw_df.filter(
+      (bench_to_raw_df.benchmark == 'data_tracking_velodyne.zip') &
+      (bench_to_raw_df.topic == 'velodyne_points') &
+      (bench_to_raw_df.frame == 0))
+    def to_segment_pair(row):
+      vuri = kitti_archive_file_to_uri(row['benchmark'], row['b_filename'])
+      return (row['segment'], vuri.segment_id)
+    pair_rdd = df.rdd.map(to_segment_pair)
+    raw_segment_to_bench_segment = dict(pair_rdd.collect())
+
+    # Now collect context for each benchmark file
+    df = bench_to_raw_df.filter(
+      (bench_to_raw_df['b_filename'].isNotNull()) |
+      (bench_to_raw_df['topic'] == 'oxts'))
+
+    def to_index_entry(row):
+      if row['topic'] == 'oxts':
+        if row['segment'] not in raw_segment_to_bench_segment:
+          # This OXTS is probably from the test split or a non-benchmark
+          # segment.  Signal "skip" with a None key; filtered out below.
+          return (None, None)
+        key = cls._bench_file_key(
+                    datum.URI(
+                      topic='oxts',
+                      segment_id=raw_segment_to_bench_segment[row['segment']],
+                      extra={'kitti.frame': row['frame']}))
+      else:
+        key = cls._bench_file_key(datum.URI(extra={
+          'kitti.archive': row['benchmark'],
+          'kitti.archive.file': row['b_filename'],
+        }))
+
+      extra = {
+        'kitti.raw.timestamp': row['nanostamp'],
+        'kitti.raw.segment_category': row['segment_category'],
+        'kitti.raw.segment': row['segment'],
+        'kitti.raw.filename': row['r_filename'],
+        'kitti.raw.sha-1': row['r_digest'],
+      }
+      return (key, extra)
+
+    # Drop the skipped rows so that the index does not end up with a bogus
+    # `None: None` entry (which would also inflate the logged entry count).
+    entry_rdd = df.rdd.map(to_index_entry).filter(
+      lambda key_extra: key_extra[0] is not None)
+    bench_file_to_context = dict(entry_rdd.collect())
+    return bench_file_to_context
+
+  @classmethod
+  def _bench_file_key(cls, uri):
+    """Return the index key (a 2-tuple) for `uri`.
+
+     * OXTS: `(segment_id, frame number)`.
+     * Labels: the `(archive, file)` of the camera image that the label
+       corresponds to.
+     * Everything else: `(archive, file)` of the datum itself.
+
+    Raises `ValueError` for a label `uri` whose archive is neither an Object
+    nor a Tracking Benchmark archive.
+    """
+    extra = uri.extra
+    if uri.topic == 'oxts' or 'oxts' in extra.get('kitti.archive.file', ''):
+      return (uri.segment_id, int(extra.get('kitti.frame', 0)))
+
+    if 'labels' not in uri.topic:
+      return (extra['kitti.archive'], extra['kitti.archive.file'])
+
+    is_object = 'object' in extra['kitti.archive']
+    if not is_object and 'tracking' not in extra['kitti.archive']:
+      raise ValueError(uri)
+
+    # Labels are in camera frame; map them to corresponding camera image
+    archive = str(extra['kitti.archive']).replace('label', 'image')
+    cam_file = str(extra['kitti.archive.file']).replace('label', 'image')
+    if is_object:
+      # One label file per frame: only the file extension changes.
+      cam_file = cam_file.replace('txt', 'png')
+    else:
+      # One label file per segment: the segment becomes a directory and the
+      # frame picks the image within it.
+      cam_file = cam_file.replace('.txt', '/%s.png' % extra['kitti.frame'])
+    return (archive, cam_file)
+
+  
+
+
+def kitti_archive_file_to_uri(archive_name, entryname):
+  """Create and return a URI for archive entry `entryname`, dispatching on
+  whether `archive_name` is an Object or a Tracking Benchmark archive.
+  Raises `ValueError` for any other archive."""
+  if 'object' in archive_name:
+    to_uri = kitti_object_file_to_uri
+  elif 'tracking' in archive_name:
+    to_uri = kitti_tracking_file_to_uri
+  else:
+    raise ValueError("Unsupported %s %s" % (archive_name, entryname))
+  return to_uri(archive_name, entryname)
+
+
+def kitti_get_topic_for_filename(filename):
+  """Return the PSegs topic for a KITTI archive entry, decided by which
+  marker substring appears in `filename`.  Raises `ValueError` if no marker
+  matches."""
+  # Checked in order; the first rule with any marker in `filename` wins.
+  MARKERS_TO_TOPIC = (
+    (('image_2', 'image_02', 'prev_2'), 'camera|left'),
+    (('image_3', 'image_03', 'prev_3'), 'camera|right'),
+    (('label_2', 'label_02'), 'labels|cuboids'), # TODO: bboxes ?
+    (('velodyne',), 'lidar'),
+    (('oxts',), 'ego_pose'),
+  )
+  for markers, topic in MARKERS_TO_TOPIC:
+    if any(marker in filename for marker in markers):
+      return topic
+  raise ValueError(filename)
+
+  # if archive_name in ('data_object_image_2.zip', 'data_object_image_3.zip'):
+  #   if ktopic == 'image_2':
+  #     uri.topic = 'camera|left'
+  #   elif ktopic == 'image_3':
+  #     uri.topic = 'camera|right'
+  #   else:
+  #     raise ValueError()
+  #   uri.extra['kitti.frame'] = fname_prefix
+  # elif archive_name in ('data_object_prev_2.zip', 'data_object_prev_3.zip'):
+  #   if ktopic == 'prev_2':
+  #     uri.topic = 'camera|left'
+  #   elif ktopic == 'prev_3':
+  #     uri.topic = 'camera|right'
+  #   else:
+  #     raise ValueError()
+  #   prefix = fname.split('.')[0]
+  #   frame, seqnum = prefix.split('_')
+  #   uri.extra['kitti.frame'] = frame
+  #   uri.extra['kitti.prev'] = seqnum
+  # elif archive_name == 'data_object_label_2.zip':
+  #   uri.topic = 'labels|cuboids'
+  #   uri.extra['kitti.frame'] = fname_prefix
+  #   # TODO bboxes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # elif archive_name == 'data_object_velodyne.zip':
+  #   uri.topic = 'lidar'
+  #   uri.extra['kitti.frame'] = fname_prefix
+  # elif archive_name == 'data_object_calib.zip':
+    
+
+def kitti_object_file_to_uri(archive_name, entryname):
+  """Create and return a URI for the given KITTI Object Benchmark file.
+
+  The URI records the source archive and entry in `extra`, plus the frame
+  number (`kitti.frame`) and, for the `prev` archives, the index of the
+  preceding frame (`kitti.prev`).  Raises `ValueError` for calibration and
+  unrecognized archives.
+  """
+
+  assert archive_name in Fixtures.OBJECT_BENCHMARK_FNAMES, archive_name
+
+  FRAME_ARCHIVES = (
+    'data_object_image_2.zip', 'data_object_image_3.zip',
+    'data_object_label_2.zip', 'data_object_velodyne.zip')
+  PREV_FRAME_ARCHIVES = ('data_object_prev_2.zip', 'data_object_prev_3.zip')
+
+  split = 'test' if 'test' in entryname else 'train'
+  source = {
+    'kitti.archive': archive_name,
+    'kitti.archive.file': entryname,
+  }
+  # NB: The Object Benchmark is just a bunch of scans rather than a
+  # sequence of timed scans; we stuff all data in a fake segment
+  # with this name.  We use the split to ensure that the training
+  # and testing sets have distinct 'segments', since the data
+  # is distinct.
+  uri = datum.URI(
+            dataset='kitti-object',
+            split=split,
+            segment_id='kitti-object-benchmark-' + split,
+            topic=kitti_get_topic_for_filename(entryname),
+            extra=source)
+
+  # Object Benchmark has filenames like
+  # training/label_2/006415.txt
+  # training/prev_2/007464_03.png
+  # The 6-digit number is the frame number and links camera, lidar,
+  # calibration etc.
+  parts = Path(entryname).parts
+  assert parts[0] in ('training', 'testing')
+  assert len(parts) == 3
+  fname_prefix = parts[2].split('.')[0]
+
+  if archive_name in FRAME_ARCHIVES:
+    # TODO: bboxes
+    uri.extra['kitti.frame'] = fname_prefix
+  elif archive_name in PREV_FRAME_ARCHIVES:
+    frame, seqnum = fname_prefix.split('_')
+    uri.extra['kitti.frame'] = frame
+    uri.extra['kitti.prev'] = seqnum
+  elif archive_name == 'data_object_calib.zip':
+    raise ValueError("Can't address calibration!")
+  else:
+    raise ValueError("Dont know what to do with %s" % archive_name)
+
+  return uri
+
+
+def kitti_tracking_file_to_uri(archive_name, entryname):
+  """Create and return a URI for the given KITTI Tracking Benchmark file.
+
+  Per-frame sensor files get a `kitti.frame` entry in `extra`; per-segment
+  context files (labels, oxts) do not.
+  """
+
+  assert archive_name in Fixtures.TRACKING_BENCHMARK_FNAMES, archive_name
+
+  # Tracking Benchmark has filenames like
+  # training/image_02/0009/000667.png
+  # training/label_02/0002.txt
+  # training/oxts/0002.txt
+  # For sensor data:
+  # The 6-digit number in the end file name is the frame number and links
+  # camera, lidar, calibration etc.
+  # The 4-digit number in the directory name is the segment_id
+  # For context files:
+  # The 4-digit number in the file name is the segment_id
+  parts = Path(entryname).parts
+  assert parts[0] in ('training', 'testing')
+  assert len(parts) in (3, 4)
+  if len(parts) == 4:
+    ksegment_id, frame = parts[2], parts[3].split('.')[0]
+  else:
+    ksegment_id, frame = parts[2].split('.')[0], None
+
+  # Train and Test segments have IDs with overlapping ranges of numbers; to
+  # indicate that they are indeed however distinct, we prefix them with split.
+  split = 'test' if 'test' in entryname else 'train'
+
+  uri = datum.URI(
+          dataset='kitti-tracking',
+          split=split,
+          segment_id='kitti-tracking-' + split + '-' + ksegment_id,
+          topic=kitti_get_topic_for_filename(entryname),
+          extra={
+            'kitti.archive': archive_name,
+            'kitti.archive.file': entryname,
+          })
+  if frame:
+    uri.extra['kitti.frame'] = frame
+
+  return uri
+
+
+class KITTISDTable(StampedDatumTableFactory):
+  
+  FIXTURES = Fixtures
+
+  INCLUDE_OBJ_PREV_FRAMES = True
+
+  INCLUDE_OBJECT_BENCHMARK = True
+  INCLUDE_TRACKING_BENCHMARK = True
+
+  ## Subclass API
+
+  @classmethod
+  def _get_all_segment_uris(cls):
+    """Return the sorted, de-duplicated segment URIs for every ingestible
+    data file found in the archives that exist on disk.  Archives that are
+    missing are silently skipped."""
+    import zipfile
+
+    uris = set()
+    for archive_path in cls._get_all_archive_paths():
+      if not os.path.exists(archive_path):
+        continue
+      # Close each archive as soon as we have its listing rather than
+      # leaking one open file handle per archive.
+      with zipfile.ZipFile(archive_path) as zf:
+        entrynames = zf.namelist()
+      for entryname in entrynames:
+        if _is_image_or_scan_or_oxt(entryname):
+          uri = kitti_archive_file_to_uri(archive_path.name, entryname)
+          uris.add(str(uri.to_segment_uri()))
+
+    return sorted(datum.URI.from_str(uri) for uri in uris)
+
+  @classmethod
+  def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):
+    """Create and return a list of RDDs of `StampedDatum`s, one RDD per chunk
+    of source URIs.
+
+    Args:
+      spark: The Spark session to use.
+      existing_uri_df: Optional DataFrame with an `extra` column of URIs that
+        already have datums; archive files recorded there are skipped
+        ("resume mode").
+      only_segments: Optional collection of segment URIs; when given, only
+        URIs that `soft_matches_segment()` one of them are kept.
+
+    Returns:
+      list: RDDs that lazily produce datums via `_iter_datums_from_uri()`.
+
+    The chunk size defaults to 64 URIs per local CPU; a subclass may set a
+    class attribute `URIS_PER_CHUNK` to override it (e.g. to fit RAM).
+    """
+    # Imported locally, like the other users of oarphpy in this module.
+    from oarphpy import util as oputil
+
+    ## First build indices (saves several minutes per worker per chunk) ...
+    class SDBenchmarkToRawMapper(BenchmarkToRawMapper):
+      FIXTURES = cls.FIXTURES
+    SDBenchmarkToRawMapper.setup(spark=spark)
+
+    ## ... now build a set of tasks to do ...
+    archive_paths = cls._get_all_archive_paths()
+    task_rdd = _rdd_of_all_archive_datafiles(spark, archive_paths)
+    task_rdd = task_rdd.cache()
+    util.log.info("Discovered %s tasks ..." % task_rdd.count())
+
+    ## ... convert to URIs and filter those tasks if necessary ...
+    if existing_uri_df is not None:
+      # Since we keep track of the original archives and file names, we can
+      # just filter on those.  We'll collect them in this process b/c the
+      # maximal set of URIs is smaller than RAM.
+      def to_task(row):
+        return (row.extra.get('kitti.archive'),
+                row.extra.get('kitti.archive.file'))
+      skip_tasks = set(
+        existing_uri_df.select('extra').rdd.map(to_task).collect())
+
+      task_rdd = task_rdd.filter(lambda t: t not in skip_tasks)
+      util.log.info(
+        "Resume mode: have datums for %s datums; dropped %s tasks" % (
+          existing_uri_df.count(), len(skip_tasks)))
+
+    uri_rdd = task_rdd.map(lambda task: kitti_archive_file_to_uri(*task))
+    if only_segments:
+      util.log.info(
+        "Filtering to only %s segments" % len(only_segments))
+      uri_rdd = uri_rdd.filter(
+        lambda uri: any(
+          suri.soft_matches_segment(uri) for suri in only_segments))
+
+    ## ... run tasks and create stamped datums.
+    # `os.cpu_count()` may return None when the count is undetermined; fall
+    # back to a single CPU rather than failing on `None * 64`.
+    uris_per_chunk = (
+      getattr(cls, 'URIS_PER_CHUNK', None) or (os.cpu_count() or 1) * 64)
+    uris = uri_rdd.collect()
+    util.log.info("... creating datums for %s URIs." % len(uris))
+
+    datum_rdds = []
+    for chunk in oputil.ichunked(uris, uris_per_chunk):
+      chunk_uri_rdd = spark.sparkContext.parallelize(chunk)
+      datum_rdds.append(chunk_uri_rdd.flatMap(cls._iter_datums_from_uri))
+
+    return datum_rdds
+  
+  @classmethod
+  def _get_all_archive_paths(cls):
+    """Return the zip paths of every archive enabled by the `INCLUDE_*` class
+    flags, Object Benchmark archives first.  Calibration archives are never
+    included."""
+    archives = []
+    if cls.INCLUDE_OBJECT_BENCHMARK:
+      archives.extend(
+        arch for arch in cls.FIXTURES.OBJECT_BENCHMARK_FNAMES
+        if cls.INCLUDE_OBJ_PREV_FRAMES or 'prev' not in arch)
+    if cls.INCLUDE_TRACKING_BENCHMARK:
+      archives.extend(cls.FIXTURES.TRACKING_BENCHMARK_FNAMES)
+    return [
+      cls.FIXTURES.zip_path(arch)
+      for arch in archives
+      if 'calib' not in arch
+    ]
+
+
+  ## Datum Construction Support
+
+  @classmethod
+  def _get_file_bytes(cls, uri=None, archive=None, entryname=None):
+    """Read and return the bytes of one archive entry.
+
+    Args:
+      uri: If given, the entry is the one recorded in `uri.extra` under
+        `kitti.archive` / `kitti.archive.file`, and the next two arguments
+        are ignored.
+      archive: Name of the zip archive (used when `uri` is None).
+      entryname: Name of the entry within `archive` (used when `uri` is
+        None).
+
+    Returns:
+      bytes: the entry's contents.
+
+    Raises:
+      Exception: wrapping (and chained from) any error raised while reading,
+        together with the archive, entry name and uri involved.
+    """
+
+    if uri is not None:
+      archive = uri.extra['kitti.archive']
+      entryname = uri.extra['kitti.archive.file']
+    assert archive and entryname
+
+    # Cache the Zipfiles for faster loading.  The cache attribute is found
+    # through inheritance, so key it by resolved path instead of bare archive
+    # name: a subclass that overrides FIXTURES must not be served a zip that
+    # was opened for another class's fixtures.
+    if not hasattr(cls, '_get_file_bytes_archives'):
+      cls._get_file_bytes_archives = {}
+    path = cls.FIXTURES.zip_path(archive)
+    cache_key = str(path)
+    if cache_key not in cls._get_file_bytes_archives:
+      import zipfile
+      cls._get_file_bytes_archives[cache_key] = zipfile.ZipFile(path)
+
+    try:
+      return cls._get_file_bytes_archives[cache_key].read(entryname)
+    except Exception as e:
+      # `uri` is None when the caller passed archive / entryname directly, so
+      # report the entry name too, and keep the original traceback chained.
+      raise Exception((e, archive, entryname, uri)) from e
+
+  @classmethod
+  def _get_segment_frame_to_pose(cls, segment_id):
+    """Return the frame -> pose map for Tracking segment `segment_id`, parsed
+    from the segment's OXTS file.  Results are cached on the class because
+    several datum constructors look up poses."""
+    if not hasattr(cls, '_seg_to_poses'):
+      cls._seg_to_poses = {}
+    cache = cls._seg_to_poses
+    if segment_id not in cache:
+      # Segment ids end with '...-<split>-<segment number>'
+      split, segnum = segment_id.split('-')[-2:]
+      oxts_bytes = cls._get_file_bytes(
+        archive='data_tracking_oxts.zip',
+        entryname=split + 'ing/oxts/' + segnum + '.txt')
+      cache[segment_id] = load_transforms_from_oxts(oxts_bytes.decode())
+    return cache[segment_id]
+
+  @classmethod
+  def _get_ego_pose(cls, uri):
+    """Return the ego pose for `uri`: the pose of its frame for Tracking
+    segments, or a default world -> ego `Transform` for the Object Benchmark
+    (which has no pose information)."""
+    if 'kitti-object-benchmark' not in uri.segment_id:
+      frame_to_pose = cls._get_segment_frame_to_pose(uri.segment_id)
+      return frame_to_pose[int(uri.extra['kitti.frame'])]
+
+    # Pose information for Object Benchmark not available
+    return datum.Transform(src_frame='world', dest_frame='ego')
+
+  @classmethod
+  def _get_calibration(cls, uri):
+    """Return the `Calibration` for `uri`: one per frame for the Object
+    Benchmark, one per segment for the Tracking Benchmark.  Instances are
+    cached on the class because several datum constructors need them."""
+
+    if not hasattr(cls, '_obj_frame_to_calib'):
+      cls._obj_frame_to_calib = {}
+    if not hasattr(cls, '_tracking_seg_to_calib'):
+      cls._tracking_seg_to_calib = {}
+
+    def _load_calibration(archive, entryname):
+      calib_bytes = cls._get_file_bytes(archive=archive, entryname=entryname)
+      return Calibration.from_kitti_str(calib_bytes.decode())
+
+    if 'kitti-object-benchmark' in uri.segment_id:
+      cache, key = cls._obj_frame_to_calib, uri.extra['kitti.frame']
+      if key not in cache:
+        cache[key] = _load_calibration(
+          'data_object_calib.zip', uri.split + 'ing/calib/' + key + '.txt')
+      return cache[key]
+
+    # Tracking
+    cache, key = cls._tracking_seg_to_calib, uri.segment_id
+    if key not in cache:
+      split, segnum = key.split('-')[-2:]
+      cache[key] = _load_calibration(
+        'data_tracking_calib.zip', split + 'ing/calib/' + segnum + '.txt')
+    return cache[key]
+
+  @classmethod
+  def _project_cuboids_to_lidar_frame(cls, uri, cuboids):
+    """Project the given `cuboids` from the camera frame to the lidar frame
+    (using calibration for `uri`) and return a transformed copy.
+
+    Args:
+      uri: URI used to look up the calibration.
+      cuboids: Cuboids whose `obj_from_ego` is expressed in the camera frame.
+        Not modified.
+
+    Returns:
+      A deep copy of `cuboids` with `obj_from_ego` re-expressed in the lidar
+      (ego) frame.
+
+    See also the tests:
+     * `test_kitti_object_label_lidar_projection()`
+     * `test_kitti_tracking_label_lidar_projection()`
+    """
+    import copy
+
+    ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    ## Note: KITTI Cuboids are in the *camera* frame and must be projected
+    ## into the lidar frame for plotting. The tests named in the docstring
+    ## help document and ensure this assumption holds.
+    ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    calib = cls._get_calibration(uri)
+    lidar_to_cam = calib.R0_rect @ calib.velo_to_cam_unrectified
+    cam_to_lidar = lidar_to_cam.get_inverse()
+
+    cuboids = copy.deepcopy(cuboids)
+    for c in cuboids:
+      c.obj_from_ego = cam_to_lidar @ c.obj_from_ego
+      c.obj_from_ego.src_frame = 'ego' # In KITTI, lidar is the ego frame
+      c.obj_from_ego.dest_frame = 'obj'
+
+    return cuboids
+
+  @classmethod
+  def _get_bench2raw_mapper(cls):
+    """Return the `BenchmarkToRawMapper` bound to this table's `FIXTURES`,
+    creating and caching it on the class on first use."""
+    if hasattr(cls, '_bench2raw_mapper'):
+      return cls._bench2raw_mapper
+
+    class SDBenchmarkToRawMapper(BenchmarkToRawMapper):
+      FIXTURES = cls.FIXTURES
+
+    cls._bench2raw_mapper = SDBenchmarkToRawMapper()
+    return cls._bench2raw_mapper
+
+
+  ## Datum Construction
+
+  @classmethod
+  def _iter_datums_from_uri(cls, uri):
+    """Yield the `StampedDatum`(s) for source file `uri`, dispatching on its
+    topic.  Raises `ValueError` (on iteration) for an unknown topic."""
+    topic = uri.topic
+    if topic.startswith('camera'):
+      yield cls._create_camera_image(uri)
+    elif topic.startswith('lidar'):
+      yield cls._create_point_cloud(uri)
+    elif topic.startswith('labels'):
+      yield from cls._iter_labels(uri)
+    elif topic == 'ego_pose':
+      yield from cls._iter_ego_poses(uri)
+    else:
+      raise ValueError(uri)
+  
+  @classmethod
+  def _create_camera_image(cls, uri):
+    """Create and return the `StampedDatum` wrapping a `CameraImage` for image
+    file `uri`.  Fills `uri.timestamp` as a side effect."""
+    from psegs.util import misc
+
+    # Read the PNG now to get its dimensions; decoding the pixels is left to
+    # the `image_factory` below.
+    width, height = misc.get_png_wh(cls._get_file_bytes(uri=uri))
+
+    # NOTE(review): this factory takes `uri` as a required argument; confirm
+    # that matches how `CameraImage` invokes `image_factory`.
+    def _get_image(uri):
+      import imageio
+      return imageio.imread(bytearray(cls._get_file_bytes(uri=uri)))
+
+    mapper = cls._get_bench2raw_mapper()
+    mapper.fill_timestamp(uri)
+
+    ego_pose = cls._get_ego_pose(uri)
+
+    calib = cls._get_calibration(uri)
+    if 'right' in uri.topic:
+      K, ego_to_sensor = calib.K3, calib.velo_to_cam_3_rect
+    else:
+      K, ego_to_sensor = calib.K2, calib.velo_to_cam_2_rect
+
+    extra = mapper.get_extra(uri)
+
+    camera_image = datum.CameraImage(
+          sensor_name=uri.topic,
+          image_factory=_get_image,
+          width=width,
+          height=height,
+          timestamp=uri.timestamp,
+          ego_pose=ego_pose,
+          K=K,
+          ego_to_sensor=ego_to_sensor,
+          extra=extra)
+    return datum.StampedDatum(uri=uri, camera_image=camera_image)
+
+  @classmethod
+  def _create_point_cloud(cls, uri):
+    """Create and return the `StampedDatum` wrapping a `PointCloud` for lidar
+    scan file `uri`.  Fills `uri.timestamp` as a side effect.
+
+    The scan file is read as a flat float32 buffer of 4-value records; the
+    first three values of each record are kept as the point and the fourth
+    (reflectance) is dropped.
+    """
+    lidar_bytes = cls._get_file_bytes(uri=uri)
+    raw_lidar = np.frombuffer(lidar_bytes, dtype=np.float32).reshape((-1, 4))
+    cloud = raw_lidar[:, :3]
+    # unused: reflectance = raw_lidar[:, 3:]
+
+    mapper = cls._get_bench2raw_mapper()
+    mapper.fill_timestamp(uri)
+
+    # In KITTI, lidar is the ego frame.  Use `datum.Transform` like the other
+    # call sites in this module instead of relying on a bare `Transform`
+    # name being in module scope.
+    ego_to_sensor = datum.Transform(src_frame='ego', dest_frame='lidar')
+
+    ego_pose = cls._get_ego_pose(uri)
+
+    extra = mapper.get_extra(uri)
+
+    pc = datum.PointCloud(
+          sensor_name=uri.topic,
+          timestamp=uri.timestamp,
+          cloud=cloud,
+          ego_to_sensor=ego_to_sensor,
+          ego_pose=ego_pose,
+          extra=extra)
+    return datum.StampedDatum(uri=uri, point_cloud=pc)
+
+  @classmethod
+  def _iter_labels(cls, uri):
+    """Yield label `StampedDatum`s for label file `uri`: a single datum for
+    the Object Benchmark, one per frame for the Tracking Benchmark, and
+    nothing for the test split."""
+    # KITTI has no labels for test.
+    # FMI see https://github.com/pwais/psegs-kitti-ext
+    if uri.split == 'test':
+      return
+
+    if 'kitti-object-benchmark' in uri.segment_id:
+      yield cls._get_object_labels(uri)
+      return
+
+    # Tracking
+    yield from cls._iter_tracking_labels(uri)
+  
+  @classmethod
+  def _get_object_labels(cls, uri):
+    """Create and return the `StampedDatum` of cuboids (in the lidar frame)
+    for the Object Benchmark frame of `uri`.  Fills `uri.timestamp` as a side
+    effect."""
+    label_bytes = cls._get_file_bytes(
+        archive='data_object_label_2.zip',
+        entryname=uri.split + 'ing/label_2/' + uri.extra['kitti.frame'] + '.txt')
+
+    # FIXME: the parsed bboxes are currently dropped
+    camera_cuboids, _bboxes = parse_object_label_cuboids(label_bytes.decode())
+    cuboids = cls._project_cuboids_to_lidar_frame(uri, camera_cuboids)
+
+    mapper = cls._get_bench2raw_mapper()
+    mapper.fill_timestamp(uri)
+
+    # Each cuboid gets its own pose and extra objects (no sharing).
+    for cuboid in cuboids:
+      cuboid.timestamp = uri.timestamp
+      cuboid.ego_pose = cls._get_ego_pose(uri)
+      cuboid.extra = mapper.get_extra(uri)
+
+    return datum.StampedDatum(uri=uri, cuboids=cuboids)
+  
+  @classmethod
+  def _iter_tracking_labels(cls, uri):
+    """Yield one `StampedDatum` of cuboids (in the lidar frame) per labeled
+    frame of the Tracking segment of `uri`.  Each datum gets its own copy of
+    `uri` with `kitti.frame` and the timestamp filled in."""
+    import copy
+
+    split, segnum = uri.segment_id.split('-')[-2:]
+    labels_bytes = cls._get_file_bytes(
+      archive='data_tracking_label_2.zip',
+      entryname=split + 'ing/label_02/' + segnum + '.txt')
+
+    # NB: We ignore bboxes for the Tracking Benchmark
+    frame_to_cuboids, _ = parse_tracking_label_cuboids(labels_bytes.decode())
+
+    mapper = cls._get_bench2raw_mapper()
+    for frame, camera_cuboids in frame_to_cuboids.items():
+      datum_uri = copy.deepcopy(uri)
+      datum_uri.extra['kitti.frame'] = str(frame).zfill(6)
+
+      lidar_cuboids = cls._project_cuboids_to_lidar_frame(uri, camera_cuboids)
+
+      mapper.fill_timestamp(datum_uri)
+
+      for cuboid in lidar_cuboids:
+        cuboid.timestamp = datum_uri.timestamp
+        cuboid.ego_pose = cls._get_ego_pose(datum_uri)
+        cuboid.extra = mapper.get_extra(datum_uri)
+
+      yield datum.StampedDatum(uri=datum_uri, cuboids=lidar_cuboids)
+
+  @classmethod
+  def _iter_ego_poses(cls, uri):
+    """Yield one transform `StampedDatum` per frame of the Tracking segment
+    of `uri`; yields nothing for the Object Benchmark."""
+    import copy
+
+    # Pose information for Object Benchmark not available
+    if 'kitti-object-benchmark' in uri.segment_id:
+      return
+
+    mapper = cls._get_bench2raw_mapper()
+    frame_to_pose = cls._get_segment_frame_to_pose(uri.segment_id)
+    for frame, pose in frame_to_pose.items():
+      pose_uri = copy.deepcopy(uri)
+      pose_uri.extra['kitti.frame'] = str(frame).zfill(6)
+      mapper.fill_timestamp(pose_uri)
+      yield datum.StampedDatum(uri=pose_uri, transform=pose)
+
+
+###############################################################################
+### IDatasetUtil Impl
+
+class DSUtil(IDatasetUtil):
+
+  FIXTURES = Fixtures
+
+  @classmethod
+  def all_zips(cls):
+    """Return a new list of every expected archive file name: Object
+    Benchmark archives followed by Tracking Benchmark archives."""
+    zips = list(cls.FIXTURES.OBJECT_BENCHMARK_FNAMES)
+    zips.extend(cls.FIXTURES.TRACKING_BENCHMARK_FNAMES)
+    return zips
+
+  @classmethod
+  def emplace(cls):
+    """Interactively set up the KITTI fixtures and validate them.
+
+    If the fixtures root does not exist yet, prompt the user for the
+    directory holding the downloaded KITTI zips and symlink the fixtures root
+    to it.  Then check that every archive from `all_zips()` is present.
+
+    Returns:
+      bool: True if all expected archives were found, else False.
+    """
+    cls.FIXTURES.maybe_emplace_psegs_kitti_ext()
+
+    if not cls.FIXTURES.ROOT.exists():
+      zips = '\n        '.join('  * %s' % fname for fname in cls.all_zips())
+      cls.show_md("""
+        Due to KITTI license constraints, you need to manually accept the KITTI
+        license to obtain the download URLs for the
+        [Tracking](http://www.cvlibs.net/datasets/kitti/eval_tracking.php) and
+        [Object Benchmark](http://www.cvlibs.net/datasets/kitti/eval_object.php)
+        zip files.  But once you have the URL, it's easy to write a short bash
+        loop with `wget` to fetch them in parallel.
+
+        You'll want to download all the following zip files (do not decompress
+        them) to a single directory on a local disk (spinning disk OK):
+
+        %s
+
+        Once you've downloaded the archives, we'll need the path to where
+        you put them.  Enter that below, or exit this program.
+
+      """ % (zips,))
+      kitti_root = input(
+        "Please enter the directory containing your KITTI zip archives; "
+        "PSegs will create a (read-only) symlink to them: ")
+      kitti_root = Path(kitti_root.strip())
+      assert kitti_root.exists()
+      assert kitti_root.is_dir()
+
+      from oarphpy import util as oputil
+      oputil.mkdir(str(cls.FIXTURES.ROOT.parent))
+
+      cls.show_md("Symlink: \n%s <- %s" % (kitti_root, cls.FIXTURES.ROOT))
+      os.symlink(kitti_root, cls.FIXTURES.ROOT)
+
+      # Make the *symlink* read-only.  Never chmod `kitti_root` itself: that
+      # is the user's real directory, and stripping its execute (search) bits
+      # would make the archives inside it unreachable.  Changing a symlink's
+      # own mode is not supported on every platform, so this is best-effort.
+      import stat
+      if os.chmod in os.supports_follow_symlinks:
+        try:
+          os.chmod(
+            cls.FIXTURES.ROOT,
+            stat.S_IREAD|stat.S_IRGRP|stat.S_IROTH,
+            follow_symlinks=False)
+        except (NotImplementedError, OSError) as e:
+          util.log.info(
+            "Could not make symlink %s read-only: %s" % (
+              cls.FIXTURES.ROOT, e))
+
+    cls.show_md("Validating KITTI archives ...")
+    zips_needed = set(cls.all_zips())
+    zips_have = set()
+    for entry in cls.FIXTURES.ROOT.iterdir():
+      if entry.name in zips_needed:
+        zips_needed.remove(entry.name)
+        zips_have.add(entry.name)
+
+    if zips_needed:
+      s_have = \
+        '\n        '.join('  * %s' % fname for fname in zips_have)
+      s_needed = \
+        '\n        '.join('  * %s' % fname for fname in zips_needed)
+      cls.show_md("""
+        Missing some expected archives!
+
+        Found:
+        
+        %s
+
+        Missing:
+
+        %s
+      """ % (s_have, s_needed))
+      return False
+
+    cls.show_md("... all KITTI archives found!")
+    return True
+
+  @classmethod
+  def test(cls):
+    """Run the `test_kitti` pytest selection from `C.PS_ROOT` through
+    `oputil.run_cmd()` and return True."""
+    from oarphpy import util as oputil
+    pytest_cmd = "cd %s && pytest -s -vvv -k test_kitti" % C.PS_ROOT
+    oputil.run_cmd(pytest_cmd)
+    return True
+
+  @classmethod
+  def build_table(cls):
+    """Run `KITTISDTable.build()` and return True."""
+    KITTISDTable.build()
+    return True
diff --git a/psegs/datasets/kitti_360.py b/psegs/datasets/kitti_360.py
new file mode 100644
index 0000000..1ea88cd
--- /dev/null
+++ b/psegs/datasets/kitti_360.py
@@ -0,0 +1,1162 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import itertools
+import os
+
+import attr
+import numpy as np
+
+from oarphpy import util as oputil
+
+from psegs import util
+from psegs import datum
+from psegs.conf import C
+from psegs.datasets.idsutil import IDatasetUtil
+from psegs.table.sd_table_factory import StampedDatumTableFactory
+from psegs.util import misc
+
+
+
+###############################################################################
+### KITTI-360 Fixtures & Other Constants
+
+class Fixtures(object):
+  """Constants and path helpers for a KITTI-360 copy rooted at `ROOT`.
+
+  Layout assumed by the helpers below (all relative to `ROOT`):
+    * data_2d_raw/<sequence>/<camera>/{data_rect,data_rgb}/<frame>.png
+    * data_3d_raw/<sequence>/{velodyne_points,sick_points}/data/<frame>.bin
+    * data_3d_semantics/<sequence>/{static,dynamic}/<start>_<end>.ply
+    * data_3d_bboxes/<split>/<sequence>.xml
+    * data_poses/<sequence>/{poses.txt,cam0_to_world.txt}
+
+  `<frame>` is the frame id zero-padded to 10 characters.
+  """
+
+  ROOT = C.EXT_DATA_ROOT / 'kitti-360'
+
+  FRONT_CAMERAS = ('image_00', 'image_01')
+  FISHEYE_CAMERAS = ('image_02', 'image_03')
+
+  TRAIN_SEQUENCES = (
+    '2013_05_28_drive_0000_sync',
+    '2013_05_28_drive_0002_sync',
+    '2013_05_28_drive_0003_sync',
+    '2013_05_28_drive_0004_sync',
+    '2013_05_28_drive_0005_sync',
+    '2013_05_28_drive_0006_sync',
+    '2013_05_28_drive_0007_sync',
+    '2013_05_28_drive_0009_sync',
+    '2013_05_28_drive_0010_sync',
+  )
+
+  TEST_SEQUENCES = tuple() # Data not released yet?
+
+  @classmethod
+  def filepath(cls, rpath):
+    """Return `rpath` joined onto the dataset `ROOT`."""
+    return cls.ROOT / rpath
+
+  @classmethod
+  def frame_id_to_fname(cls, frame_id):
+    """Return `frame_id` as a string left-padded with '0' to 10 chars."""
+    return str(frame_id).rjust(10, '0')
+
+  @classmethod
+  def camera_image_path(cls, sequence, camera_name, frame_id):
+    """Return the PNG path for `frame_id` of `camera_name` in `sequence`.
+
+    Raises:
+      ValueError: if `camera_name` is not one of the four known cameras.
+    """
+    if camera_name in cls.FRONT_CAMERAS:
+      subdir = 'data_rect'
+    elif camera_name in cls.FISHEYE_CAMERAS:
+      subdir = 'data_rgb'
+    else:
+      raise ValueError("Unsupported camera %s" % camera_name)
+    return (
+      cls.ROOT / 'data_2d_raw' / sequence / camera_name / subdir /
+        (cls.frame_id_to_fname(frame_id) + ".png"))
+
+  @classmethod
+  def _frame_ids_under(cls, dirpath, pattern):
+    """Return int frame ids parsed from the files under `dirpath`.
+
+    Searches recursively for `pattern`, drops paths flagged by
+    `oputil.is_stupid_mac_file`, and reads each id from the filename stem.
+    """
+    paths = oputil.all_files_recursive(str(dirpath), pattern=pattern)
+    return [
+      int(os.path.split(path)[-1].split('.')[0])
+      for path in paths
+      if not oputil.is_stupid_mac_file(path)
+    ]
+
+  @classmethod
+  def get_camera_frame_ids(cls, sequence, camera_name):
+    """Return the frame ids of all PNGs found for `camera_name`."""
+    return cls._frame_ids_under(
+      cls.ROOT / 'data_2d_raw' / sequence / camera_name, '*.png')
+
+  @classmethod
+  def camera_timestamps_path(cls, sequence, camera_name):
+    """Return the path of the `timestamps.txt` for `camera_name`."""
+    return (
+      cls.ROOT / 'data_2d_raw' / sequence / camera_name / 'timestamps.txt')
+
+  @classmethod
+  def velodyne_cloud_path(cls, sequence, frame_id):
+    """Return the path of the raw Velodyne `.bin` scan for `frame_id`."""
+    return (
+      cls.ROOT / 'data_3d_raw' /
+        sequence / 'velodyne_points' / 'data' /
+          (cls.frame_id_to_fname(frame_id) + ".bin"))
+
+  @classmethod
+  def velodyne_timestamps_path(cls, sequence):
+    """Return the path of the Velodyne `timestamps.txt` for `sequence`."""
+    return (
+      cls.ROOT / 'data_3d_raw' /
+        sequence / 'velodyne_points' / 'timestamps.txt')
+
+  @classmethod
+  def sick_cloud_path(cls, sequence, frame_id):
+    """Return the path of the raw SICK `.bin` scan for `frame_id`."""
+    return (
+      cls.ROOT / 'data_3d_raw' /
+        sequence / 'sick_points' / 'data' /
+          (cls.frame_id_to_fname(frame_id) + ".bin"))
+
+  @classmethod
+  def sick_timestamps_path(cls, sequence):
+    """Return the path of the SICK `timestamps.txt` for `sequence`."""
+    return (
+      cls.ROOT / 'data_3d_raw' /
+        sequence / 'sick_points' / 'timestamps.txt')
+
+  @classmethod
+  def get_raw_scan_frame_ids(cls, sequence, sensor):
+    """Return the frame ids of all `.bin` scans under directory `sensor`."""
+    return cls._frame_ids_under(
+      cls.ROOT / 'data_3d_raw' / sequence / sensor, '*.bin')
+
+  @classmethod
+  def get_fused_scan_frame_ids(cls, sequence):
+    """Return the sorted, de-duplicated frame ids covered by any fused cloud.
+
+    Ids are pooled over both channels of the fused-cloud index.
+    """
+    return sorted(cls.get_fused_scan_frame_id_to_chan_to_path(sequence))
+
+  @classmethod
+  def get_fused_scan_frame_id_to_chan_to_path(cls, sequence):
+    """Index the fused clouds of `sequence` by frame id and channel.
+
+    Returns a dict `frame_id -> {'static' | 'dynamic' -> ply path}`.  Each
+    fused cloud file is named by the inclusive frame interval it covers
+    (e.g. `004631_004927.ply` -> frames 4631..4927), so one path is listed
+    under every frame id of its interval.  If intervals overlap within a
+    channel, the path visited last wins.
+    """
+    def build_frame_id_to_path(channel):
+      paths = oputil.all_files_recursive(
+                str(cls.ROOT / 'data_3d_semantics' / sequence / channel),
+                pattern='*.ply')
+      paths = [p for p in paths if not oputil.is_stupid_mac_file(p)]
+
+      frame_id_to_path = {}
+      for path in paths:
+        stem = os.path.split(path)[-1].split('.')[0]
+        interval = tuple(int(v) for v in stem.split('_'))
+        assert len(interval) == 2, path
+        start, end = interval
+        for frame_id in range(start, end + 1):
+          frame_id_to_path[frame_id] = path
+      return frame_id_to_path
+
+    from collections import defaultdict
+    frame_id_to_chan_to_path = defaultdict(dict)
+    for channel in ('static', 'dynamic'):
+      for frame_id, path in build_frame_id_to_path(channel).items():
+        frame_id_to_chan_to_path[frame_id][channel] = path
+    return frame_id_to_chan_to_path
+
+  @classmethod
+  def cuboids_path(cls, sequence, split='train'):
+    """Return the path of the 3D bounding-box XML for `sequence`."""
+    return (
+      cls.ROOT / 'data_3d_bboxes' / split / (sequence + ".xml"))
+
+  @classmethod
+  def ego_poses_path(cls, sequence):
+    """Return the path of `poses.txt` for `sequence`."""
+    return (
+      cls.ROOT / 'data_poses' / sequence / 'poses.txt')
+
+  @classmethod
+  def cam0_poses_path(cls, sequence):
+    """Return the path of `cam0_to_world.txt` for `sequence`."""
+    return (
+      cls.ROOT / 'data_poses' / sequence / 'cam0_to_world.txt')
+
+
+
+###############################################################################
+### KITTI Parsing Utils
+
+def kitti_360_timestamps_to_nanostamps(txt):
+  """Parse KITTI-360 `timestamps.txt` contents into a list of `np.uint64`
+  nanoseconds since the Unix epoch, one per non-blank line of `txt`."""
+  def line_to_nanostamp(line):
+    # Lines look like `YYYY-MM-DD HH:MM:SS.fffffffff`, which numpy parses
+    # directly.  FMI https://github.com/autonomousvision/kitti360Scripts/blob/081c08b34a14960611f459f23a0ad049542205c6/kitti360scripts/devkits/accumuLaser/src/commons.cpp#L158
+    # Pin the unit to 'ns': numpy otherwise infers it from the number of
+    # fractional digits, so a shorter fraction would not yield nanoseconds.
+    return np.datetime64(line, 'ns').astype(np.uint64)
+
+  return [line_to_nanostamp(l.strip()) for l in txt.split('\n') if l.strip()]
+
+def kitti_360_3d_bboxes_get_parsed_node(d):
+  """Parse a node in a data_3d_bboxes XML file.  Returns a dict of typed
+  values plus `cuboid`: the node's `vertices` moved through `transform`.
+  """
+
+  def to_ndarray(d):
+    r = int(d['rows'])
+    c = int(d['cols'])
+    dtype = str(d['dt'])
+    # OpenCV storage uses 'f' for float32 and 'd' for float64; anything
+    # else is parsed as an integer type.
+    parse = float if dtype in ('f', 'd') else int
+    data = [parse(t) for t in d['data'].split() if t]
+    return np.array(data).reshape((r, c))
+
+  def fill_cuboid(d):
+    # Appears the cuboid bounds are encoded as a scaling transform in the
+    # transform itself; in the raw XML, the vertices are +/- 0.5m for all
+    # objects in the XML
+    # FMI https://github.com/autonomousvision/kitti360Scripts/blob/081c08b34a14960611f459f23a0ad049542205c6/kitti360scripts/helpers/annotation.py#L125
+    R = d['transform'][:3, :3]
+    T = d['transform'][:3, 3]
+    v = d['vertices']
+    d['cuboid'] = np.matmul(R, v.T).T + T
+
+  # ??? Not sure what this is about
+  # FMI https://github.com/autonomousvision/kitti360Scripts/blob/081c08b34a14960611f459f23a0ad049542205c6/kitti360scripts/helpers/annotation.py#L154
+  def to_class(label_value):
+    K360_CLASSMAP = {
+      'driveway': 'parking',
+      'ground': 'terrain',
+      'unknownGround': 'ground', 
+      'railtrack': 'rail track'
+    }
+    return K360_CLASSMAP.get(label_value, label_value)
+
+  # NB: the XML `timestamp` is actually a frame ID, not a unix time, and is
+  # -1 if the object is static.
+  active_frame_id = int(d['timestamp'])
+
+  out = {
+    'index':            int(d['index']),
+    'label':            str(d['label']),
+    'k360_class_name':  to_class(str(d['label'])),
+    'semanticId_orig':  int(d['semanticId_orig']),
+    'semanticId':       int(d['semanticId']),
+    'instanceId':       int(d['instanceId']),
+    'category':         str(d['category']),
+
+    'is_static':        active_frame_id == -1,
+    'active_frame_id':  active_frame_id,
+
+    # 'dynamic':          int(d['dynamic']),
+    #   # In the current release, dynamic is always 0 (?)
+
+    'start_frame':      int(d['start_frame']),
+    'end_frame':        int(d['end_frame']),
+
+    'transform':        to_ndarray(d['transform']),
+    'vertices':         to_ndarray(d['vertices']),
+    'faces':            to_ndarray(d['faces']),
+  }
+
+  fill_cuboid(out)
+  return out
+
+@attr.s(eq=False)
+class Calibration(object):
+  """Sensor extrinsics and rectified-camera intrinsics read from KITTI-360."""
+  ### Camera Extrinsics (Ego is world / IMU)
+
+  cam_left_rect_to_ego = attr.ib(type=datum.Transform, default=None)
+  cam_right_rect_to_ego = attr.ib(type=datum.Transform, default=None)
+  cam_left_fisheye_to_ego = attr.ib(type=datum.Transform, default=None)
+  cam_right_fisheye_to_ego = attr.ib(type=datum.Transform, default=None)
+
+  ### Camera Intrinsics (Rectified)
+  # Built by a factory so instances never share one mutable default array.
+  cam0_K = attr.ib(type=np.ndarray, default=attr.Factory(lambda: np.zeros((3, 3))))
+  cam1_K = attr.ib(type=np.ndarray, default=attr.Factory(lambda: np.zeros((3, 3))))
+
+  ### Laser/Lidar Extrinsics
+
+  cam_left_rect_to_velo = attr.ib(type=datum.Transform, default=None)
+
+  sick_to_velo = attr.ib(type=datum.Transform, default=None)
+
+  def __eq__(self, other):
+    return misc.attrs_eq(self, other)
+
+  @classmethod
+  def from_kitti_360_strs(
+        cls,
+        calib_cam_to_pose,
+        calib_cam_to_velo,
+        calib_sick_to_velo,
+        perspective):
+    """Create and return a `Calibration` instance from calibration data
+    included in KITTI-360.  Each argument is a string with the contents
+    of the file with the same name; FMI see 
+    http://www.cvlibs.net/datasets/kitti-360/documentation.php
+    """
+
+    def str_to_arr(s, shape):
+      from io import StringIO
+      a = np.loadtxt(StringIO(s.strip()))
+      return a.reshape(shape)
+
+    def str_to_RT(s):
+      return str_to_arr(s, shape=(3, 4))
+
+    calib = cls()
+
+    ## Extrinsics
+
+    calib.sick_to_velo = datum.Transform.from_transformation_matrix(
+              str_to_RT(calib_sick_to_velo),
+              src_frame='laser|sick',
+              dest_frame='lidar')
+
+    # # It appears this one has an incorrect name-- in the KITTI-360 code,
+    # # the authors always use the inverse.
+    # cam_left_raw_from_velo = datum.Transform.from_transformation_matrix(
+    #   str_to_RT(calib_cam_to_velo))
+
+    calib.cam_left_rect_to_velo = datum.Transform.from_transformation_matrix(
+              str_to_RT(calib_cam_to_velo),
+              src_frame='camera|left_rect',
+              dest_frame='lidar')
+    # calib.cam_left_rect_to_velo = cam_left_raw_from_velo.get_inverse()
+    # calib.cam_left_rect_to_velo.src_frame = 'camera|left_rect'
+    # calib.cam_left_rect_to_velo.dest_frame = 'lidar'
+
+    # Tr cam -> ego.  Split on the first ':' only, so each line yields (key, values)
+    lines = [l.strip() for l in calib_cam_to_pose.split('\n')]
+    cam_to_sRT = dict(l.split(':', 1) for l in lines if l)
+    calib.cam_left_rect_to_ego = datum.Transform.from_transformation_matrix(
+              str_to_RT(cam_to_sRT['image_00']),
+              dest_frame='ego',
+              src_frame='camera|left_rect')
+    calib.cam_right_rect_to_ego = datum.Transform.from_transformation_matrix(
+              str_to_RT(cam_to_sRT['image_01']),
+              dest_frame='ego',
+              src_frame='camera|right_rect')
+    calib.cam_left_fisheye_to_ego = datum.Transform.from_transformation_matrix(
+              str_to_RT(cam_to_sRT['image_02']),
+              dest_frame='ego',
+              src_frame='camera|left_fisheye')
+    calib.cam_right_fisheye_to_ego = datum.Transform.from_transformation_matrix(
+              str_to_RT(cam_to_sRT['image_03']),
+              dest_frame='ego',
+              src_frame='camera|right_fisheye')
+
+    ## Intrinsics
+
+    # https://github.com/autonomousvision/kitti360Scripts/blob/081c08b34a14960611f459f23a0ad049542205c6/kitti360scripts/helpers/project.py#L76
+
+    lines = [
+      l.strip() for l in perspective.split('\n') if 'calib_time' not in l
+    ]
+    perspective_kv = dict(l.split(':', 1) for l in lines if l)
+    calib.cam0_K = str_to_arr(perspective_kv['P_rect_00'], (3, 4))[:3, :3]
+    calib.cam1_K = str_to_arr(perspective_kv['P_rect_01'], (3, 4))[:3, :3]
+
+    return calib
+
+
+###############################################################################
+### StampedDatumTable Impl
+
+class KITTI360SDTable(StampedDatumTableFactory):
+
+  # TODO: allow inclusion of oxts data.  for now we just use KITTI's refined poses
+
+  FIXTURES = Fixtures
+
+  INCLUDE_FISHEYES = False
+  """bool: Should we emit datums for the fisheye / side cameras?
+  At the time of writing, the distortion parameters for the fisheyes are
+  not available, so we can't make much use of the images / labels for
+  these cameras.
+  """
+
+  INCLUDE_FUSED_CLOUDS = False
+  """bool: Should we emit label datums for the kitti-360 fused lidar clouds?
+  Note: these are the fused clouds in the `data_3d_semantics` portion of
+  KITTI-360.
+  """
+
+  DATASET_NAME = 'kitti-360'
+
+  ## Dataset API
+
+  @classmethod
+  def get_uris_for_sequence(cls, sequence):
+    """Return a list of the URIs of every datum in `sequence`: ego poses,
+    camera images, point clouds and cuboid labels, in that order.  Raises
+    ValueError if `sequence` is in neither the train nor the test split."""
+    fixtures = cls.FIXTURES
+    if sequence in fixtures.TRAIN_SEQUENCES:
+      split_name = 'train'
+    elif sequence in fixtures.TEST_SEQUENCES:
+      split_name = 'test'
+    else:
+      raise ValueError("Unknown sequence %s" % sequence)
+
+    seq_uri = datum.URI(
+      dataset=cls.DATASET_NAME, split=split_name, segment_id=sequence)
+    all_uris = []
+    all_uris.extend(cls._iter_ego_pose_uris(seq_uri))
+    all_uris.extend(cls._iter_camera_image_uris(seq_uri))
+    all_uris.extend(cls._iter_point_cloud_uris(seq_uri))
+    all_uris.extend(cls._iter_cuboid_uris(seq_uri))
+    return all_uris
+
+  @classmethod
+  def create_stamped_datum(cls, uri):
+    """Build the StampedDatum for `uri`, dispatching on `uri.topic`."""
+    if uri.topic.startswith('camera'):
+      return cls._create_camera_image(uri)
+    if uri.topic.startswith(('lidar', 'laser')):
+      return cls._create_point_cloud(uri)
+    if uri.topic == 'ego_pose':
+      return cls._create_ego_pose(uri)
+    if uri.topic == 'labels|cuboids':
+      return cls._create_cuboids(uri)
+    raise ValueError(uri)
+
+
+  ## Subclass API
+
+  @classmethod
+  def _get_all_segment_uris(cls):
+    """Return one segment-level URI (split 'train') for each sequence in
+    `FIXTURES.TRAIN_SEQUENCES`."""
+    segment_uris = []
+    for sequence in cls.FIXTURES.TRAIN_SEQUENCES:
+      segment_uri = datum.URI(
+        dataset=cls.DATASET_NAME, split='train', segment_id=sequence)
+      segment_uris.append(segment_uri)
+    return segment_uris
+
+  @classmethod
+  def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):
+    """Return a list of RDDs of StampedDatums, at most one per segment.
+
+    If `only_segments` is given, only segments soft-matching one of those
+    URIs are read.  If `existing_uri_df` is given, datums whose (dataset,
+    split, segment_id, topic, timestamp) already appear in it are skipped.
+    """
+    util.log.info("Creating datums for KITTI-360 ...")
+    ## First build a set of sequences to read ...
+    seg_uris = cls.get_all_segment_uris()
+    if only_segments:
+      util.log.info(
+        "Filtering to only %s segments" % len(only_segments))
+      seg_uris = [
+        uri for uri in seg_uris
+        if any(
+          suri.soft_matches_segment_of(uri) for suri in only_segments)
+      ]
+
+    ## ... now construct datum RDDS in chunks.
+    URIS_PER_PARTITION = 256
+      # Requires about 256 Megabytes of memory per chunk
+
+    if cls.INCLUDE_FUSED_CLOUDS:
+      URIS_PER_PARTITION = max(10, URIS_PER_PARTITION // 4)
+        # Fused clouds are much larger
+
+    datum_rdds = []
+    for seg_uri in seg_uris:
+      uris = cls.get_uris_for_sequence(seg_uri.segment_id)
+      # An empty sequence would break the uris[-1] / uris[0] span below.
+      if not uris:
+        util.log.info(
+          "... seq %s has no URIs, skipping ..." % seg_uri.segment_id)
+        continue
+      # Some datums are more expensive to create than others, so distribute
+      # them evenly in the RDD
+      uris = sorted(uris, key=lambda u: u.timestamp)
+      seg_span_sec = 1e-9 * (uris[-1].timestamp - uris[0].timestamp)
+      n_partitions = max(1, int(len(uris) / URIS_PER_PARTITION))
+
+      util.log.info(
+        "... seq %s has %s URIs spanning %2.f sec, creating %s slices ..." % (
+          seg_uri.segment_id, len(uris), seg_span_sec, n_partitions))
+
+      uri_rdd = spark.sparkContext.parallelize(uris, numSlices=n_partitions)
+
+      # Are we trying to resume? Filter URIs if necessary.
+      if existing_uri_df is not None:
+        util.log.info("... checking existing URIs ...")
+        def to_datum_id(obj):
+          return (
+            obj.dataset,
+            obj.split,
+            obj.segment_id,
+            obj.topic,
+            obj.timestamp)
+        key_uri_rdd = uri_rdd.map(lambda u: (to_datum_id(u), u))
+        existing_keys_nulls = existing_uri_df.rdd.map(to_datum_id).map(
+                                    lambda t: (t, None))
+        uri_rdd = key_uri_rdd.subtractByKey(existing_keys_nulls).map(
+                                        lambda kv: kv[1])
+        # Only emptiness matters here; avoid collecting every URI to the driver
+        if uri_rdd.isEmpty():
+          util.log.info("... all datums already exist, skipping this chunk ...")
+          continue
+
+      datum_rdd = uri_rdd.map(cls.create_stamped_datum)
+
+      # from pyspark import StorageLevel
+      # datum_rdd = datum_rdd.persist(StorageLevel.DISK_ONLY) # hacks?
+      datum_rdds.append(datum_rdd)
+
+    util.log.info("... partitioned datums into %s RDDs." % len(datum_rdds))
+    return datum_rdds
+
+
+    #   URIS_PER_TASK
+
+
+
+    # import itertools
+    # iter_tasks = itertools.chain.from_iterable(
+    #   ((seg_uri, p) for p in range(cls.PARTITIONS_PER_SEGMENT))
+    #   for seg_uri in seg_uris)
+    
+    
+    # for task_chunk in oputil.ichunked(iter_tasks, TASKS_PER_RDD):
+    #   util.log.info([(str(u), p) for u, p in task_chunk])
+    #   task_rdd = spark.sparkContext.parallelize(task_chunk)
+    #   def iter_uris_for_task(task):
+    #     seg_uri, partition = task
+    #     uris = cls.get_uris_for_sequence(seg_uri.segment_id)
+    #     for i, uri in enumerate(uris):
+    #       if (i % cls.PARTITIONS_PER_SEGMENT) == partition:
+    #         yield uri
+      
+    #   uri_rdd = task_rdd.flatMap(iter_uris_for_task)
+
+      
+      
+    #   # Some datums are more expensive to materialize than others.  Force
+    #   # a repartition to avoid stragglers.
+    #   uri_rdd = uri_rdd.repartition(TASKS_PER_RDD)
+    #   util.log.info(uri_rdd.count())
+
+    #   datum_rdd = uri_rdd.map(cls.create_stamped_datum)
+    #   datum_rdds.append(datum_rdd)
+    # return datum_rdds
+
+
+
+
+  ## Private API: Utils
+
+  @classmethod
+  def _get_frame_id(cls, uri):  # Integer frame id stored in `uri.extra`
+    return int(uri.extra['kitti-360.frame_id'])
+
+  CAMERA_NAME_TO_TOPIC = {
+    'image_00': 'camera|left_rect',
+    'image_01': 'camera|right_rect',
+    'image_02': 'camera|left_fisheye',
+    'image_03': 'camera|right_fisheye',
+  }
+
+  @classmethod
+  def _get_calib(cls):
+    """Return the `Calibration` parsed from `calibration/`, cached on `cls`."""
+    if not hasattr(cls, '_calib'):
+      def open_and_read(rpath):
+        # Close the handle promptly rather than leaving it to the GC.
+        with open(cls.FIXTURES.filepath(rpath)) as f:
+          return f.read()
+
+      calib_cam_to_pose = open_and_read('calibration/calib_cam_to_pose.txt')
+      calib_cam_to_velo = open_and_read('calibration/calib_cam_to_velo.txt')
+      calib_sick_to_velo = open_and_read('calibration/calib_sick_to_velo.txt')
+      perspective = open_and_read('calibration/perspective.txt')
+      cls._calib = Calibration.from_kitti_360_strs(
+                    calib_cam_to_pose, calib_cam_to_velo,
+                    calib_sick_to_velo, perspective)
+    return cls._calib
+
+  @classmethod
+  def _get_nanostamp(cls, sequence, channel, frame_id):
+    """Return the nanostamp of `frame_id` for `channel` ('velodyne', 'sick',
+    or a camera name); all of a sequence's timestamp files are read on the
+    first lookup for that sequence and cached on `cls`."""
+    if not hasattr(cls, '_seq_to_chan_to_ts'):
+      cls._seq_to_chan_to_ts = {}
+    if sequence not in cls._seq_to_chan_to_ts:
+      def read_ts(chan):
+        if chan in cls.CAMERA_NAME_TO_TOPIC:
+          path = cls.FIXTURES.camera_timestamps_path(sequence, chan)
+        elif chan == 'velodyne':
+          path = cls.FIXTURES.velodyne_timestamps_path(sequence)
+        elif chan == 'sick':
+          path = cls.FIXTURES.sick_timestamps_path(sequence)
+        else:
+          raise ValueError(chan)
+        with open(path, 'r') as f:  # close the handle once parsed
+          return kitti_360_timestamps_to_nanostamps(f.read())
+
+      chans = ['velodyne', 'sick'] + list(cls.CAMERA_NAME_TO_TOPIC.keys())
+      cls._seq_to_chan_to_ts[sequence] = {c: read_ts(c) for c in chans}
+
+    return cls._seq_to_chan_to_ts[sequence][channel][frame_id]
+
+  ## Private API: Ego Pose
+
+  @classmethod
+  def _iter_ego_pose_uris(cls, base_uri):
+    """Yield one 'ego_pose' URI per row of the segment's poses file."""
+    segment_id = base_uri.segment_id
+    pose_table = np.loadtxt(cls.FIXTURES.ego_poses_path(segment_id))
+    for raw_frame_id in pose_table[:, 0]:
+      frame_id = int(raw_frame_id)
+      # KITTI-360 poses are derived from their own lidar-heavy SLAM;
+      # we'll use lidar timestamps for ego poses for convenience.
+      stamp = cls._get_nanostamp(segment_id, 'velodyne', frame_id)
+      yield base_uri.replaced(
+              topic='ego_pose',
+              timestamp=stamp,
+              extra={'kitti-360.frame_id': str(frame_id)})
+
+  @classmethod
+  def _create_ego_pose(cls, uri):  # Wrap the pose of the URI's frame in a StampedDatum
+    transform = cls._get_ego_pose(uri.segment_id, cls._get_frame_id(uri))
+    assert transform, "Programming Error: no pose available for %s" % uri
+    return datum.StampedDatum(uri=uri, transform=transform)
+
+  @classmethod
+  def _get_ego_pose(cls, sequence, frame_id):
+    """Return the pose for `frame_id` as a `datum.Transform` with
+    src_frame='world' and dest_frame='ego', or None if the poses file has
+    no row for that frame.  The parsed file is cached on `cls` per sequence."""
+    if not hasattr(cls, '_ego_pose_cache'):
+      cls._ego_pose_cache = {}
+    if sequence not in cls._ego_pose_cache:
+      table = np.loadtxt(cls.FIXTURES.ego_poses_path(sequence))
+      # Column 0 is the frame id; remaining columns are reshaped to 3x4 each
+      matrices = np.reshape(table[:, 1:], [-1, 3, 4])
+      cls._ego_pose_cache[sequence] = {
+        int(fid): RT for fid, RT in zip(table[:, 0], matrices)}
+
+    RT_world_to_ego = cls._ego_pose_cache[sequence].get(frame_id)
+    if RT_world_to_ego is None:
+      # Poses are incomplete :(  There are even gaps for several seconds.
+      return None
+    return datum.Transform.from_transformation_matrix(
+                RT_world_to_ego,
+                src_frame='world', dest_frame='ego')
+
+
+  ## Private API: Camera Images
+
+  @classmethod
+  def _iter_camera_image_uris(cls, base_uri):
+    """Yield a URI per image of each enabled camera (see `INCLUDE_FISHEYES`)."""
+    segment_id = base_uri.segment_id
+    camera_names = list(cls.FIXTURES.FRONT_CAMERAS)
+    if cls.INCLUDE_FISHEYES:
+      camera_names.extend(cls.FIXTURES.FISHEYE_CAMERAS)
+    for camera_name in camera_names:
+      frame_ids = cls.FIXTURES.get_camera_frame_ids(segment_id, camera_name)
+      for frame_id in frame_ids:
+        yield base_uri.replaced(
+                topic=cls.CAMERA_NAME_TO_TOPIC[camera_name],
+                timestamp=cls._get_nanostamp(segment_id, camera_name, frame_id),
+                extra={
+                  'kitti-360.frame_id': str(frame_id),
+                  'kitti-360.camera': camera_name})
+
+  @classmethod
+  def _create_camera_image(cls, uri):
+    """Build the camera-image StampedDatum for `uri`.
+
+    Only the start of the PNG is read here (for width / height); pixels are
+    loaded lazily through `image_factory`.  Frames without an ego pose get a
+    default `datum.Transform()` and are flagged in `extra`.
+    """
+    frame_id = cls._get_frame_id(uri)
+    calib = cls._get_calib()
+    # TODO: use the camera0 pose? not sure why separate from IMU / ego pose
+    img_path = cls.FIXTURES.camera_image_path(
+                  uri.segment_id, uri.extra['kitti-360.camera'], frame_id)
+
+    def _load_image(path):
+      import imageio
+      return imageio.imread(path)
+    image_factory = lambda: _load_image(img_path)
+
+    from psegs.util import misc
+    # Read just the leading bytes, and close the file handle promptly.
+    with open(img_path, 'rb') as f:
+      width, height = misc.get_png_wh(f.read(100))
+
+    if uri.topic == 'camera|left_rect':
+      K = calib.cam0_K
+      ego_to_sensor = calib.cam_left_rect_to_ego.get_inverse()
+    elif uri.topic == 'camera|right_rect':
+      K = calib.cam1_K
+      ego_to_sensor = calib.cam_right_rect_to_ego.get_inverse()
+    elif uri.topic == 'camera|left_fisheye':
+      K = calib.cam0_K # NB: no K for fisheyes -- this is just for debugging
+      ego_to_sensor = calib.cam_left_fisheye_to_ego.get_inverse()
+    elif uri.topic == 'camera|right_fisheye':
+      K = calib.cam0_K # NB: no K for fisheyes -- this is just for debugging
+      ego_to_sensor = calib.cam_right_fisheye_to_ego.get_inverse()
+    else:
+      raise ValueError(uri)
+
+    ego_pose = cls._get_ego_pose(uri.segment_id, frame_id)
+    ci = datum.CameraImage(
+          sensor_name=uri.topic,
+          image_factory=image_factory,
+          width=width,
+          height=height,
+          timestamp=uri.timestamp,
+          ego_pose=ego_pose or datum.Transform(),
+          K=K,
+          ego_to_sensor=ego_to_sensor,
+          extra={'kitti-360.has-valid-ego-pose': str(bool(ego_pose))})
+    return datum.StampedDatum(uri=uri, camera_image=ci)
+
+
+  ## Private API: Point Clouds
+
+  @classmethod
+  def _get_fused_scan_idx(cls, sequence):
+    """Return the fused-cloud index of `sequence`, cached on `cls`."""
+    if not hasattr(cls, '_fused_scan_idx'):
+      cls._fused_scan_idx = {}
+    if sequence not in cls._fused_scan_idx:
+      build = cls.FIXTURES.get_fused_scan_frame_id_to_chan_to_path
+      cls._fused_scan_idx[sequence] = build(sequence)
+    return cls._fused_scan_idx[sequence]
+
+  @classmethod
+  def _iter_point_cloud_uris(cls, base_uri):
+    """Yield URIs for the segment's point clouds.
+
+    Emits every raw Velodyne scan (topic 'lidar'), then every raw SICK scan
+    (topic 'laser|sick'), each stamped from its own sensor's timestamps.
+    When `INCLUDE_FUSED_CLOUDS` is set, also emits 'lidar|fused_static' /
+    'lidar|fused_dynamic' URIs for frames that have an ego pose; those use
+    the frame's Velodyne timestamp and carry the source path in `extra`.
+    """
+    segment_id = base_uri.segment_id
+
+    # (topic, directory under data_3d_raw, timestamp channel)
+    raw_sensors = (
+      ('lidar', 'velodyne_points', 'velodyne'),
+      ('laser|sick', 'sick_points', 'sick'),
+    )
+    for topic, sensor_dir, ts_chan in raw_sensors:
+      frame_ids = cls.FIXTURES.get_raw_scan_frame_ids(segment_id, sensor_dir)
+      for frame_id in frame_ids:
+        yield base_uri.replaced(
+                topic=topic,
+                timestamp=cls._get_nanostamp(segment_id, ts_chan, frame_id),
+                extra={
+                  'kitti-360.frame_id': str(frame_id),
+                })
+
+    if cls.INCLUDE_FUSED_CLOUDS:
+      frame_id_to_chan_to_path = cls._get_fused_scan_idx(segment_id)
+      for frame_id, chan_to_path in frame_id_to_chan_to_path.items():
+        # Fused clouds are in the world frame, so they are only useful for
+        # frames where we have an ego pose
+        if cls._get_ego_pose(segment_id, frame_id) is None:
+          continue
+        for chan, path in chan_to_path.items():
+          yield base_uri.replaced(
+              topic='lidar|fused_' + chan,
+              timestamp=
+                cls._get_nanostamp(segment_id, 'velodyne', frame_id),
+              extra={
+                'kitti-360.frame_id': str(frame_id),
+                'kitti-360.fused_cloud_path': path,
+                  # path for uniquely identifying the fused cloud files,
+                  # which each span multiple frames
+              })
+
+  @classmethod
+  def _create_point_cloud(cls, uri):
+    """Build the point-cloud StampedDatum for `uri` (raw or fused topic)."""
+    frame_id = cls._get_frame_id(uri)
+    calib = cls._get_calib()
+    velo_to_ego = (
+      calib.cam_left_rect_to_ego @ calib.cam_left_rect_to_velo.get_inverse())
+    # NOTE(review): raw Velodyne clouds below have 4 columns vs 3 names -- confirm
+    cloud_colnames = ['x', 'y', 'z']
+
+    if uri.topic == 'lidar':
+      vel_path = cls.FIXTURES.velodyne_cloud_path(
+                      uri.segment_id, cls._get_frame_id(uri))
+      # Loaded lazily: the file is read as float32 and reshaped to 4 columns
+      def _get_vel_cloud(path):
+        import numpy as np
+        cloud = np.fromfile(path, dtype=np.float32)
+        cloud = np.reshape(cloud, [-1, 4])
+        return cloud
+
+      cloud_factory = lambda: _get_vel_cloud(vel_path)
+
+      ego_to_sensor = velo_to_ego.get_inverse()
+
+    elif uri.topic == 'laser|sick':
+      sick_path = cls.FIXTURES.sick_cloud_path(
+                      uri.segment_id, cls._get_frame_id(uri))
+      # Loaded lazily: 2 float32 values per point, mapped to (0, -v0, v1)
+      def _get_sick_cloud(path):
+        import numpy as np
+        cloud = np.fromfile(path, dtype=np.float32)
+        cloud = np.reshape(cloud, [-1, 2])
+        cloud = np.concatenate([
+                    np.zeros_like(cloud[:, 0:1]), # ? no x-dimension?
+                    -cloud[:, 0:1],
+                    cloud[:, 1:2],
+                  ],
+                  axis=1)
+        return cloud
+
+      cloud_factory = lambda: _get_sick_cloud(sick_path)
+
+      sick_from_ego = calib.sick_to_velo['laser|sick', 'lidar'] @ velo_to_ego
+      ego_to_sensor = sick_from_ego.get_inverse()
+
+    elif uri.topic in ('lidar|fused_static', 'lidar|fused_dynamic'):
+      # The fused cloud is moved through `T_world_to_velo` when it is loaded
+      T_world_to_ego = cls._get_ego_pose(uri.segment_id, frame_id)
+      assert T_world_to_ego is not None, \
+        "Programming error: no pose %s" % uri
+      # T_world_to_velo = (velo_to_ego.get_inverse() @ T_world_to_ego).get_inverse()
+      T_world_to_velo = (T_world_to_ego @ velo_to_ego).get_inverse()
+
+      chan = 'static' if 'static' in uri.topic else 'dynamic'
+      frame_id_to_chan_to_path = cls._get_fused_scan_idx(uri.segment_id)
+      fused_path = frame_id_to_chan_to_path[frame_id][chan]
+
+      def _get_fused_cloud(T_world_to_velo, fused_path):
+        import numpy as np
+        from plyfile import PlyData
+        with open(fused_path, 'rb') as f:
+          plydata = PlyData.read(f)
+        assert len(plydata.elements) >= 1, plydata
+        data = plydata.elements[0].data
+        cloud_xyz = np.array([data['x'], data['y'], data['z']]).T
+
+        # import open3d
+        # import numpy as np
+        # pcd = open3d.io.read_point_cloud(str(fused_path))
+        # cloud = np.asarray(pcd.points)
+        # cloud = T_world_to_velo.apply(cloud).T
+
+        cloud_xyz = T_world_to_velo.apply(cloud_xyz).T
+        cloud_rgbsiv = np.array([
+          data['red'],
+          data['green'],
+          data['blue'],
+          data['semantic'],
+          data['instance'],
+          data['visible'],
+        ]).T
+
+        return np.hstack([cloud_xyz, cloud_rgbsiv])
+
+      cloud_factory = lambda: _get_fused_cloud(T_world_to_velo, fused_path)
+      cloud_colnames = [
+          'x', 'y', 'z',
+          'r', 'g', 'b',
+          'semantic_id',
+          'instance_id',
+          'is_visible',
+        ]
+
+      ego_to_sensor = velo_to_ego.get_inverse()
+
+    else:
+      raise ValueError(uri)
+    # May be None; a default Transform is substituted below and flagged in `extra`
+    ego_pose = cls._get_ego_pose(uri.segment_id, frame_id)
+
+    pc = datum.PointCloud(
+          sensor_name=uri.topic,
+          timestamp=uri.timestamp,
+          # cloud=cloud,
+          cloud_factory=cloud_factory,
+          cloud_colnames=cloud_colnames,
+          ego_to_sensor=ego_to_sensor,
+          ego_pose=ego_pose or datum.Transform(),
+          extra={'kitti-360.has-valid-ego-pose': str(bool(ego_pose))})
+    return datum.StampedDatum(uri=uri, point_cloud=pc)
+
+
+  ## Private API: Cuboids
+  
+  @classmethod
+  def _get_raw_cuboids_for_segment(cls, sequence):
+    """Return the parsed cuboid annotation dicts for `sequence`; results
+    are memoized on the class so repeat calls skip the slow XML parse."""
+    if not hasattr(cls, '_seq_to_raw_cuboids_cache'):
+      cls._seq_to_raw_cuboids_cache = {}
+    cache = cls._seq_to_raw_cuboids_cache
+    if sequence not in cache:
+      import xmltodict
+      # Fix: read via a context manager so the file handle is closed.
+      with open(cls.FIXTURES.cuboids_path(sequence)) as f:
+        # NB: this parse() takes 4-6 sec and we can't make it any faster
+        d = xmltodict.parse(f.read())
+      # Tag each parsed node with its key under `opencv_storage`.
+      cache[sequence] = [
+        dict(kitti_360_3d_bboxes_get_parsed_node(v), obj_name=k)
+        for (k, v) in d['opencv_storage'].items()
+      ]
+    return cache[sequence]
+
+  @classmethod
+  def _iter_cuboid_uris(cls, base_uri):
+    """Yield one 'labels|cuboids' URI (derived from `base_uri`) for every
+    frame id covered by some raw cuboid's [start_frame, end_frame] range,
+    in ascending frame order.
+    """
+    segment_id = base_uri.segment_id
+    frame_ids = set()
+    for raw in cls._get_raw_cuboids_for_segment(segment_id):
+      frame_ids.update(range(raw['start_frame'], raw['end_frame'] + 1))
+    for fid in sorted(frame_ids):
+      # KITTI-360 poses are derived from their own lidar-heavy SLAM;
+      # we'll use lidar timestamps for cuboid labels because we believe
+      # they are posed in this SLAM-based world frame.
+      stamp = cls._get_nanostamp(segment_id, 'velodyne', fid)
+      yield base_uri.replaced(
+              topic='labels|cuboids',
+              timestamp=stamp,
+              extra={'kitti-360.frame_id': str(fid)})
+  
+  @classmethod
+  def _create_cuboids(cls, uri):
+    """Build the StampedDatum of ego-frame cuboids for the frame of `uri`;
+    the cuboid list is empty when that frame has no ego pose."""
+    frame_id = cls._get_frame_id(uri)
+    raw_cuboids = cls._get_raw_cuboids_for_segment(uri.segment_id)
+
+    def is_in_current_frame(obj):
+      return (
+        # Every object has a frame range
+        (obj['start_frame'] <= frame_id <= obj['end_frame']) and
+        # Static objects are active for the entire frame range; dynamic
+        #  objects have different annotations for each frame.
+        (obj['is_static'] or (obj['active_frame_id'] == frame_id)))
+
+    raw_cuboids = [
+      obj for obj in raw_cuboids
+      if is_in_current_frame(obj)
+    ]
+    cuboids = []
+    for obj in raw_cuboids:
+      # Kitti-360 cuboids are in the IMU (world) frame:
+      # x = forward, y = right, z = down
+      # And vertices are in the (weird) order:
+      # +x +y +z
+      # +x +y -z
+      # +x -y +z
+      # +x -y -z | psegs convention differs for rear face:
+      # -x +y -z |        -x +y +z
+      # -x +y +z |        -x +y -z
+      # -x -y -z |        -x -y +z
+      # -x -y +z |        -x -y -z
+      # We'll permute the faces to match psegs convention.
+      front_world = obj['cuboid'][[0, 1, 2, 3], :]
+      rear_world = obj['cuboid'][[5, 4, 7, 6], :]
+
+      # Now get dimensions (edge lengths measured between cuboid vertices)
+      w = np.linalg.norm(front_world[0, :] - front_world[2, :])
+      l = np.linalg.norm(front_world[0, :] - rear_world[0, :])
+      h = np.linalg.norm(front_world[0, :] - front_world[1, :])
+
+      # Now get pose (in world frame); translation is the vertex centroid
+      T_world = np.mean(obj['cuboid'], axis=0)
+
+      # KITTI-360 Transform confounds R and S; we need to separate them.
+      # See also https://math.stackexchange.com/a/1463487
+      obj_sR = obj['transform'][:3, :3]
+      sx = np.linalg.norm(obj_sR[:, 0])
+      sy = np.linalg.norm(obj_sR[:, 1])
+      sz = np.linalg.norm(obj_sR[:, 2])
+      R_world = obj_sR.copy()
+      R_world[:, 0] *= 1. / sx
+      R_world[:, 1] *= 1. / sy
+      R_world[:, 2] *= 1. / sz
+
+      T_world_to_obj = datum.Transform.from_transformation_matrix(
+                          np.column_stack([R_world, T_world]),
+                          src_frame='world',
+                          dest_frame='obj')
+
+      # Convert pose to ego frame (PSegs standard)
+      T_world_to_ego = cls._get_ego_pose(uri.segment_id, frame_id)
+      if not T_world_to_ego:
+        # Labels are in world frame, so can't put them in ego frame
+        continue
+
+      T_obj_from_ego = (
+        T_world_to_obj.get_inverse() @ T_world_to_ego).get_inverse()
+      T_obj_from_ego.src_frame = 'ego' # FIXME: frame names are patched by hand here; matrix believed right -- verify
+      T_obj_from_ego.dest_frame = 'obj'
+
+      # Instance IDs are only distinct within each class:
+      # https://github.com/autonomousvision/kitti360Scripts/issues/5#issuecomment-722217758
+      # https://github.com/autonomousvision/kitti360Scripts/blob/feb142bd8d99df6cbde77ae46b17e912cb3a633b/kitti360scripts/helpers/annotation.py#L37
+      track_id = 1000 * obj['semanticId'] + obj['instanceId']
+
+      cuboids.append(datum.Cuboid(
+          track_id=track_id,
+          category_name=obj['k360_class_name'],
+          ps_category='TODO', # TODO: map the KITTI-360 class to a PSegs category
+          timestamp=uri.timestamp,
+          length_meters=l,
+          width_meters=w,
+          height_meters=h,
+          obj_from_ego=T_obj_from_ego,
+          ego_pose=T_world_to_ego,
+          extra={ # NOTE(review): keys say 'kitt-360' (sic); other keys in this file use 'kitti-360'
+            'kitt-360.cuboid.index': str(obj['index']),
+            'kitt-360.cuboid.label': str(obj['label']),
+            'kitt-360.cuboid.category': str(obj['category']),
+            'kitt-360.cuboid.start_frame': str(obj['start_frame']),
+            'kitt-360.cuboid.end_frame': str(obj['end_frame']),
+          }))
+    return datum.StampedDatum(uri=uri, cuboids=cuboids)
+
+
+
+###############################################################################
+### IDatasetUtil Impl
+
+class DSUtil(IDatasetUtil):
+  """Interactive setup / validation of a local KITTI-360 data root."""
+  FIXTURES = Fixtures
+
+  REQUIRED_DIRS = (
+    'calibration',
+    'data_2d_raw',
+    'data_3d_raw',
+    'data_3d_semantics',
+    'data_3d_bboxes',
+    'data_poses',
+  )
+
+  OPTIONAL_DIRS = (
+    'data_2d_semantics',
+  )
+
+  @classmethod
+  def emplace(cls):
+    """Ensure KITTI-360 data is reachable at `FIXTURES.ROOT`, prompting the
+    user for a directory to symlink when required dirs are missing.  Returns
+    True only if every entry of `REQUIRED_DIRS` is found under the root."""
+    dirs_required = set(cls.FIXTURES.filepath(d) for d in cls.REQUIRED_DIRS)
+    if not all(p.exists() for p in dirs_required):
+      # Fix: list the real dir names; required and optional lists differ.
+      req = '\n        '.join('  * %s' % d for d in cls.REQUIRED_DIRS)
+      opt = '\n        '.join('  * %s' % d for d in cls.OPTIONAL_DIRS)
+      cls.show_md("""
+        Due to KITTI-360 license constraints, you need to manually accept the
+        KITTI-360 license and download the files at
+        [the KITTI-360 website](http://www.cvlibs.net/datasets/kitti-360/download.php).
+        
+        The KITTI-360 team provides download scripts that will help unzip
+        files into place.  The total dataset is about 650GB unzipped
+        (spinning disk OK).
+
+        Required KITTI-360 data dirs:
+
+        %s
+
+        Optional KITTI-360 data dirs:
+
+        %s
+        """ % (req, opt))
+
+      kitti_root = input(
+        "Please enter the directory containing your KITTI zip archives; "
+        "PSegs will create a (read-only) symlink to them: ")
+      kitti_root = Path(kitti_root.strip())
+      assert kitti_root.exists()
+      assert kitti_root.is_dir()
+
+      oputil.mkdir(str(cls.FIXTURES.ROOT.parent))
+      cls.show_md("Symlink: \n%s <- %s" % (kitti_root, cls.FIXTURES.ROOT))
+      os.symlink(kitti_root, cls.FIXTURES.ROOT)
+
+      # NOTE(review): chmods the target dir (not the link) and drops its x bit
+      import stat
+      os.chmod(
+        kitti_root,
+        stat.S_IREAD|stat.S_IRGRP|stat.S_IROTH,
+        follow_symlinks=False)
+
+    cls.show_md("Validating KITTI archives ...")
+    dirs_needed = set(cls.REQUIRED_DIRS)  # Fix: was seeded from all_zips()
+    dirs_have = set()
+    for entry in cls.FIXTURES.ROOT.iterdir():
+      if entry.name in dirs_needed:
+        dirs_needed.remove(entry.name)
+        dirs_have.add(entry.name)
+
+    if dirs_needed:
+      s_have = '\n        '.join('  * %s' % d for d in sorted(dirs_have))
+      s_needed = '\n        '.join('  * %s' % d for d in sorted(dirs_needed))
+      cls.show_md("""
+        Missing some expected data dirs!
+
+        Found:
+        
+        %s
+
+        Missing:
+
+        %s
+      """ % (s_have, s_needed))
+      return False
+
+    cls.show_md("... all KITTI-360 data found!")
+    return True
diff --git a/psegs/datasets/kitti_sf.py b/psegs/datasets/kitti_sf.py
new file mode 100644
index 0000000..0735548
--- /dev/null
+++ b/psegs/datasets/kitti_sf.py
@@ -0,0 +1,777 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+from pathlib import Path
+
+import numpy as np
+from oarphpy import util as oputil
+
+from psegs import datum
+from psegs import util
+from psegs.conf import C
+from psegs.datasets.idsutil import IDatasetUtil
+from psegs.table.sd_table_factory import StampedDatumTableFactory
+
+
+"""
+
+first load just stereo, maybe get from scene flow..
+  * (CameraImage left, CameraImage right, Matches uvleft_uv_right)
+
+test trimesh viz
+
+"""
+
+
+###############################################################################
+### KittiSceneFlow Fixtures & Other Constants
+
+class Fixtures(object):
+  """Locations of the KITTI Scene Flow 2015 zip archives and test fixtures."""
+
+  ROOT = C.EXT_DATA_ROOT / 'kitti_sf_archives'
+
+  ZIPS = (
+    'data_scene_flow.zip',
+    'data_scene_flow_calib.zip',
+  )
+
+  @classmethod
+  def zip_path(cls, zipname):
+    """Return the path of archive `zipname` under `ROOT`."""
+    return cls.ROOT / zipname
+
+  @classmethod
+  def maybe_emplace_psegs_kitti_sf_ext(cls):
+    """Placeholder hook: currently only prints 'todo'."""
+    print('todo')
+
+  @classmethod
+  def get_all_train_test_frame_ids(cls):
+    """Return `(train_frame_ids, test_frame_ids)` from `data_scene_flow.zip`:
+    the base names (up to the first '.') of entries whose path contains
+    '<split>/image_2', '.png' and '_10'."""
+    import zipfile
+    # Fix: close the archive handle once the listing has been read.
+    with zipfile.ZipFile(cls.zip_path('data_scene_flow.zip')) as z:
+      entries = z.namelist()
+
+    def frame_ids_under(prefix):
+      return [
+        Path(p).name.split('.')[0]
+        for p in entries
+        if (prefix in p and '.png' in p and '_10' in p)
+      ]
+
+    train_frame_ids = frame_ids_under('training/image_2')
+    test_frame_ids = frame_ids_under('testing/image_2')
+    return train_frame_ids, test_frame_ids
+
+  ### Unit Test Support #######################################################
+
+  TEST_FIXTURES_ROOT = Path('/tmp/psegs_kitti_sf_test_fixtures')
+
+  EXTERNAL_FIXTURES_ROOT = C.EXTERNAL_TEST_FIXTURES_ROOT / 'kitti_sf'
+
+  STEREO_TEST_FRAMES = ('000016_10', '000024_10', '000177_10')
+
+  @classmethod
+  def stereo_fixture_dir(cls):
+    """Return the stereo test fixture dir, unarchiving into it when needed."""
+    fixture_dir = cls.TEST_FIXTURES_ROOT / 'stereo'
+    if util.missing_or_empty(fixture_dir):
+      util.log.info(
+        "Putting Stereo Benchmark test fixtures in %s" % fixture_dir)
+      oputil.cleandir(fixture_dir)
+
+      # Disparity
+      util.unarchive_entries(
+        cls.zip_path('data_scene_flow.zip'),
+        ['training/disp_occ_0/%s.png' % f for f in cls.STEREO_TEST_FRAMES],
+        fixture_dir)
+
+      # RGB
+      util.unarchive_entries(
+        cls.zip_path('data_scene_flow.zip'),
+        ['training/image_2/%s.png' % f for f in cls.STEREO_TEST_FRAMES],
+        fixture_dir)
+      util.unarchive_entries(
+        cls.zip_path('data_scene_flow.zip'),
+        ['training/image_3/%s.png' % f for f in cls.STEREO_TEST_FRAMES],
+        fixture_dir)
+
+      # Calib
+      util.unarchive_entries(
+        cls.zip_path('data_scene_flow_calib.zip'),
+        [
+          'training/calib_cam_to_cam/%s.txt' % f.replace('_10', '')
+          for f in cls.STEREO_TEST_FRAMES
+        ],
+        fixture_dir)
+
+    return fixture_dir
+
+
+###############################################################################
+### KITTI Parsing Utils
+
+def kittisf15_load_flow(path):
+  """Decode the KITTI flow PNG at `path` into an (h, w, 2) float64 array;
+  pixels whose third channel is 0 are treated as invalid and zeroed."""
+  # Based upon https://github.com/liruoteng/OpticalFlowToolkit/blob/master/lib/flowlib.py#L559
+  import png
+  import numpy as np
+
+  direct = png.Reader(filename=path).asDirect()
+  rows = list(direct[2])
+  w, h = direct[3]['size']
+  flow = np.zeros((h, w, 3), dtype=np.float64)
+  for r, row in enumerate(rows):
+    for c in range(3):
+      # Samples are interleaved: channel `c` is every third value of a row
+      flow[r, :, c] = row[c::3]
+
+  invalid = (flow[:, :, 2] == 0)
+  flow[:, :, :2] = (flow[:, :, :2] - 2 ** 15) / 64.0
+  flow[invalid, :2] = 0
+  return flow[:, :, :2]
+
+
+def kittisf15_load_disp(disp_data):
+  """Read a KITTI disparity PNG via `imageio.imread` and return it as a
+  float32 array divided by 256."""
+  # From KITTI SF Devkit:
+  # "Disparity maps are saved as uint16 PNG images, which can be opened with
+  # either MATLAB or libpng++. A 0 value indicates an invalid pixel (ie, no
+  # ground truth exists, or the estimation algorithm didn't produce an estimate
+  # for that pixel). Otherwise, the disparity for a pixel can be computed by
+  # converting the uint16 value to float and dividing it by 256.0"
+  import imageio
+
+  raw = imageio.imread(disp_data)
+  return raw.astype('float32') / 256.
+
+
+def kittisf15_load_calib(cam_to_cam_str):
+  """Parse the text of a KITTI `calib_cam_to_cam` file for cameras 02 / 03.
+
+  Args:
+    cam_to_cam_str: Full file contents; each needed entry is one line of
+      the form `NAME: v0 v1 ...`.  Lines are located by substring match and
+      the last matching line wins.
+
+  Returns:
+    Tuple `(K_2, K_3, baseline, R_02, T_02, R_03, T_03, P_2, P_3)` where
+    `P_*` are the 3x4 `P_rect_*` matrices, `K_*` their left 3x3 blocks,
+    `R_*` / `T_*` the flat arrays of parsed values, and `baseline` is
+    `norm(T_02 - T_03)`.
+
+  Raises:
+    AssertionError: If one of the six needed entries is missing.
+  """
+  import numpy as np
+
+  # TODO: See psegs.datasets.kitti.Calibration -- we want to migrate to that
+  # data structure eventually.
+
+  # Notes from KITTI Raw Devkit: https://github.com/pratikac/kitti/blob/eba7ba0f36917f72055060e9e59f344b72456cb9/readme.raw.txt#L169
+  # calib_cam_to_cam.txt: Camera-to-camera calibration
+  # --------------------------------------------------
+  #   - S_xx: 1x2 size of image xx before rectification
+  #   - K_xx: 3x3 calibration matrix of camera xx before rectification
+  #   - D_xx: 1x5 distortion vector of camera xx before rectification
+  #   - R_xx: 3x3 rotation matrix of camera xx (extrinsic)
+  #   - T_xx: 3x1 translation vector of camera xx (extrinsic)
+  #   - S_rect_xx: 1x2 size of image xx after rectification
+  #   - R_rect_xx: 3x3 rectifying rotation to make image planes co-planar
+  #   - P_rect_xx: 3x4 projection matrix after rectification
+  # Note: When using this dataset you will most likely need to access only
+  # P_rect_xx, as this matrix is valid for the rectified image sequences.
+  #
+  # And: https://github.com/pratikac/kitti/blob/eba7ba0f36917f72055060e9e59f344b72456cb9/readme.raw.txt#L206
+  # example transformations
+  # -----------------------
+  # As the transformations sometimes confuse people, here we give a short
+  # example how points in the velodyne coordinate system can be transformed
+  # into the camera left coordinate system.
+  #
+  # In order to transform a homogeneous point X = [x y z 1]' from the velodyne
+  # coordinate system to a homogeneous point Y = [u v 1]' on image plane of
+  # camera xx, the following transformation has to be applied:
+  # Y = P_rect_xx * R_rect_00 * (R|T)_velo_to_cam * X
+  #
+  # To transform a point X from GPS/IMU coordinates to the image plane:
+  # Y = P_rect_xx * R_rect_00 * (R|T)_velo_to_cam * (R|T)_imu_to_velo * X
+  #
+  # The matrices are:
+  # - P_rect_xx (3x4):         rectified cam 0 coordinates -> image plane
+  # - R_rect_00 (4x4):         cam 0 coordinates -> rectified cam 0 coord.
+  # - (R|T)_velo_to_cam (4x4): velodyne coordinates -> cam 0 coordinates
+  # - (R|T)_imu_to_velo (4x4): imu coordinates -> velodyne coordinates
+  #
+  # Note that the (4x4) matrices above are padded with zeros and:
+  # R_rect_00(4,4) = (R|T)_velo_to_cam(4,4) = (R|T)_imu_to_velo(4,4) = 1.
+
+  entries = cam_to_cam_str.split('\n')
+
+  def last_line_with(key):
+    # Substring match; when several lines match, the last one wins.
+    # A missing entry trips the assert, as in a plain `assert line`.
+    hit = None
+    for entry in entries:
+      if key in entry:
+        hit = entry
+    assert hit
+    return hit
+
+  def values_of(line, key):
+    # Everything after "<key>: " is a space-separated list of floats.
+    # Empty tokens (from repeated spaces) are skipped.
+    text = line.split(key + ': ')[-1]
+    return np.array([float(tok.strip()) for tok in text.split(' ') if tok])
+
+  # Entries are checked and parsed in a fixed order: P_rect, then R, then T.
+  P_2 = values_of(last_line_with('P_rect_02'), 'P_rect_02').reshape([3, 4])
+  K_2 = P_2[:3, :3]
+  P_3 = values_of(last_line_with('P_rect_03'), 'P_rect_03').reshape([3, 4])
+  K_3 = P_3[:3, :3]
+
+  # Both rotation lines must be present before either one is parsed.
+  R_02_line, R_03_line = last_line_with('R_02'), last_line_with('R_03')
+  R_02 = values_of(R_02_line, 'R_02')
+  R_03 = values_of(R_03_line, 'R_03')
+
+  # Likewise for the translation lines.
+  T_02_line, T_03_line = last_line_with('T_02'), last_line_with('T_03')
+  T_02 = values_of(T_02_line, 'T_02')
+  T_03 = values_of(T_03_line, 'T_03')
+
+  # Baseline appears to be in meters, resulting images will have depth to
+  # about 78 meters
+
+  baseline = np.linalg.norm(T_02 - T_03)
+
+  # Seems calibration is in mm, if we use this baseline then resulting
+  # images have depth of ~56,000 (millimeters?)
+  # baseline = np.linalg.norm(P_3[:, 3] - P_2[:, 3])
+
+  return K_2, K_3, baseline, R_02, T_02, R_03, T_03, P_2, P_3
+
+def kittisf15_to_stereo_matches(disp, baseline, K_2):
+  """Turn a left-image disparity map into per-pixel stereo matches.
+
+  Returns a float64 array with one row `(u_left, v_left, u_right, v_right,
+  depth)` per pixel in row-major order, where `u_right = u_left - disp`,
+  `depth = fx * baseline / (disp + 1e-5)`, and depth is 0 unless `disp > 0`.
+  """
+  h, w = disp.shape[:2]
+  focal_x = K_2[0, 0]
+  # The epsilon only guards the division; non-positive disparities are zeroed.
+  z = focal_x * baseline / (disp + 1e-5)
+  z[~(disp > 0)] = 0
+
+  rows, cols = np.indices((h, w))
+  matches = np.zeros((h * w, 5))
+  matches[:, 0] = cols.reshape(-1)
+  matches[:, 1] = rows.reshape(-1)
+  matches[:, 2] = matches[:, 0] - disp.reshape(-1)
+  matches[:, 3] = matches[:, 1]
+  matches[:, 4] = z.reshape(-1)
+  return matches
+
+
+
+# def kittisf15_load_sflow(flow, K, baseline, disp0_path, disp1_path):
+#   fx = K[0, 0]
+  
+#   disp0 = kittisf15_load_disp(disp0_path)
+#   disp0_valid = disp0[:, :] > 0
+#   d0 = fx * baseline / (disp0 + 1e-5)
+#   d0[~disp0_valid] = 0
+  
+#   disp1 = kittisf15_load_disp(disp1_path)
+#   disp1_valid = disp1[:, :] > 0
+#   d1 = fx * baseline / (disp1 + 1e-5)
+#   d1[~disp1_valid] = 0
+  
+#   h, w = d1.shape[:2]
+#   px_y = np.tile(np.arange(h)[:, np.newaxis], [1, w])
+#   px_x = np.tile(np.arange(w)[np.newaxis, :], [h, 1])
+#   pyx = np.concatenate([px_y[:,:,np.newaxis], px_x[:, :, np.newaxis]], axis=-1)
+#   pyx = pyx.astype(np.float32)
+  
+#   vud1 = np.dstack([pyx, d0]).reshape([-1, 3])
+#   uvdviz_im1 = np.zeros((vud1.shape[0], 4))
+#   uvdviz_im1[:, :3] = vud1[:, (1, 0, 2)]
+#   uvdviz_im1[:, -1] = np.logical_and(
+#               (flow > 0).reshape([-1, 2])[:, 0], # Flow is valid
+#               (d0 > 0).reshape([-1]))      # Depth is valid
+
+#   vu2 = (pyx + flow[:, :, (1, 0)]).reshape([-1, 2])
+#   d2_valid = (d1 > 0).reshape([-1])
+#   invalid = np.where(
+#       (np.rint(vu2[:, 0]) < 0) | (np.rint(vu2[:, 0]) >= h) |
+#       (np.rint(vu2[:, 1]) < 0) | (np.rint(vu2[:, 1]) >= w) |
+#       (flow[:, :, 0] == 0).reshape([-1]) |
+#       (~d2_valid))
+#   j2 = np.rint(vu2[:, 0]).astype(np.int64)
+#   i2 = np.rint(vu2[:, 1]).astype(np.int64)
+#   j2[invalid] = 0
+#   i2[invalid] = 0
+#   d2_col = d1[j2, i2]
+#   vud2 = np.hstack([vu2, d2_col[:, np.newaxis]])
+  
+#   uvdviz_im2 = np.ones((vud1.shape[0], 4))
+#   uvdviz_im2[:, :3] = vud2[:, (1, 0, 2)]
+#   uvdviz_im2[invalid, -1] = 0
+  
+# #   vudviz_im2[:, -1] = (vudviz_im2[:, 0] != -np.Inf)
+# #   vudviz_im1[:, -1] = np.logical_and(vudviz_im1[:, -1], (vudviz_im1[:, 2] > 0))
+  
+#   visible_either = ((uvdviz_im1[:, -1] == 1) | (uvdviz_im2[:, -1] == 1))
+#   uvdviz_im1 = uvdviz_im1[visible_either]
+#   uvdviz_im2 = uvdviz_im2[visible_either]
+# #     xyz1 = uvd_to_xyzrgb(uvd1, fp.K)[:, :3]
+# #     xyz2 = uvd_to_xyzrgb(uvd2, fp.K)[:, :3]   
+  
+#   return uvdviz_im1, uvdviz_im2
+
+# def kittisf15_create_fp(uri):
+#   flow = kittisf15_load_flow(os.path.join(KITTI_SF15_DATA_ROOT, uri.extra['ksf15.flow_gt']))
+#   K, baseline = kittisf15_load_K_baseline(os.path.join(KITTI_SF15_DATA_ROOT, uri.extra['ksf15.K']))
+#   uvdviz_im1, uvdviz_im2 = kittisf15_load_sflow(
+#                                   flow, K, baseline,
+#                                   os.path.join(KITTI_SF15_DATA_ROOT, uri.extra['ksf15.disp0']),
+#                                   os.path.join(KITTI_SF15_DATA_ROOT, uri.extra['ksf15.disp1']))
+  
+#   return OpticalFlowPair(
+#               uri=uri,
+#               dataset="KITTI Scene Flow 2015",
+#               id1=uri.extra['ksf15.input'],
+#               img1='file://' + os.path.join(KITTI_SF15_DATA_ROOT, uri.extra['ksf15.input']),
+#               id2=uri.extra['ksf15.expected_out'],
+#               img2='file://' + os.path.join(KITTI_SF15_DATA_ROOT, uri.extra['ksf15.expected_out']),
+#               flow=flow,
+      
+#               K=K,
+#               uvdviz_im1=uvdviz_im1,
+#               uvdviz_im2=uvdviz_im2)
+
+
+###############################################################################
+### StampedDatumTableFactory Impl
+
+class KITTISF15SDTable(StampedDatumTableFactory):
+  
+  FIXTURES = Fixtures
+
+  # The dataset has about 400 total frames; tune here to control memory usage
+  FRAMES_PER_PARTITION = 50
+
+  SPLITS = ('train', 'test')
+
+  # Table will include MatchedPair datums
+  INCLUDE_MPS = True
+
+  # Table will include CameraImage datums, induced from MatchedPair datums
+  INCLUDE_CIS = True
+  
+  # Table will include depth CameraImage datums, induced from MatchedPair datums
+  INCLUDE_DCIS = True
+
+
+  ## Public API
+
+  @classmethod
+  def _get_all_segment_uris(cls):
+    """List the segment URIs for every split named in `cls.SPLITS`.
+
+    Returns:
+      A list of `datum.URI` with `dataset='kitti-sf15'`; all 'train' URIs
+      (if enabled) precede all 'test' URIs (if enabled), each group in the
+      order given by `FIXTURES.get_all_train_test_frame_ids()`.
+    """
+    # In KITTI SF15, there are no sequences, so we make every scene flow
+    # example a distinct segment
+    train_ids, test_ids = cls.FIXTURES.get_all_train_test_frame_ids()
+    split_to_frame_ids = (
+      ('train', train_ids),
+      ('test', test_ids),
+    )
+
+    segs = []
+    for split, frame_ids in split_to_frame_ids:
+      if split not in cls.SPLITS:
+        continue
+      for frame_id in frame_ids:
+        # The frame id doubles as the segment id and is also kept in `extra`
+        segs.append(
+          datum.URI(
+              dataset='kitti-sf15',
+              split=split,
+              segment_id=frame_id,
+              extra={
+                'kitti_sf15.frame_id': frame_id,
+              }))
+
+    return segs
+
+  @classmethod
+  def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):
+    """Return a list of datum RDDs, one per chunk of at most
+    `FRAMES_PER_PARTITION` segment URIs.  Segments whose (split, frame id)
+    already appears in `existing_uri_df` are skipped, and `only_segments`,
+    if non-empty, restricts the build to soft-matching segments."""
+
+    def split_and_frame(v):
+      # Resume key: split name concatenated with the KITTI frame id
+      return v.split + v.extra.get('kitti_sf15.frame_id')
+
+    ## For KITTI SF15, each frame ID becomes a segment / task ...
+    todo_uris = cls._get_all_segment_uris()
+    util.log.info(f"Discovered {len(todo_uris)} segments ...")
+
+    ## ... skip any segments we already have ...
+    if existing_uri_df is not None:
+      done_keys = set(existing_uri_df.rdd.map(split_and_frame).collect())
+      todo_uris = [u for u in todo_uris if split_and_frame(u) not in done_keys]
+      util.log.info(
+        f"Resume mode: have datums for {len(done_keys)} frames; "
+        f"reduced to {len(todo_uris)} tasks")
+
+    if only_segments:
+      util.log.info(f"Filtering to only {len(only_segments)} segments")
+      todo_uris = [
+        uri for uri in todo_uris
+        if any(suri.soft_matches_segment_of(uri) for suri in only_segments)
+      ]
+
+    ## ... now run tasks and create stamped datums.
+    util.log.info(f"... creating datums for {len(todo_uris)} segments.")
+    return [
+      spark.sparkContext.parallelize(chunk).flatMap(
+        cls._create_datums_for_segement_uri)
+      for chunk in oputil.ichunked(todo_uris, cls.FRAMES_PER_PARTITION)
+    ]
+
+
+  ## Datum Construction Support
+
+  @classmethod
+  def _create_datums_for_segement_uri(cls, seg_uri):
+    """Build every enabled datum for one segment (one KITTI SF15 frame).
+
+    The matched pair is always constructed because its `extra` carries the
+    left / right image URIs used below, but it is only emitted when
+    `INCLUDE_MPS` is set (previously `INCLUDE_MPS = False` crashed here).
+
+    Returns:
+      A list of `datum.StampedDatum`.
+    """
+    mp_datum = cls._create_matched_pair(seg_uri)
+    mp_extra = mp_datum.matched_pair.extra
+
+    datums = []
+    if cls.INCLUDE_MPS:
+      # Emit the StampedDatum wrapper (not the bare MatchedPair) so that every
+      # element of the result has the same type.
+      datums.append(mp_datum)
+
+    # RGB images first, then depth images; left before right in each pass.
+    for enabled, as_depth in (
+          (cls.INCLUDE_CIS, False), (cls.INCLUDE_DCIS, True)):
+      if not enabled:
+        continue
+      for key in ('kitti_sf15.img1_uri', 'kitti_sf15.img2_uri'):
+        img_uri = datum.URI.from_str(mp_extra[key])
+        ci = cls._create_camera_image(img_uri, create_depth_image=as_depth)
+        # NOTE(review): depth datums reuse the RGB image URI -- confirm intent
+        datums.append(datum.StampedDatum(uri=img_uri, camera_image=ci))
+
+    return datums
+
+  @classmethod
+  def _get_file_bytes(cls, uri=None, archive=None, entryname=None):
+    """Read bytes for the file referred to by `uri`, or by the explicit
+    (`archive`, `entryname`) pair when `uri` is None."""
+    if uri is not None:
+      archive = uri.extra['kitti_sf15.archive']
+      entryname = uri.extra['kitti_sf15.archive.path']
+    assert archive and entryname
+
+    # Cache the Zipfiles for faster loading
+    if not hasattr(cls, '_get_file_bytes_archives'):
+      cls._get_file_bytes_archives = {}
+    zips = cls._get_file_bytes_archives
+    if archive not in zips:
+      import zipfile
+      zips[archive] = zipfile.ZipFile(cls.FIXTURES.zip_path(archive))
+
+    try:
+      return zips[archive].read(entryname)
+    except Exception as e:
+      raise Exception((e, archive, uri))
+
+  @classmethod
+  def _get_calib(cls, uri):
+    """Load and parse the `calib_cam_to_cam` text for the frame of `uri`;
+    returns the tuple produced by `kittisf15_load_calib`."""
+    frame_id = uri.extra['kitti_sf15.frame_id']
+    ksplit = uri.split + 'ing'
+    calib_key = frame_id.replace("_10", "").replace("_11", "")
+    entry = f'{ksplit}/calib_cam_to_cam/{calib_key}.txt'
+
+    calib_uri = copy.deepcopy(uri)
+    calib_uri.extra['kitti_sf15.archive.path'] = entry
+    calib_uri.extra['kitti_sf15.archive'] = 'data_scene_flow_calib.zip'
+    raw = cls._get_file_bytes(uri=calib_uri)
+    return kittisf15_load_calib(raw.decode('utf-8'))
+
+  @classmethod
+  def _create_camera_image(cls, uri, create_depth_image=False):
+    """Create the `datum.CameraImage` for the 'camera|left' / 'camera|right'
+    image at `uri`.  With `create_depth_image`, the result instead carries a
+    lazily-rasterized single-channel 'depth' image named `<topic>|depth`.
+    Raises ValueError for any other topic."""
+    from psegs.util import misc
+
+    image_png = cls._get_file_bytes(uri=uri)
+    width, height = misc.get_png_wh(image_png)
+    extra = copy.deepcopy(uri.extra)
+
+    calib = cls._get_calib(uri)
+    K_2, K_3, baseline, R_02, T_02, R_03, T_03, P_2, P_3 = calib
+
+    if uri.topic == 'camera|left':
+      K = K_2
+      # Hack: assume the system is in camera_left frame. Validate that now.
+      K_diff = K - P_2[:3, :3]
+      assert K_diff.sum() == 0, "Did calib change?"
+      rotation = np.eye(3)
+      translation = np.zeros(3)
+    elif uri.topic == 'camera|right':
+      K = K_3
+      # Hack: assume the system is in camera_left frame. Validate that now.
+      K_diff = K - P_3[:3, :3]
+      assert K_diff.sum() == 0, f"Did calib change? {K_diff}"
+      R_diff = P_3[:, :3] - P_2[:, :3]
+      assert R_diff.sum() == 0, f"Did calib change? {R_diff}"
+      RT = np.linalg.inv(K) @ P_3
+      rotation = RT[:3, :3]
+      translation = RT[:3, 3:]
+    else:
+      raise ValueError(uri.topic)
+
+    ego_to_sensor = datum.Transform(
+                rotation=rotation,
+                translation=translation,
+                src_frame='ego',
+                dest_frame=uri.topic)
+
+    if create_depth_image:
+      def _get_depth_image(uri=None, h=1, w=1):
+        assert uri is not None
+        uv_2_uv_3_depth = cls._get_uv_2_uv_3_depth(uri)
+        if uri.topic == 'camera|left':
+          uv_depth = uv_2_uv_3_depth[:, (0, 1, -1)]
+        elif uri.topic == 'camera|right':
+          uv_depth = uv_2_uv_3_depth[:, (2, 3, -1)]
+        else:
+          raise ValueError('depth image' + str(uri.topic))
+
+        # Fix: keep only in-image matches.  Right-image `u` is `u_left - disp`
+        # and may be negative; a negative index would wrap to the far border.
+        u, v = uv_depth[:, 0], uv_depth[:, 1]
+        uv_depth = uv_depth[(u >= 0) & (u < w) & (v >= 0) & (v < h)]
+        # TODO: confirm whether this depth is z-depth or euclidean distance
+        depth_image = np.zeros((h, w, 1), dtype=np.float32)
+        uu, vv = uv_depth[:, 0].astype(int), uv_depth[:, 1].astype(int)
+        depth_image[vv, uu, 0] = uv_depth[:, -1]
+        return depth_image
+
+      image_factory = lambda: _get_depth_image(uri=uri, h=height, w=width)
+      channel_names = ['depth']
+      extra['psegs.depth.rgb_uri'] = str(uri)
+      sensor_name = uri.topic + '|depth'
+    else:
+      def _get_image(uri=None):
+        import io
+        import imageio
+        im_bytes = cls._get_file_bytes(uri=uri)
+        return imageio.imread(io.BytesIO(im_bytes))
+      sensor_name = uri.topic
+      image_factory = lambda: _get_image(uri=uri)
+      channel_names = ['r', 'g', 'b']
+
+    # for KITTI SF15, left camera is ego
+    ego_pose = datum.Transform(src_frame='world', dest_frame='ego')
+    return datum.CameraImage(
+          sensor_name=sensor_name,
+          image_factory=image_factory,
+          channel_names=channel_names,
+          width=width,
+          height=height,
+          timestamp=uri.timestamp,
+          ego_pose=ego_pose,
+          K=K,
+          ego_to_sensor=ego_to_sensor,
+          extra=extra)
+  
+  @classmethod
+  def _create_matched_pair(cls, uri):
+    """Create the 'camera|matches' StampedDatum pairing the left (image_2)
+    and right (image_3) images of the frame named by segment URI `uri`."""
+    frame_id = uri.extra['kitti_sf15.frame_id']
+    ksplit = uri.split + 'ing'
+
+    def image_uri(topic, image_dir):
+      img_uri = copy.deepcopy(uri)
+      img_uri.topic = topic
+      img_uri.extra['kitti_sf15.archive.path'] = (
+          f'{ksplit}/{image_dir}/{frame_id}.png')
+      img_uri.extra['kitti_sf15.archive'] = 'data_scene_flow.zip'
+      return img_uri
+
+    img1_uri = image_uri('camera|left', 'image_2')
+    img2_uri = image_uri('camera|right', 'image_3')
+
+    extra = copy.deepcopy(uri.extra)
+    extra['kitti_sf15.img1_uri'] = str(img1_uri)
+    extra['kitti_sf15.img2_uri'] = str(img2_uri)
+
+    mp = datum.MatchedPair(
+                matcher_name='kitti_gt',
+                timestamp=uri.timestamp,
+                img1=cls._create_camera_image(img1_uri),
+                img2=cls._create_camera_image(img2_uri),
+                # Fix: was `cls.uv_2_uv_3_depth`, which this class lacks
+                matches_factory=lambda: cls._get_uv_2_uv_3_depth(uri),
+                matches_colnames=['x1', 'y1', 'x2', 'y2', 'depth_meters_left'],
+                extra=extra)
+    sd_uri = copy.deepcopy(uri)
+    sd_uri.topic = 'camera|matches'
+    return datum.StampedDatum(uri=sd_uri, matched_pair=mp)
+
+  @classmethod
+  def _get_uv_2_uv_3_depth(cls, uri):
+    """Return the (N, 5) stereo match rows for the frame of `uri`, computed
+    from its `disp_occ_0` disparity map and keeping positive depth only.
+    The test split has no labels, so it yields an empty (0, 5) array."""
+    if uri.split == 'test':
+      # Test dataset has no provided labels
+      return np.zeros((0, 5), dtype=np.float32)
+
+    frame_id = uri.extra['kitti_sf15.frame_id']
+    ksplit = uri.split + 'ing'
+
+    disp_uri = copy.deepcopy(uri)
+    disp_uri.extra['kitti_sf15.archive.path'] = (
+      f'{ksplit}/disp_occ_0/{frame_id}.png')
+    disp_uri.extra['kitti_sf15.archive'] = 'data_scene_flow.zip'
+    disp = kittisf15_load_disp(cls._get_file_bytes(uri=disp_uri))
+
+    K_2, K_3, baseline, R_02, T_02, R_03, T_03, P_2, P_3 = cls._get_calib(uri)
+    matches = kittisf15_to_stereo_matches(disp, baseline, K_2)
+    has_depth = matches[:, -1] > 0
+    return matches[has_depth]
+
+class KITTISF15SDTableTrainOnly(KITTISF15SDTable):
+  """Variant of `KITTISF15SDTable` whose `SPLITS` names only 'train'."""
+  SPLITS = ('train',)
+
+
+###############################################################################
+### IDatasetUtil Impl
+
+class DSUtil(IDatasetUtil):
+  """Setup, validation and test entry points for KITTI Scene Flow 2015."""
+
+  FIXTURES = Fixtures
+
+  @classmethod
+  def all_zips(cls):
+    """Return the names of the zip archives this dataset needs."""
+    return cls.FIXTURES.ZIPS
+
+  @classmethod
+  def emplace(cls):
+    """Make sure the archives are reachable under `FIXTURES.ROOT` (prompting
+    for a directory to symlink if the root is absent); returns True iff
+    every name in `all_zips()` is found directly under the root."""
+    import os
+
+    cls.FIXTURES.maybe_emplace_psegs_kitti_sf_ext()
+
+    root = cls.FIXTURES.ROOT
+    if not root.exists():
+      zips = '\n        '.join('  * %s' % fname for fname in cls.all_zips())
+      cls.show_md("""
+        Due to KITTI license constraints, you need to manually accept the KITTI
+        license to obtain the download URLs for the *Stereo / Scene Flow*[1]
+        zip files.  But once you have the URL, it's easy to write a short bash
+        loop with `wget` to fetch them in parallel.
+
+        You'll want to download all the following zip files (do not decompress
+        them) to a single directory on a local disk (spinning disk OK):
+
+        %s
+
+        Once you've downloaded the archives, we'll need the path to where
+        you put them.  Enter that below, or exit this program.
+
+        [1] https://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=stereo
+
+      """ % (zips,))
+      kitti_sf_root = input(
+        "Please enter the directory containing your KITTI Scene Flow 2015 zip "
+        "archives; PSegs will create a (read-only) symlink to them: ")
+      kitti_sf_root = Path(kitti_sf_root.strip())
+      assert kitti_sf_root.exists()
+      assert kitti_sf_root.is_dir()
+
+      oputil.mkdir(str(root.parent))
+      cls.show_md("Symlink: \n%s <- %s" % (kitti_sf_root, root))
+      os.symlink(kitti_sf_root, root)
+
+    cls.show_md("Validating KITTI SF 2015 archives ...")
+    wanted = set(cls.all_zips())
+    present = {entry.name for entry in root.iterdir()}
+    zips_needed = wanted - present
+
+    if zips_needed:
+      s_have, s_needed = (
+        '\n        '.join('  * %s' % fname for fname in names)
+        for names in (wanted & present, zips_needed))
+      cls.show_md("""
+        Missing some expected archives!
+
+        Found:
+        
+        %s
+
+        Missing:
+
+        %s
+      """ % (s_have, s_needed))
+      return False
+
+    cls.show_md("... all KITTI SF 2015 archives found!")
+    return True
+
+  @classmethod
+  def test(cls):
+    """Run the `test_kitti_sf` pytest selection from `C.PS_ROOT`."""
+    oputil.run_cmd("cd %s && pytest -s -vvv -k test_kitti_sf" % C.PS_ROOT)
+    return True
diff --git a/psegs/datasets/nuscenes.py b/psegs/datasets/nuscenes.py
new file mode 100644
index 0000000..f6ca369
--- /dev/null
+++ b/psegs/datasets/nuscenes.py
@@ -0,0 +1,1398 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import itertools
+import os
+import pickle
+import shelve
+from pathlib import Path
+
+import numpy as np
+
+from oarphpy import util as oputil
+
+from psegs import util
+from psegs.conf import C
+from psegs.datasets.idsutil import IDatasetUtil
+
+
+###############################################################################
+### NuScenes Fixtures & Other Constants
+
+class FixturesBase(object):
+  """Base class describing where a NuScenes-style dataset lives on disk.
+
+  Subclasses configure:
+   * `FLAVOR` -- a short name; used as a path component for index data.
+   * `ROOT` -- a `pathlib.Path`-like root directory of the dataset.
+   * `DATASET_VERSIONS` -- version names; each is looked up as `ROOT / version`.
+  """
+
+  # Subclasses should configure
+  FLAVOR = ''
+  ROOT = None
+  DATASET_VERSIONS = tuple()
+
+  @classmethod
+  def index_root(cls, nusc_ds_version):
+    """A r/w place to cache any temp / index data"""
+    return C.PS_TEMP / cls.FLAVOR / nusc_ds_version
+
+  @classmethod
+  def version_exists(cls, version):
+    """Return True if the path `ROOT / version` exists."""
+    return (cls.ROOT / version).exists()
+
+  ### DSUtil Auto-download ####################################################
+
+  @classmethod
+  def maybe_emplace_psegs_ext(cls):
+    """Placeholder: emplacing PSegs extension data is not implemented yet.
+
+    Prints a TODO notice and returns None.
+
+    TODO: implement.  An earlier draft sat here behind an unconditional
+    `return` and so never ran.  It was KITTI-specific (git-clone
+    `psegs-kitti-ext`, then `mv` its assets into place), called
+    `index_root()` without the required version argument, and relied on
+    `bench_to_raw_path()` / `EXTERNAL_FIXTURES_ROOT`, neither of which is
+    defined on this class.
+    """
+    print('todo maybe_emplace_psegs_ext')
+    return
+
+
+class NuscFixtures(FixturesBase):
+  """Fixtures for NuScenes: data is expected under
+  `C.EXT_DATA_ROOT / 'nuscenes'`."""
+  FLAVOR = 'nuscenes'
+  DATASET_VERSIONS = ('v1.0-mini', 'v1.0-trainval', 'v1.0-test')
+  ROOT = C.EXT_DATA_ROOT / FLAVOR
+
+class LyftFixtures(FixturesBase):
+  """Fixtures for Lyft Level 5 (detection): data is expected under
+  `C.EXT_DATA_ROOT / 'lyft_level_5_detection'`."""
+  FLAVOR = 'lyft_level_5_detection'
+  DATASET_VERSIONS = ('train',)
+  ROOT = C.EXT_DATA_ROOT / FLAVOR
+
+
+# nuscenes is a soft dependency.
+# To enable: pip3 install nuscenes-devkit==1.1.2
+try:
+  from nuscenes.nuscenes import NuScenes
+except ImportError:
+  class NuscNotInstalled(object):
+    """Stand-in base class used when `nuscenes` cannot be imported; any
+    attempt to instantiate it (or a subclass that inherits `__new__`)
+    raises."""
+    def __new__(cls, *args, **kwargs):
+      raise ValueError("This code requires nuscenes-devkit==1.1.2")
+  BASE = NuscNotInstalled
+else:
+  BASE = NuScenes
+
+class PSegsNuScenes(BASE):
+  """A PSegs wrapper around the NuScenes dataset handle featuring reduced
+  memory usage and utilities for easier PSegs interop.
+
+  ## Reduced Memory Usage & Faster Loading
+  The base NuScenes object uses ~8GB resident RAM (each instance) and takes
+  about 30sec to instantiate due to the "tables" of JSON data that it loads.  
+  Below we replace these "tables" with disk-based `shelve`s in order to
+  dramatically reduce memory usage (to less than 1GB resident) and make
+  object instantiation faster (down to about 3 sec). We designed this change 
+  to support instantiating multiple NuScenes readers per machine (e.g. in
+  Spark or any parallel use case).
+
+  To warm the disk-based caches (requires one-time temporary ~8GB memory
+  usage), use `maybe_warm_caches()` below.  At the time of writing
+  the cache for the `v1.0-trainval` split requires about 4GB of cache and takes
+  about 2-3 minutes on a 2.2GHz Intel Core i7.
+
+  ## Utils
+  See `print_sensor_sample_rates()` in particular for important data about
+  NuScenes / Lyft Level 5 dataset sample rates.
+  """
+
+  FIXTURES = NuscFixtures
+
+  DATASET = 'nuscenes'
+
+  def _get_cache_path(self, table_name):
+    """Return the path of the shelve cache for the nuscenes table
+    `table_name`; caches live under the index root of `self.version`."""
+    version_index_root = self.FIXTURES.index_root(self.version)
+    return version_index_root / table_name
+
+  @classmethod
+  def maybe_warm_caches(cls, versions=None):
+    """Build the on-disk caches for each of `versions` that exists under the
+    fixtures root.
+
+    Args:
+      versions: dataset version names to warm; if falsy, uses
+        `cls.FIXTURES.DATASET_VERSIONS`.  A version whose directory
+        (`cls.FIXTURES.ROOT / version`) does not exist is skipped.
+    """
+    if not versions:
+      versions = cls.FIXTURES.DATASET_VERSIONS
+    for version in versions:
+      version_dir = cls.FIXTURES.ROOT / version
+      if not version_dir.exists():
+        continue
+      # To warm the cache, simply instantiate; the handle itself is not
+      # needed afterwards. To bust caches, user needs to delete paths that
+      # will be logged to stdout.
+      cls(version=version)
+
+  def __init__(self, **kwargs):
+    """FMI see NuScenes.__init__().  The parent class will read JSON blobs
+    and load 8GB+ data into resident memory.  In this override, we load data
+    using the parent NuScenes implementation (thus, temporarily, using the
+    same amount of resident memory) but then cache the table data to disk
+    using python `shelve`.  We then free the resident memory and use the
+    disk-based caches for token-based access.
+
+    Args:
+      **kwargs: forwarded to `NuScenes.__init__()`.  This override itself
+        reads `version` (default 'v1.0-mini'), `dataroot` (replaced with
+        `FIXTURES.ROOT` when missing or nonexistent) and `verbose`
+        (default True).
+
+    Raises:
+      FileNotFoundError: if neither the given `dataroot` nor the default
+        PSegs dataroot exists.
+    """
+
+    # Base ctor sets these members, but we'll do it here so that
+    # path-resolving superclass methods below work properly
+    self.version = kwargs.get('version', 'v1.0-mini')
+    self.dataroot = kwargs.get('dataroot')
+    self.verbose = kwargs.get('verbose', True)
+
+    # If needed, default to PSegs-configured dataroot
+    if not (self.dataroot and os.path.exists(self.dataroot)):
+      if self.FIXTURES.ROOT.exists():
+        self.dataroot = str(self.FIXTURES.ROOT)
+        kwargs['dataroot'] = self.dataroot
+      else:
+        raise FileNotFoundError(
+          "Could not find provided dataroot %s nor default "
+          "PSegs dataroot %s" % (self.dataroot, self.FIXTURES.ROOT))
+
+    # Make sure the lock file exists (opening in append mode creates it)
+    FLOCK_PATH = self._get_cache_path('cache.flock')
+    if not FLOCK_PATH.exists():
+      oputil.mkdir(FLOCK_PATH.parent)
+      with open(FLOCK_PATH, 'a'):
+        pass
+
+    # `cache.done` marks a completed cache build.  It is checked once without
+    # the lock and again while holding the lock, since another process may
+    # have finished the build while we waited for the lock.
+    DONE_PATH = self._get_cache_path('cache.done')
+    if not DONE_PATH.exists():
+      import fasteners
+      lock = fasteners.InterProcessLock(FLOCK_PATH)
+      with lock:
+        if not DONE_PATH.exists():
+          util.log.info(
+            "Creating shelve caches; will use ~8-10GB of RAM ...")
+          nusc = NuScenes(**kwargs)
+          # NB: The above ctor call not only loads all JSON but also runs
+          # 'reverse indexing', which **EDITS** the data loaded into memory.
+          # We'll then write the edited data below using `shelve` so that we
+          # don't have to try to make `PSegsNuScenes` support reverse indexing
+          # itself.
+          util.log.info("... NuScenes done loading & indexing JSON data ...")
+
+          # Cache every table the devkit loaded, plus `lidarseg` when its
+          # JSON is present.  Guard against listing `lidarseg` twice in case
+          # the devkit already includes it in `table_names`.
+          table_names = list(nusc.table_names)
+          has_lidarseg_json = (
+            Path(self.table_root) / 'lidarseg.json').exists()
+          if has_lidarseg_json and 'lidarseg' not in table_names:
+            table_names.append('lidarseg')
+
+          for table_name in table_names:
+            cache_path = self._get_cache_path(table_name)
+            oputil.mkdir(cache_path.parent)
+
+            util.log.info(
+              "Building shelve cache for %s (in %s) ..." % (
+                table_name, cache_path))
+
+            rows = getattr(nusc, table_name) # E.g. nusc.sample_data
+            # `with` closes the shelf even if writing a row fails
+            with shelve.open(
+                  str(cache_path), protocol=pickle.HIGHEST_PROTOCOL) as d:
+              d.update((r['token'], r) for r in rows)
+
+          # This cache helps speed up full-scene lookups by 10x
+          util.log.info("Building scene name -> sample_data token cache ...")
+          scene_name_to_sd_token = self._build_scene_name_to_sd_token(nusc)
+          cache_path = self._get_cache_path('scene_name_to_sd_token')
+          with shelve.open(
+                str(cache_path), protocol=pickle.HIGHEST_PROTOCOL) as d:
+            d.update(scene_name_to_sd_token)
+
+          util.log.info("... done.")
+          del nusc # Free several GB memory
+
+          # Touch the marker only after every cache has been written
+          with open(DONE_PATH, 'a'):
+            pass
+
+    super(PSegsNuScenes, self).__init__(**kwargs)
+
+  def _get_table(self, table_name):
+    """Return the shelve handle for `table_name`, opening it on first use and
+    memoizing it on this instance as attribute `_cached_<table_name>`."""
+    attr = '_cached_' + table_name
+    if hasattr(self, attr):
+      return getattr(self, attr)
+
+    cache_path = self._get_cache_path(table_name)
+    if self.verbose:
+      util.log.info(
+        "Using shelve cache for %s at %s" % (table_name, cache_path))
+    table = shelve.open(str(cache_path))
+    setattr(self, attr, table)
+    return getattr(self, attr)
+
+  def __load_table__(self, table_name):
+    """Override: serve the rows of `table_name` from its shelve cache.
+
+    NB: Despite the type annotation in the parent class, the base method
+    actually returns a list of dicts and not a single dict.  This
+    subclass method returns a Values View (a generator-like thing)
+    and does not break any core NuScenes functionality.
+    """
+    table = self._get_table(table_name)
+    return table.values()
+  
+  def __make_reverse_index__(self, verbose):
+    """Intentional no-op override.
+
+    NB: Shelve data files have, built-in, the reverse indices that the
+    base `NuScenes` creates (they are written in `__init__()`).  This
+    override allows the subclass to safely invoke the parent class CTor.
+    """
+    return None
+  
+  def get(self, table_name, token):
+    """Return the record stored under `token` in the shelve cache of
+    `table_name`.  Asserts that `table_name` is one of `self.table_names`."""
+    assert table_name in self.table_names, \
+      "Table {} not found".format(table_name)
+    table = self._get_table(table_name)
+    record = table[token]
+    return record
+  
+  def getind(self, table_name, token):
+    """Unsupported in this subclass; always raises ValueError.
+
+    This override should be safe due to our override of `get()`, which looks
+    records up by token directly in the shelve cache.
+    """
+    raise ValueError("Unsupported / unnecessary; provided by shelve")
+
+  @classmethod
+  def _build_scene_name_to_sd_token(cls, nusc):
+    """Build a dict of scene name -> list of `sample_data` tokens.
+
+    Args:
+      nusc: a loaded NuScenes-like handle providing `sample`,
+        `sample_data`, and `get()`.
+
+    Returns:
+      dict: scene name -> list of the tokens of every `sample_data` record
+        whose sample belongs to that scene, in `nusc.sample_data` order.
+    """
+    # First resolve each sample to the token of its scene ...
+    sample_to_scene = dict(
+      (sample['token'], nusc.get('scene', sample['scene_token'])['token'])
+      for sample in nusc.sample)
+
+    # ... then bucket sample_data tokens by the name of that scene.
+    scene_name_to_sd_token = {}
+    for sd in nusc.sample_data:
+      scene = nusc.get('scene', sample_to_scene[sd['sample_token']])
+      scene_name_to_sd_token.setdefault(scene['name'], []).append(sd['token'])
+
+    # For NuScenes trainval-1.0, this is about 400MB of memory
+    return scene_name_to_sd_token
+
+  def iter_sample_data_for_scene(self, scene_name):
+    """Yield every `sample_data` record of the scene named `scene_name`,
+    using the cached scene name -> sample_data token lists."""
+    tokens_by_scene = self._get_table('scene_name_to_sd_token')
+    for sd_token in tokens_by_scene[scene_name]:
+      yield self.get('sample_data', sd_token)
+
+
+  #### PSegs Adhoc Utils
+
+  def has_lidarseg(self):
+    """Return True if this handle has a `lidarseg` attribute."""
+    try:
+      self.lidarseg
+    except AttributeError:
+      return False
+    return True
+
+  @classmethod
+  def get_split_for_scene(cls, scene):
+    """Return the name of the split containing the scene named `scene`.
+
+    The scene -> split map is built from the devkit's
+    `create_splits_scenes()` on first use and memoized on the class as
+    `_scene_to_split`.  Raises KeyError for a scene in no (non-mini) split.
+    """
+    if hasattr(cls, '_scene_to_split'):
+      return cls._scene_to_split[scene]
+
+    ## NuScenes
+    from nuscenes.utils.splits import create_splits_scenes
+    split_to_scenes = create_splits_scenes()
+
+    # Ignore mini splits because they duplicate train/val
+    scene_to_split = {
+      s: split
+      for split, scenes in split_to_scenes.items()
+      if 'mini' not in split
+      for s in scenes
+    }
+
+    # ## Lyft
+    # # NB: Lyft Level 5 does not have a proper train/test split of labeled
+    # # data, so we induced an arbitrary one for now.
+    # from au.fixtures.datasets import lyft_level_5_constants as lyft_consts
+    # AU_SCENES = lyft_consts.AU_VAL_SCENES + lyft_consts.AU_TRAIN_SCENES
+    # assert sorted(AU_SCENES) == sorted(lyft_consts.TRAIN_SCENES)
+    # scene_to_split.update(
+    #   dict((s, 'train') for s in  lyft_consts.AU_TRAIN_SCENES))
+    # scene_to_split.update(
+    #   dict((s, 'val') for s in    lyft_consts.AU_VAL_SCENES))
+    # scene_to_split.update(
+    #   dict((s, 'test') for s in   lyft_consts.TEST_SCENES))
+
+    ## Done!
+    cls._scene_to_split = scene_to_split
+    return cls._scene_to_split[scene]
+
+  def get_all_sensors(self):
+    """Return the set of all keys that appear in the `data` dict of any
+    sample (i.e. the sensor channel names; examples are listed below)."""
+    sensors = set()
+    for sample in self.sample:
+      sensors.update(sample['data'].keys())
+    return sensors
+    # NuScenes:
+    # 'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT', 'CAM_FRONT',
+    #   'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT',
+    # 'LIDAR_TOP',
+    # 'RADAR_BACK_LEFT', 'RADAR_BACK_RIGHT', 'RADAR_FRONT',
+    #   'RADAR_FRONT_LEFT', 'RADAR_FRONT_RIGHT'
+    # Lyft Level 5:
+    # 'CAM_FRONT_ZOOMED', 'CAM_BACK', 'LIDAR_FRONT_RIGHT', 'CAM_FRONT_LEFT',
+    # 'CAM_BACK_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'LIDAR_TOP',
+    # 'LIDAR_FRONT_LEFT', 'CAM_BACK_RIGHT'
+  
+  def get_all_classes(self):
+    """Return the set of distinct `category_name` values over all sample
+    annotations (examples are listed below)."""
+    return {anno['category_name'] for anno in self.sample_annotation}
+    # NuScenes:
+    # 'animal',
+    # 'human.pedestrian.adult',
+    # 'human.pedestrian.child',
+    # 'human.pedestrian.construction_worker',
+    # 'human.pedestrian.personal_mobility',
+    # 'human.pedestrian.police_officer',
+    # 'human.pedestrian.stroller',
+    # 'human.pedestrian.wheelchair',
+    # 'movable_object.barrier',
+    # 'movable_object.debris',
+    # 'movable_object.pushable_pullable',
+    # 'movable_object.trafficcone',
+    # 'static_object.bicycle_rack',
+    # 'vehicle.bicycle',
+    # 'vehicle.bus.bendy',
+    # 'vehicle.bus.rigid',
+    # 'vehicle.car',
+    # 'vehicle.construction',
+    # 'vehicle.emergency.ambulance',
+    # 'vehicle.emergency.police',
+    # 'vehicle.motorcycle',
+    # 'vehicle.trailer',
+    # 'vehicle.truck'
+    # Lyft Level 5:
+    # 'other_vehicle', 'bus', 'truck', 'car', 'bicycle', 'pedestrian', 
+    # 'animal', 'emergency_vehicle', 'motorcycle'
+
+  def get_table_to_length(self):
+    """Return a dict of table name -> `len()` of that table, for every name
+    in `self.table_names`."""
+    return {name: len(getattr(self, name)) for name in self.table_names}
+
+  def print_sensor_sample_rates(self, scene_names=None):
+    """Print a report to stdout describing the sample rates of all sensors,
+    labels, and localization objects.
+
+    Args:
+      scene_names: optional iterable of scene names to report on; if falsy,
+        every scene is reported.
+
+    For each scene this prints a header (name, token, UTC start time,
+    duration, annotation and track counts) and a table with one row per
+    series: each `sample_data` channel plus `ego_pose`, `sample (annos)` and
+    `sample_data`.  Each row gives the mean frequency, the mean time offset
+    to the nearest `LIDAR_TOP` reading (NaN if the scene has no `LIDAR_TOP`
+    data), the duration, and the number of readings.
+    """
+    from collections import OrderedDict
+    from collections import defaultdict
+    from datetime import datetime
+    from datetime import timezone
+
+    def to_sec(timestamp):
+      # NuScenes and Lyft Level 5 timestamps are in microseconds
+      return timestamp * 1e-6
+
+    if not scene_names:
+      scene_names = [s['name'] for s in self.scene]
+    scene_names = set(scene_names)
+
+    scene_tokens = set(
+      s['token'] for s in self.scene if s['name'] in scene_names)
+    for scene_token in scene_tokens:
+      scene_samples = [
+        s for s in self.sample if s['scene_token'] == scene_token
+      ]
+
+      # Samples are supposed to be 'best groupings' of lidar and camera data.
+      # Let's measure their sample rate.
+      name_to_tss = defaultdict(list)
+      for sample in scene_samples:
+        name_to_tss['sample (annos)'].append(to_sec(sample['timestamp']))
+
+      # Now measure individual sensor sample rates.
+      sample_toks = set(s['token'] for s in scene_samples)
+      scene_sample_datas = [
+        sd for sd in self.sample_data if sd['sample_token'] in sample_toks
+      ]
+      for sd in scene_sample_datas:
+        name_to_tss[sd['channel']].append(to_sec(sd['timestamp']))
+        ego_pose = self.get('ego_pose', sd['ego_pose_token'])
+        name_to_tss['ego_pose'].append(to_sec(ego_pose['timestamp']))
+        name_to_tss['sample_data'].append(to_sec(sd['timestamp']))
+
+      annos = [
+        a for a in self.sample_annotation if a['sample_token'] in sample_toks
+      ]
+      num_tracks = len(set(a['instance_token'] for a in annos))
+
+      all_ts = sorted(itertools.chain.from_iterable(name_to_tss.values()))
+      # Timezone-aware equivalent of the deprecated `utcfromtimestamp()`;
+      # the formatted string is the same.
+      dt = datetime.fromtimestamp(all_ts[0], tz=timezone.utc)
+      start = dt.strftime('%Y-%m-%d %H:%M:%S')
+      duration = (all_ts[-1] - all_ts[0])
+
+      # Print a report
+      print('---')
+      print('---')
+
+      scene = self.get('scene', scene_token)
+      print('Scene %s %s' % (scene['name'], scene['token']))
+      print('Start %s \tDuration %s sec' % (start, duration))
+      print('Num Annos %s (Tracks %s)' % (len(annos), num_tracks))
+      import pandas as pd
+
+      # The lidar series is the same for every row, so compute it once.
+      # NB: use `.get()` so that a scene without LIDAR_TOP data does not gain
+      # an empty 'LIDAR_TOP' entry (and thus an empty row) in the defaultdict.
+      lidar_series = np.array(sorted(name_to_tss.get('LIDAR_TOP', [])))
+
+      rows = []
+      for name in sorted(name_to_tss.keys()):
+        series = np.array(sorted(name_to_tss[name]))
+        freqs = series[1:] - series[:-1]
+
+        if lidar_series.size:
+          diff_lidar_ms = 1e3 * np.mean(
+            [np.abs(lidar_series - t).min() for t in series])
+        else:
+          # No lidar readings to compare against
+          diff_lidar_ms = float('nan')
+
+        rows.append(OrderedDict((
+          ('Series',              name),
+          ('Freq Hz',             1. / np.mean(freqs)),
+          ('Diff Lidar (msec)',   diff_lidar_ms),
+          ('Duration',            series[-1] - series[0]),
+          ('Support',             len(series)),
+        )))
+      print(pd.DataFrame(rows))
+
+      print()
+      print()
+
+      # NuScenes:
+      # ---
+      # ---
+      # Scene scene-1000 09f67057dd8346388b28f79d9bb1cf04
+      # Start 2018-11-14 11:01:41       Duration 19.922956943511963 sec
+      # Num Annos 493 (Tracks 27)
+      #                Series     Freq Hz  Diff Lidar (msec)   Duration  Support
+      # 0            CAM_BACK   11.738035          10.554540  19.850000      234
+      # 1       CAM_BACK_LEFT   11.536524           0.918133  19.850000      230
+      # 2      CAM_BACK_RIGHT   11.788413          20.070969  19.850000      235
+      # 3           CAM_FRONT   11.637280          14.986247  19.850000      232
+      # 4      CAM_FRONT_LEFT   11.536524           7.586523  19.850000      230
+      # 5     CAM_FRONT_RIGHT   11.536524          22.528135  19.850000      230
+      # 6           LIDAR_TOP   19.747937           0.000000  19.850175      393
+      # 7     RADAR_BACK_LEFT   12.593898          12.635898  19.850883      251
+      # 8    RADAR_BACK_RIGHT   13.211602          12.786931  19.831054      263
+      # 9         RADAR_FRONT   12.976290          12.728528  19.882416      259
+      # 10   RADAR_FRONT_LEFT   13.510020          12.710849  19.911148      270
+      # 11  RADAR_FRONT_RIGHT   13.724216          12.571387  19.891846      274
+      # 12           ego_pose  155.599393          11.128198  19.922957     3101
+      # 13     sample (annos)    2.015096           0.000000  19.850175       41
+      # 14        sample_data  155.599393          11.128198  19.922957     3101
+      # ---
+      # ---
+      # Scene scene-0293 6308d6d934074a028fc3145eedf3e65f
+      # Start 2018-08-31 15:25:42       Duration 19.525898933410645 sec
+      # Num Annos 3548 (Tracks 277)
+      #                Series     Freq Hz  Diff Lidar (msec)   Duration  Support
+      # 0            CAM_BACK   11.773779          10.441362  19.450000      230
+      # 1       CAM_BACK_LEFT   11.825193           1.573582  19.450000      231
+      # 2      CAM_BACK_RIGHT   11.928021          19.624993  19.450000      233
+      # 3           CAM_FRONT   11.876607          14.872485  19.450000      232
+      # 4      CAM_FRONT_LEFT   11.876607           7.329719  19.450000      232
+      # 5     CAM_FRONT_RIGHT   11.979434          22.875966  19.450000      234
+      # 6           LIDAR_TOP   19.844216           0.000000  19.451512      387
+      # 7     RADAR_BACK_LEFT   13.157897          12.698666  19.455997      257
+      # 8    RADAR_BACK_RIGHT   13.490288          12.647669  19.421380      263
+      # 9         RADAR_FRONT   13.082394          12.616018  19.491845      256
+      # 10   RADAR_FRONT_LEFT   13.305139          12.709008  19.466163      260
+      # 11  RADAR_FRONT_RIGHT   13.209300          12.723777  19.455989      258
+      # 12           ego_pose  157.329504          11.144872  19.525899     3073
+      # 13     sample (annos)    2.004986           0.000000  19.451512       40
+      # 14        sample_data  157.329504          11.144872  19.525899     3073
+      # ---
+      # ---
+      # Scene scene-1107 89f20737ec344aa48b543a9e005a38ca
+      # Start 2018-11-21 11:59:53       Duration 19.820924997329712 sec
+      # Num Annos 496 (Tracks 47)
+      #                Series     Freq Hz  Diff Lidar (msec)   Duration  Support
+      # 0            CAM_BACK   11.696203          11.014590  19.750000      232
+      # 1       CAM_BACK_LEFT   11.848101           1.382666  19.750000      235
+      # 2      CAM_BACK_RIGHT   11.746835          20.483792  19.750000      233
+      # 3           CAM_FRONT   11.848103          14.481831  19.749997      235
+      # 4      CAM_FRONT_LEFT   11.898734           7.063236  19.750000      236
+      # 5     CAM_FRONT_RIGHT   11.898734          22.159666  19.750000      236
+      # 6           LIDAR_TOP   19.797377           0.000000  19.750091      392
+      # 7     RADAR_BACK_LEFT   13.578124          12.646209  19.811279      270
+      # 8    RADAR_BACK_RIGHT   13.271911          12.721395  19.740940      263
+      # 9         RADAR_FRONT   13.198245          12.553021  19.775357      262
+      # 10   RADAR_FRONT_LEFT   13.730931          12.645984  19.736462      272
+      # 11  RADAR_FRONT_RIGHT   13.778595          12.488227  19.740764      273
+      # 12           ego_pose  158.317536          11.102567  19.820925     3139
+      # 13     sample (annos)    2.025307           0.000000  19.750091       41
+      # 14        sample_data  158.317536          11.102567  19.820925     3139
+
+      # Lyft Level 5:
+      # ---
+      # ---
+      # Scene host-a015-lidar0-1235423635198474636-1235423660098038666 755e4564756ad5c92243b7f77039d07ab1cce40662a6a19b67c820647666a3ef
+      # Start 2019-02-28 21:13:55       Duration 24.99979877471924 sec
+      # Num Annos 1637 (Tracks 44)
+      #               Series    Freq Hz  Diff Lidar (msec)   Duration  Support
+      # 0           CAM_BACK   5.020080          98.882582  24.900000      126
+      # 1      CAM_BACK_LEFT   5.020080          16.919276  24.900000      126
+      # 2     CAM_BACK_RIGHT   5.020080          82.887411  24.900000      126
+      # 3          CAM_FRONT   5.020080          50.027272  24.900000      126
+      # 4     CAM_FRONT_LEFT   5.020080          33.542296  24.900000      126
+      # 5    CAM_FRONT_RIGHT   5.020080          66.427920  24.900000      126
+      # 6   CAM_FRONT_ZOOMED   5.020080          50.096270  24.900000      126
+      # 7          LIDAR_TOP   5.020156           0.000000  24.899626      126
+      # 8           ego_pose  40.442375           0.000000  24.899626     1008
+      # 9     sample (annos)   5.020156           0.000000  24.899626      126
+      # 10       sample_data  40.280324          49.847878  24.999799     1008
+      # ---
+      # ---
+      # Scene host-a004-lidar0-1233947108297817786-1233947133198765096 114b780b2efd6f73f134fc3a8f9db628e43131dc47f90e9b5dfdb886400d70f2
+      # Start 2019-02-11 19:05:08       Duration 25.000741004943848 sec
+      # Num Annos 4155 (Tracks 137)
+      #               Series    Freq Hz  Diff Lidar (msec)   Duration  Support
+      # 0           CAM_BACK   5.020080          98.790201  24.900000      126
+      # 1      CAM_BACK_LEFT   5.020080          17.030725  24.900000      126
+      # 2     CAM_BACK_RIGHT   5.020080          83.033195  24.900000      126
+      # 3          CAM_FRONT   5.020080          50.151564  24.900000      126
+      # 4     CAM_FRONT_LEFT   5.020080          33.667718  24.900000      126
+      # 5    CAM_FRONT_RIGHT   5.020080          66.649443  24.900000      126
+      # 6   CAM_FRONT_ZOOMED   5.020080          50.265691  24.900000      126
+      # 7          LIDAR_TOP   5.019934           0.000000  24.900724      126
+      # 8           ego_pose  40.440592           0.000000  24.900724     1008
+      # 9     sample (annos)   5.019934           0.000000  24.900724      126
+      # 10       sample_data  40.278806          49.948567  25.000741     1008
+      # ---
+      # ---
+      # Scene host-a101-lidar0-1241886983298988182-1241887008198992182 7b4640d63a9c62d07a8551d4b430d0acd88eaba8249c843248feb888f4630070
+      # Start 2019-05-14 16:36:23       Duration 25.002139806747437 sec
+      # Num Annos 4777 (Tracks 173)
+      #                Series    Freq Hz  Diff Lidar (msec)   Duration  Support
+      # 0            CAM_BACK   5.020080          93.825297  24.900000      126
+      # 1       CAM_BACK_LEFT   5.020080          85.205165  24.900000      126
+      # 2      CAM_BACK_RIGHT   5.020080          19.099243  24.900000      126
+      # 3           CAM_FRONT   5.020080          52.394347  24.900000      126
+      # 4      CAM_FRONT_LEFT   5.020080          68.799780  24.900000      126
+      # 5     CAM_FRONT_RIGHT   5.020080          35.769232  24.900000      126
+      # 6    CAM_FRONT_ZOOMED   5.020080          52.394347  24.900000      126
+      # 7    LIDAR_FRONT_LEFT   5.020060           0.000000  24.900101      126
+      # 8   LIDAR_FRONT_RIGHT   5.020060           0.000000  24.900101      126
+      # 9           LIDAR_TOP   5.020060           0.000000  24.900101      126
+      # 10           ego_pose  50.562044           0.000000  24.900101     1260
+      # 11     sample (annos)   5.020060           0.000000  24.900101      126
+      # 12        sample_data  50.355690          40.748741  25.002140     1260
+
+
+
+
+
+
+
+
+
+
+
+from psegs import datum
+from psegs.table.sd_table_factory import StampedDatumTableFactory
+
+def transform_from_record(rec, src_frame='', dest_frame=''):
+  """Build a `datum.Transform` from a record holding a `rotation`
+  quaternion and a `translation` vector.
+
+  Args:
+    rec: dict-like with keys `rotation` (handed to
+      `pyquaternion.Quaternion`) and `translation` (converted to an array).
+    src_frame: passed through as the transform's `src_frame`.
+    dest_frame: passed through as the transform's `dest_frame`.
+  """
+  from pyquaternion import Quaternion
+  rotation_matrix = Quaternion(rec['rotation']).rotation_matrix
+  translation = np.array(rec['translation'])
+  return datum.Transform(
+          rotation=rotation_matrix,
+          translation=translation,
+          src_frame=src_frame,
+          dest_frame=dest_frame)
+
+def get_camera_normal(K, extrinsic):
+  """Return the unit-length principal axis vector of the camera whose
+  projection matrix is `P = [K | 0] * extrinsic`.
+
+  FMI see au.fixtures.datasets.auargoverse.get_camera_normal()
+
+  Args:
+    K: 3x3 matrix; it is copied into the left 3x3 block of a 3x4 matrix.
+    extrinsic: matrix that the 3x4 `[K | 0]` is multiplied with
+      (presumably a 4x4 `[R T; 0 1]` transform -- TODO confirm with callers).
+  """
+
+  # Build P
+  # P = |K 0| * | R |T|
+  #             |000 1|
+  K_h = np.zeros((3, 4))
+  K_h[:3, :3] = K
+  P = K_h.dot(extrinsic)
+
+  # Zisserman pg 161 The principal axis vector.
+  # P = [M | p4]; M = |..|
+  #                   |m3|
+  # pv = det(M) * m3
+  M = P[:3, :3]
+  m3 = P[2, :3].T
+  pv = np.linalg.det(M) * m3
+  return pv / np.linalg.norm(pv)
+
+def to_nanostamp(timestamp_micros):
+  """Convert a timestamp in microseconds to integer nanoseconds.  The input
+  is first passed through `int()`."""
+  NANOS_PER_MICRO = 1000
+  whole_micros = int(timestamp_micros)
+  return whole_micros * NANOS_PER_MICRO
+
+class NuscStampedDatumTableFactory(StampedDatumTableFactory):
+
+  API_CLS = PSegsNuScenes
+
+  NUSC_VERSION = 'v1.0-trainval' # E.g. v1.0-mini, v1.0-trainval, v1.0-test
+
+  SENSORS_KEYFRAMES_ONLY = False
+  """bool: Should we only emit datums for Keyframes?
+  NuScenes: If enabled, throttles sensor data to about 2Hz, in tune with
+    samples; if disabled, samples at full res.
+  Lyft Level 5: all sensor data is key frames.
+    FMI see `PSegsNuScenes.print_sensor_sample_rates()` above.
+  """
+  
+  LABELS_KEYFRAMES_ONLY = True
+  """bool: Should we emit label datums for only keyframes?
+  If enabled, samples only raw annotations.  If disabled, will motion-
+  correct cuboids to every sensor reading.
+  """
+
+  PARTITIONS_PER_SEGMENT = 4
+  """int: To cap memory required per Spark partition of datums, partition
+  entire segments read into this many Spark partitions. Tuned for approx
+  2GB RAM per core."""
+
+  INCLUDE_LIDARSEG = True
+  """bool: For every lidar cloud, include a column of nuscenes-lidarseg
+  labels (where available-- only training set keyframes)"""
+
+  @classmethod
+  def dataset_name(cls):
+    """Return the dataset name: the API class's `DATASET` followed by one
+    suffix per enabled option (`-Skfo`, `-Lkfo`, `+lseg`, in that order)."""
+    option_suffixes = (
+      (cls.SENSORS_KEYFRAMES_ONLY,  '-Skfo'),
+      (cls.LABELS_KEYFRAMES_ONLY,   '-Lkfo'),
+      (cls.INCLUDE_LIDARSEG,        '+lseg'),
+    )
+    name = cls.API_CLS.DATASET
+    for enabled, suffix in option_suffixes:
+      if enabled:
+        name = name + suffix
+    return name
+
+  ## Subclass API
+  
+  @classmethod
+  def _get_all_segment_uris(cls):
+    """Return one segment-level `datum.URI` per scene of the dataset; the
+    scene name serves as the `segment_id`.  Uses a fresh (non-cached) nusc
+    handle."""
+    nusc = cls.get_nusc(use_cached=False)
+    uris = []
+    for scene in nusc.scene:
+      segment_id = scene['name']
+      uris.append(
+        datum.URI(
+          dataset=cls.dataset_name(),
+          split=nusc.get_split_for_scene(segment_id),
+          segment_id=segment_id))
+    return uris
+
+  @classmethod
+  def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):
+    # Build and return a list of RDDs of stamped datums (each element is the
+    # result of `cls.create_stamped_datum` on one datum URI).
+    #
+    #  * `spark`: the session used to create the RDDs.
+    #  * `existing_uri_df`: optional; when given, any URI whose
+    #    (dataset, split, segment_id, topic, timestamp) already appears in it
+    #    is skipped, and a chunk with nothing left to do yields no RDD.
+    #  * `only_segments`: optional; when given, only segments for which some
+    #    entry `soft_matches_segment_of` the segment URI are processed.
+    #
+    # NOTE(review): the `Spark` import below does not appear to be used in
+    # this method -- confirm whether it is needed for an import side effect.
+
+    ## First build a set of tasks to do ...
+    # NuScenes is big enough that it's cheap to list all scenes (850 for 
+    # the trainval-1.0 split) but expensive to list all datum URIs (
+    # there are millions).  So we create tasks based upon partitioned
+    # segments.
+
+    from psegs.spark import Spark
+    from oarphpy.spark import cluster_cpu_count
+    
+    TASKS_PER_RDD = cluster_cpu_count(spark)
+
+    seg_uris = cls.get_all_segment_uris()
+    if only_segments:
+      util.log.info(
+        "Filtering to only %s segments" % len(only_segments))
+      seg_uris = [
+        uri for uri in seg_uris
+        if any(
+          suri.soft_matches_segment_of(uri) for suri in only_segments)
+      ]
+
+    ## ... now do those tasks in chunks.
+    # A task is a (segment URI, partition index) pair; every segment gets
+    # `PARTITIONS_PER_SEGMENT` tasks.
+    iter_tasks = itertools.chain.from_iterable(
+      ((seg_uri, p) for p in range(cls.PARTITIONS_PER_SEGMENT))
+      for seg_uri in seg_uris)
+    
+    datum_rdds = []
+    for task_chunk in oputil.ichunked(iter_tasks, TASKS_PER_RDD):
+      task_rdd = spark.sparkContext.parallelize(task_chunk)
+      def iter_uris_for_task(task):
+        # Each task walks all URIs of its segment and keeps only those whose
+        # index modulo `PARTITIONS_PER_SEGMENT` equals the task's partition.
+        seg_uri, partition = task
+        for i, uri in enumerate(cls.iter_uris_for_segment(seg_uri.segment_id)):
+          if (i % cls.PARTITIONS_PER_SEGMENT) == partition:
+            yield uri
+      
+      uri_rdd = task_rdd.flatMap(iter_uris_for_task)
+
+      # Are we trying to resume? Filter URIs if necessary.
+      if existing_uri_df is not None:
+        util.log.info("... checking existing URIs ...")
+        def to_datum_id(obj):
+          # Key used to match a candidate URI against an existing row
+          return (
+            obj.dataset,
+            obj.split,
+            obj.segment_id,
+            obj.topic,
+            obj.timestamp)
+        key_uri_rdd = uri_rdd.map(lambda u: (to_datum_id(u), u))
+        existing_keys_nulls = existing_uri_df.rdd.map(to_datum_id).map(
+                                    lambda t: (t, None))
+        uri_rdd = key_uri_rdd.subtractByKey(existing_keys_nulls).map(
+                                        lambda kv: kv[1])
+        # Cached because the RDD is counted here and mapped again below
+        uri_rdd = uri_rdd.cache()
+        if uri_rdd.count() == 0:
+          util.log.info("... all datums already exist, skipping this chunk ...")
+          continue
+      
+      # Some datums are more expensive to materialize than others.  Force
+      # a repartition to avoid stragglers.
+      uri_rdd = uri_rdd.repartition(TASKS_PER_RDD)
+
+      datum_rdd = uri_rdd.map(cls.create_stamped_datum)
+      datum_rdds.append(datum_rdd)
+    return datum_rdds
+
+
+  ## Public API
+
+  @classmethod
+  def get_nusc(cls, use_cached=True):
+    """Return a `cls.API_CLS` handle for `cls.NUSC_VERSION` (non-verbose).
+
+    Args:
+      use_cached: if True, create the handle once and memoize it on the
+        class (as `_nusc`); if False, always create a fresh handle.
+
+    NB: the nusc handle is not serializable so in some cases we need to
+    use a fresh handle for Spark interop
+    """
+    def create_handle():
+      return cls.API_CLS(version=cls.NUSC_VERSION, verbose=False)
+
+    if not use_cached:
+      return create_handle()
+    if not hasattr(cls, '_nusc'):
+      cls._nusc = create_handle()
+    return cls._nusc
+
+  @classmethod
+  def get_segment_ids(cls):
+    """Return the sorted list of `segment_id`s over all segment URIs."""
+    segment_ids = [uri.segment_id for uri in cls.get_all_segment_uris()]
+    segment_ids.sort()
+    return segment_ids
+
+  @classmethod
+  def iter_uris_for_segment(cls, segment_id):
+    nusc = cls.get_nusc()
+
+    scene_split = nusc.get_split_for_scene(segment_id)
+
+    ## Get sensor data and ego pose
+    for sd in nusc.iter_sample_data_for_scene(segment_id):
+      # Use these for creating frames based upon NuScenes / Lyft groupings
+      sample_token = sd['sample_token']
+      is_key_frame = str(sd['is_key_frame'])
+
+      # Note all poses
+      # DANGER: The timestamps of the pose records in Lyft Level 5 might be
+      # broken, but the sensor timestamps look corect.
+      # https://github.com/lyft/nuscenes-devkit/issues/73
+      yield datum.URI(
+              dataset=cls.dataset_name(),
+              split=scene_split,
+              segment_id=segment_id,
+              timestamp=to_nanostamp(sd['timestamp']),
+              topic='ego_pose',
+              extra={
+                'nuscenes-token': 'ego_pose|' + sd['ego_pose_token'],
+                'nuscenes-sample-token': sample_token,
+                'nuscenes-is-keyframe': is_key_frame,
+              })
+
+      # Maybe skip the sensor data (and associated label) if we're only
+      # doing keyframes
+      if cls.SENSORS_KEYFRAMES_ONLY:
+        if sd['sensor_modality'] and not sd['is_key_frame']:
+          continue
+
+      yield datum.URI(
+              dataset=cls.dataset_name(),
+              split=scene_split,
+              segment_id=segment_id,
+              timestamp=to_nanostamp(sd['timestamp']),
+              topic=sd['sensor_modality'] + '|' + sd['channel'],
+              extra={
+                'nuscenes-token': 'sample_data|' + sd['token'],
+                'nuscenes-sample-token': sample_token,
+                'nuscenes-is-keyframe': is_key_frame,
+              })
+
+      # Get labels (non-keyframes; interpolated one per track)
+      # for every sample datum
+      if not cls.LABELS_KEYFRAMES_ONLY:
+        yield datum.URI(
+                dataset=cls.dataset_name(),
+                split=scene_split,
+                segment_id=segment_id,
+                timestamp=to_nanostamp(sd['timestamp']),
+                topic='labels|cuboids',
+                extra={
+                  'nuscenes-token': 'sample_data|' + sd['token'],
+                  'nuscenes-sample-token': sample_token,
+                  'nuscenes-is-keyframe': is_key_frame,
+                  'nuscenes-label-channel': sd['channel'],
+                })
+
+    ## Get labels (keyframes only, but interpolated for each sensor)
+    if cls.LABELS_KEYFRAMES_ONLY:
+      # Get annos for *only* samples, which are keyframes
+      scene_tokens = [
+        s['token'] for s in nusc.scene if s['name'] == segment_id]
+      assert scene_tokens
+      scene_token = scene_tokens[0]
+
+      scene_samples = [
+        s for s in nusc.sample if s['scene_token'] == scene_token
+      ]
+
+      for sample in scene_samples:
+        for channel, sample_data_token in sample['data'].items():
+          sd = nusc.get('sample_data', sample_data_token)
+          sample_token = sd['sample_token']
+          yield datum.URI(
+                  dataset=cls.dataset_name(),
+                  split=scene_split,
+                  segment_id=segment_id,
+                  timestamp=to_nanostamp(sd['timestamp']),
+                  topic='labels|cuboids',
+                  extra={
+                    'nuscenes-token': 'sample_data|' + sd['token'],
+                    'nuscenes-sample-token': sample_token,
+                    'nuscenes-is-keyframe': 'True',
+                    'nuscenes-label-channel': channel,
+                  })
+
+  @classmethod
+  def create_stamped_datum(cls, uri):
+    if uri.topic.startswith('camera'):
+      sample_data = cls.__get_row(uri)
+      return cls.__create_camera_image(uri, sample_data)
+    elif uri.topic.startswith('lidar') or uri.topic.startswith('radar'):
+      sample_data = cls.__get_row(uri)
+      return cls.__create_point_cloud(uri, sample_data)
+    elif uri.topic == 'ego_pose':
+      pose_record = cls.__get_row(uri)
+      return cls.__create_ego_pose(uri, pose_record)
+    elif uri.topic == 'labels|cuboids':
+      sample_data = cls.__get_row(uri)
+      # nusc = cls.get_nusc()
+      # best_sd, diff_ns = nusc.get_nearest_sample_data(
+      #                           uri.segment_id,
+      #                           uri.timestamp)
+      # assert best_sd
+      # assert diff_ns < .01 * 1e9
+      return cls.__create_cuboids_in_ego(uri, sample_data['token'])
+    else:
+      raise ValueError(uri)
+
+
+  ## Support
+
+  @classmethod
+  def __get_row(cls, uri):
+    if 'nuscenes-token' in uri.extra:
+      record = uri.extra['nuscenes-token']
+      table, token = record.split('|')
+      nusc = cls.get_nusc()
+      return nusc.get(table, token)
+    raise ValueError
+    # nusc = cls.get_nusc()
+    # return nusc.get_row(uri.segment_id, uri.timestamp, uri.topic)
+  
+  @classmethod
+  def __create_camera_image(cls, uri, sample_data):
+    nusc = cls.get_nusc()
+
+    camera_token = sample_data['token']
+    cs_record = nusc.get(
+      'calibrated_sensor', sample_data['calibrated_sensor_token'])
+    sensor_record = nusc.get('sensor', cs_record['sensor_token'])
+    pose_record = nusc.get('ego_pose', sample_data['ego_pose_token'])
+
+    data_path, _, cam_intrinsic = nusc.get_sample_data(camera_token)
+      # Ignore box_list, we'll get boxes in ego frame later
+    
+    w, h = sample_data['width'], sample_data['height']
+    # viewport = uri.get_viewport()~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    # if not viewport:
+    #   from au.fixtures.datasets import common
+    #   viewport = datum.BBox2D.of_size(w, h)
+
+    import imageio
+    image_factory = lambda: imageio.imread(data_path)
+
+    timestamp = sample_data['timestamp']
+
+    ego_from_cam = transform_from_record(
+                      cs_record,
+                      dest_frame='ego',
+                      src_frame=sample_data['channel'])
+    cam_from_ego = ego_from_cam.get_inverse()
+    RT_h = cam_from_ego.get_transformation_matrix(homogeneous=True)
+    principal_axis_in_ego = get_camera_normal(cam_intrinsic, RT_h)
+
+    ego_pose = transform_from_record(
+                      pose_record,
+                      dest_frame='city',
+                      src_frame='ego')  ## TODO check nusc, lyft ok ~~~~~~~~~~~~~
+    sensor_name = sample_data['channel']
+    ci = datum.CameraImage(
+            sensor_name=sensor_name,
+            # image_jpeg=bytearray(open(data_path, 'rb').read()),
+            image_factory=image_factory,
+            height=h,
+            width=w,
+            # viewport=viewport,~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+            timestamp=to_nanostamp(timestamp),
+            ego_pose=ego_pose,
+            ego_to_sensor=ego_from_cam['ego', sensor_name],
+            K=cam_intrinsic,
+            # principal_axis_in_ego=principal_axis_in_ego,~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    )
+    return datum.StampedDatum(uri=uri, camera_image=ci)
+  
+  @classmethod
+  def __create_point_cloud(cls, uri, sample_data):
+    # Based upon nuscenes.py#map_pointcloud_to_image()
+
+    nusc = cls.get_nusc()
+    sd = sample_data
+    pcl_path = os.path.join(nusc.dataroot, sd['filename'])
+    target_pose_token = sd['ego_pose_token']
+    cs_record = nusc.get(
+                  'calibrated_sensor', sd['calibrated_sensor_token'])
+    poserecord1 = nusc.get('ego_pose', sd['ego_pose_token'])
+    poserecord2 = nusc.get('ego_pose', target_pose_token)
+    add_lidarseg = (cls.INCLUDE_LIDARSEG and 
+                          nusc.has_lidarseg() and
+                          sd['sensor_modality'] == 'lidar' and
+                          sd['is_key_frame'])
+    lidarseg_labels_filename = ''
+    if add_lidarseg:
+      sample_record = nusc.get('sample', sd['sample_token'])
+      pointsensor_token = sample_record['data'][sd['channel']]
+      lidarseg_datum = nusc.get('lidarseg', pointsensor_token)
+      lidarseg_labels_filename = os.path.join(
+                                        nusc.dataroot,
+                                        lidarseg_datum['filename'])
+
+    def _get_cloud(
+          sd,
+          pcl_path,
+          cs_record,
+          poserecord1,
+          poserecord2,
+          lidarseg_labels_filename):
+        
+      from pyquaternion import Quaternion
+      from nuscenes.utils.data_classes import LidarPointCloud
+      from nuscenes.utils.data_classes import RadarPointCloud
+      import numpy as np
+        
+      if 'host-a011_lidar1_1233090652702363606.bin' in pcl_path:
+        raise ValueError('Lyft Level 5: Kaggle download has a broken file')
+      if sd['sensor_modality'] == 'lidar':
+        pc = LidarPointCloud.from_file(pcl_path)
+          # NB: In NuScenes, lidar is +y = forward, +x = right, +z = up
+      else:
+        pc = RadarPointCloud.from_file(pcl_path)
+          # NB: In NuScenes, radar is +x = forward, +y = left, +z = up
+
+      ## Adapted from Nuscenes code.  Not sure why they project through
+      ## the world frame, but we'll do that to be consistent with their API.
+
+      # Step 1: Points live in the point sensor frame.  First transform to
+      # world frame:
+      # 1a transform to ego
+      # First step: transform the point-cloud to the ego vehicle frame for the
+      # timestamp of the sweep.
+      pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
+      pc.translate(np.array(cs_record['translation']))
+
+      # 1b transform to the global frame.
+      pc.rotate(Quaternion(poserecord1['rotation']).rotation_matrix)
+      pc.translate(np.array(poserecord1['translation']))
+
+      # Step 2: Send points into the ego frame at the target timestamp
+      pc.translate(-np.array(poserecord2['translation']))
+      pc.rotate(Quaternion(poserecord2['rotation']).rotation_matrix.T)
+
+      # n_xyz = pc.points[:3, :].T
+      #   # Throw out intensity (lidar) and ... other stuff (radar) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+      n_xyz = pc.points.T
+
+      if lidarseg_labels_filename:
+        # Load labels from .bin file.
+        labels = np.fromfile(lidarseg_labels_filename, dtype=np.uint8)
+        labels = np.reshape(labels, [-1, 1])
+        assert n_xyz.shape[0] == labels.shape[0], \
+          "Expected one label per point, have %s vs %s" % (
+            n_xyz.shape, labels.shape)
+        n_xyz = np.hstack([n_xyz, labels])
+          
+      return n_xyz.astype(np.float32)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    # from pyquaternion import Quaternion
+    # from nuscenes.utils.data_classes import LidarPointCloud
+    # from nuscenes.utils.data_classes import RadarPointCloud
+
+
+
+
+
+
+
+    # def _get_cloud(sd):
+    #   nusc = cls.get_nusc()
+
+    #   target_pose_token = sd['ego_pose_token']
+
+    #   pcl_path = os.path.join(nusc.dataroot, sd['filename'])
+    #   if 'host-a011_lidar1_1233090652702363606.bin' in pcl_path:
+    #     util.log.warn('Lyft Level 5: Kaggle download has a broken file') #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    #   if sample_data['sensor_modality'] == 'lidar':
+    #     pc = LidarPointCloud.from_file(pcl_path)
+    #       # NB: In NuScenes, lidar is +y = forward, +x = right, +z = up
+    #   else:
+    #     pc = RadarPointCloud.from_file(pcl_path)
+    #       # NB: In NuScenes, radar is +x = forward, +y = left, +z = up
+
+    #   # Step 1: Points live in the point sensor frame.  First transform to
+    #   # world frame:
+    #   # 1a transform to ego
+    #   # First step: transform the point-cloud to the ego vehicle frame for the
+    #   # timestamp of the sweep.
+    #   cs_record = nusc.get(
+    #                 'calibrated_sensor', sd['calibrated_sensor_token'])
+    #   pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
+    #   pc.translate(np.array(cs_record['translation']))
+
+    #   # 1b transform to the global frame.
+    #   poserecord1 = nusc.get('ego_pose', sd['ego_pose_token'])
+    #   pc.rotate(Quaternion(poserecord1['rotation']).rotation_matrix)
+    #   pc.translate(np.array(poserecord1['translation']))
+
+    #   # Step 2: Send points into the ego frame at the target timestamp
+    #   poserecord2 = nusc.get('ego_pose', target_pose_token)
+    #   pc.translate(-np.array(poserecord2['translation']))
+    #   pc.rotate(Quaternion(poserecord2['rotation']).rotation_matrix.T)
+
+    #   # n_xyz = pc.points[:3, :].T
+    #   #   # Throw out intensity (lidar) and ... other stuff (radar) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    #   n_xyz = pc.points.T
+
+    #   add_lidarseg = (cls.INCLUDE_LIDARSEG and 
+    #                     nusc.has_lidarseg() and
+    #                     sd['sensor_modality'] == 'lidar' and
+    #                     sd['is_key_frame'])
+    #   if add_lidarseg:
+    #     sample_record = nusc.get('sample', sd['sample_token'])
+    #     pointsensor_token = sample_record['data'][sd['channel']]
+    #     lidarseg_datum = nusc.get('lidarseg', pointsensor_token)
+    #     lidarseg_labels_filename = os.path.join(
+    #                                       nusc.dataroot,
+    #                                       lidarseg_datum['filename'])
+    #     # Load labels from .bin file.
+    #     labels = np.fromfile(lidarseg_labels_filename, dtype=np.uint8)
+    #     labels = np.reshape(labels, [-1, 1])
+    #     assert n_xyz.shape[0] == labels.shape[0], \
+    #       "Expected one label per point, have %s vs %s" % (
+    #         n_xyz.shape, labels.shape)
+    #     n_xyz = np.hstack([n_xyz, labels])
+      
+    #   return n_xyz.astype(np.float32)
+    
+    
+    # cloud = _get_cloud(sample_data)
+    cloud_factory = lambda: _get_cloud(
+                                sd,
+                                pcl_path,
+                                cs_record,
+                                poserecord1,
+                                poserecord2,
+                                lidarseg_labels_filename)
+
+    # if sample_data['sensor_modality'] == 'lidar':
+    #   n_xyz = n_xyz[:, [1, 0, 2]]
+    #   n_xyz[:, 0] *= -1
+    #   # .............. shouldnt these be in ego because of above? ~~~~~~~~~~~``
+    nusc = cls.get_nusc()
+    cs_record = nusc.get(
+                  'calibrated_sensor', sample_data['calibrated_sensor_token'])
+    ego_pose = transform_from_record(
+                      nusc.get('ego_pose', sample_data['ego_pose_token']),
+                      dest_frame='city',
+                      src_frame='ego')
+    # ego_to_sensor = transform_from_record(
+    #                   cs_record,
+    #                   src_frame='ego',
+    #                   dest_frame=sample_data['channel']) # ?????????????????????????
+    
+    # real_ego_to_sensor = transform_from_record(
+    #                   cs_record,
+    #                   src_frame='ego',
+    #                   dest_frame=sample_data['channel'])
+    # print('real_ego_to_sensor', real_ego_to_sensor) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
+            # TODO FIXME
+    # TODO save this xform as context ................................................
+    ego_to_sensor = datum.Transform(
+      src_frame='ego', dest_frame=sample_data['channel'])  # why didn't the one above work?
+
+    pc = datum.PointCloud(
+            sensor_name=sample_data['channel'],
+            timestamp=to_nanostamp(sample_data['timestamp']),
+            # cloud=cloud,#n_xyz.astype(np.float32),
+            cloud_factory=cloud_factory,
+            ego_to_sensor=ego_to_sensor,
+            ego_pose=ego_pose,
+    )
+    return datum.StampedDatum(uri=uri, point_cloud=pc)
+  
+  @classmethod
+  def __create_cuboids_in_ego(cls, uri, sample_data_token):
+    nusc = cls.get_nusc()
+
+    # NB: This helper always does motion correction (interpolation) unless
+    # `sample_data_token` refers to a keyframe.
+    boxes = nusc.get_boxes(sample_data_token)
+
+    # Boxes are in world frame.  Move all to ego frame.
+    from pyquaternion import Quaternion
+    sd_record = nusc.get('sample_data', sample_data_token)
+    pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
+    for box in boxes:
+      # Move box to ego vehicle coord system
+      box.translate(-np.array(pose_record['translation']))
+      box.rotate(Quaternion(pose_record['rotation']).inverse)
+
+    ego_pose = transform_from_record(
+      pose_record, dest_frame='city', src_frame='ego')
+    # from au.fixtures.datasets.av import NUSCENES_CATEGORY_TO_AU_AV_CATEGORY ~~~~~~~~~~~~~~~~~~~
+    cuboids = []
+    for box in boxes:
+      cuboid = datum.Cuboid()
+
+      # Core
+      sample_anno = nusc.get('sample_annotation', box.token)
+      cuboid.track_id = \
+        'nuscenes_instance_token:' + sample_anno['instance_token']
+      cuboid.category_name = box.name
+      cuboid.timestamp = to_nanostamp(sd_record['timestamp'])
+      
+      cuboid.ps_category = 'todo' # ~~~~~~~~~~~~~~~~~~~~~~~`NUSCENES_CATEGORY_TO_AU_AV_CATEGORY[box.name]
+      
+      attribs = [
+        nusc.get('attribute', attrib_token)['name']
+        for attrib_token in sample_anno['attribute_tokens']
+      ]
+
+      # Try to give bikes riders
+      # NB: In Lyft Level 5, they appear to *not* label bikes without riders
+      if 'cycle.with_rider' in attribs:
+        if cuboid.ps_category == 'bike_no_rider':
+          cuboid.ps_category = 'bike_with_rider'
+        elif cuboid.ps_category == 'motorcycle_no_rider':
+          cuboid.ps_category = 'motorcycle_with_rider'
+        else:
+          # raise ValueError(
+            # "Don't know how to give a rider to %s %s" % (cuboid, attribs))
+          # print("""TODO "Don't know how to give a rider """)#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+          pass
+
+      cuboid.extra = {
+        'nuscenes_token': box.token,
+        'nuscenes_attribs': '|'.join(attribs),
+      }
+
+      # Points
+      # box3d = box.corners().T
+
+      # cuboid.motion_corrected = (not sd_record['is_key_frame'])~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
+      # cuboid.distance_meters = np.min(np.linalg.norm(cuboid.box3d, axis=-1))
+      
+      # Pose
+      cuboid.width_meters = float(box.wlh[0])
+      cuboid.length_meters = float(box.wlh[1])
+      cuboid.height_meters = float(box.wlh[2])
+
+      cuboid.obj_from_ego = datum.Transform(
+          rotation=box.orientation.rotation_matrix,
+          translation=box.center,
+          src_frame='ego',
+          dest_frame='obj') # TODO: this naming is backwards???~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+      cuboid.ego_pose = ego_pose
+      cuboids.append(cuboid)
+
+    return datum.StampedDatum(uri=uri, cuboids=cuboids)
+
+  @classmethod
+  def __create_ego_pose(cls, uri, pose_record):
+    nusc = cls.get_nusc()
+    pose_record = nusc.get('ego_pose', pose_record['token'])
+    ego_pose = transform_from_record(
+                      pose_record,
+                      dest_frame='city',
+                      src_frame='ego')
+    return datum.StampedDatum(uri=uri, transform=ego_pose)
+      # Note: we use the timestamp in `uri` versus the one in the pose record.
+      # DANGER: The timestamps of the pose records in Lyft Level 5 might be
+      # broken, but the sensor timestamps look corect.
+      # https://github.com/lyft/nuscenes-devkit/issues/73
+
+class NuscStampedDatumTableLabelsAllFrames(NuscStampedDatumTableFactory):
+  """A `NuscStampedDatumTableFactory` that emits cuboid labels for every
+  sample datum rather than only for keyframes."""
+
+  # With this off, `iter_uris_for_segment()` yields a 'labels|cuboids' URI
+  # for each sample datum instead of only for the samples (keyframes).
+  LABELS_KEYFRAMES_ONLY = False
+
+
+
+
+
+
+
+
+
+
+
+###############################################################################
+### IDatasetUtil Impl
+
+class NuscDSUtil(IDatasetUtil):
+  """DSUtil for Nuscenes (only)"""
+
+  FIXTURES = NuscFixtures
+
+  REQUIRED_SUBDIRS = ('maps', 'samples', 'sweeps')
+
+  WARM_CACHE_FOR_VERSIONS = ('v1.0-mini', 'v1.0-trainval', 'v1.0-test')
+
+  @classmethod
+  def emplace(cls):
+    cls.FIXTURES.maybe_emplace_psegs_ext()
+    if not cls.FIXTURES.ROOT.exists():
+      
+      req_subdirs = '\n        '.join(
+        '  * %s' % fname for fname in cls.REQUIRED_SUBDIRS)
+      cls.show_md("""
+        Due to NuScenes license constraints, you need to manually accept the 
+        NuScenes and download the `nuScenes Dataset` at 
+        [nuscenes.org](https://www.nuscenes.org/).
+
+        Furthermore, you need to untar / expand the downloaded files due
+        to the way that the NuScenes python devkit uses the files.  See the
+        `tar -xf` instructions here:
+         * [For NuScenes (core)](https://render.githubusercontent.com/view/ipynb?commit=d8403d35a49f9a5f2b8707129c8af1eff6a8906c&enc_url=68747470733a2f2f7261772e67697468756275736572636f6e74656e742e636f6d2f6e75746f6e6f6d792f6e757363656e65732d6465766b69742f643834303364333561343966396135663262383730373132396338616631656666366138393036632f707974686f6e2d73646b2f7475746f7269616c732f6e757363656e65735f7475746f7269616c2e6970796e62&nwo=nutonomy%%2Fnuscenes-devkit&path=python-sdk%%2Ftutorials%%2Fnuscenes_tutorial.ipynb&repository_id=147720534&repository_type=Repository#Google-Colab-(optional))
+         * [For NuScenes-lidarseg (optional)](https://render.githubusercontent.com/view/ipynb?commit=d8403d35a49f9a5f2b8707129c8af1eff6a8906c&enc_url=68747470733a2f2f7261772e67697468756275736572636f6e74656e742e636f6d2f6e75746f6e6f6d792f6e757363656e65732d6465766b69742f643834303364333561343966396135663262383730373132396338616631656666366138393036632f707974686f6e2d73646b2f7475746f7269616c732f6e757363656e65735f6c696461727365675f7475746f7269616c2e6970796e62&nwo=nutonomy%%2Fnuscenes-devkit&path=python-sdk%%2Ftutorials%%2Fnuscenes_lidarseg_tutorial.ipynb&repository_id=147720534&repository_type=Repository#Google-Colab-(optional))
+
+        Your decompressed dataset directory (the NuSecenes `dataroot`) must
+        have at least these subdirectories:
+
+        %s
+
+        Once you've downloaded and unpacked the NuScenes data, we'll need the
+        path to that data.  Enter that below, or exit this program.
+
+      """ % req_subdirs)
+      nusc_root = input(
+        "Please enter your NuScenes dataroot path; "
+        "PSegs will create a (read-only) symlink to it: ")
+      nusc_root = Path(nusc_root.strip())
+      assert nusc_root.exists()
+      assert nusc_root.is_dir()
+
+      oputil.mkdir(str(cls.FIXTURES.ROOT.parent))
+
+      cls.show_md("Symlink: \n%s <- %s" % (nusc_root, cls.FIXTURES.ROOT))
+      os.symlink(nusc_root, cls.FIXTURES.ROOT)
+
+    cls.show_md("Validating NuScenes data ...")
+    subdirs_needed = set(cls.REQUIRED_SUBDIRS)
+    subdirs_have = set()
+    for entry in cls.FIXTURES.ROOT.iterdir():
+      if entry.name in subdirs_needed:
+        subdirs_needed.remove(entry.name)
+        subdirs_have.add(entry.name)
+    
+    if subdirs_needed:
+      s_have = \
+        '\n        '.join('  * %s' % fname for fname in subdirs_have)
+      s_needed = \
+        '\n        '.join('  * %s' % fname for fname in subdirs_needed)
+      cls.show_md("""
+        Missing some expected subdirs!
+
+        Found:
+        
+        %s
+
+        Missing:
+
+        %s
+      """ % (s_have, s_needed))
+      return False
+    
+    cls.show_md("... core NuScenes data found!")
+
+    cls.show_md("Warming NuScenes caches ...")
+    class NuScenesWithMyFixtures(PSegsNuScenes):
+      FIXTURES = cls.FIXTURES
+    NuScenesWithMyFixtures.maybe_warm_caches()
+    cls.show_md("... done warming caches.")
+    return True
+
+  @classmethod
+  def test(cls):
+    from oarphpy import util as oputil
+    oputil.run_cmd("cd %s && pytest -s -vvv -k test_nuscenes" % C.PS_ROOT)
+    return True
+
+  @classmethod
+  def build_table(cls):
+    return True
+    assert False, "TODO"
+
+
+class LyftDSUtil(IDatasetUtil):
+  """DSUtil for Lyft (only)"""
+
+  # Fixtures to use for the Lyft dataset
+  FIXTURES = LyftFixtures
+
+  # Subdirectory names for the Lyft dataroot.
+  # NOTE(review): presumably meant to be validated the way
+  # `NuscDSUtil.emplace()` validates its own `REQUIRED_SUBDIRS`; nothing in
+  # this class uses it yet -- confirm.
+  REQUIRED_SUBDIRS = ('maps', 'images', 'lidar')
+
+
+# 2020-10-29 07:43:22,447 oarph 965634 : Progress for                             
+# save_df_thunks [Pid:965634 Id:140090221242784]
+# -----------------------  -------------------------------------------------------------------------------
+# Thruput
+# N thru                   3 (of 496)
+# N chunks                 3
+# Total time               8 minutes and 51.56 seconds
+# Total thru               2.34 GB
+# Rate                     4.4 MB / sec
+# Hz                       0.0056438171817696624
+# Progress
+# Percent Complete         0.6048387096774194
+# Est. Time To Completion  1 day, 15 minutes and 52.23 seconds
+# Latency (per chunk)
+# Avg                      2 minutes, 57 seconds, 185 milliseconds, 44 microseconds and 765.47 nanoseconds
+# p50                      3 minutes, 1 second, 11 milliseconds, 476 microseconds and 755.14 nanoseconds
+# p95                      3 minutes, 7 seconds, 486 milliseconds, 417 microseconds and 293.55 nanoseconds
+# p99                      3 minutes, 8 seconds, 61 milliseconds, 967 microseconds and 563.63 nanoseconds
+# -----------------------  -------------------------------------------------------------------------------
+
+# with faster loops now
+# save_df_thunks [Pid:1588334 Id:140225493356208]
+# -----------------------  -------------------------------------------------------------------------------
+# Thruput
+# N thru                   3 (of 496)
+# N chunks                 3
+# Total time               3 minutes and 35.83 seconds
+# Total thru               2.34 GB
+# Rate                     10.82 MB / sec
+# Hz                       0.013900148032916951
+# Progress
+# Percent Complete         0.6048387096774194
+# Est. Time To Completion  9 hours, 51 minutes and 7.25 seconds
+# Latency (per chunk)
+# Avg                      1 minute, 11 seconds, 941 milliseconds, 679 microseconds and 875.06 nanoseconds
+# p50                      1 minute, 15 seconds, 234 milliseconds, 795 microseconds and 331.95 nanoseconds
+# p95                      1 minute, 16 seconds, 7 milliseconds, 751 microseconds and 536.37 nanoseconds
+# p99                      1 minute, 16 seconds, 76 milliseconds, 458 microseconds and 754.54 nanoseconds
+# -----------------------  -------------------------------------------------------------------------------
\ No newline at end of file
diff --git a/psegs/datasets/tanks_and_temples.py b/psegs/datasets/tanks_and_temples.py
new file mode 100644
index 0000000..fd334bc
--- /dev/null
+++ b/psegs/datasets/tanks_and_temples.py
@@ -0,0 +1,291 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+from pathlib import Path
+
+from psegs import datum
+from psegs import util
+from psegs.conf import C
+from psegs.datasets.idsutil import IDatasetUtil
+from psegs.table.sd_table_factory import StampedDatumTableFactory
+
+
+###############################################################################
+### TanksAndTemples Fixtures & Other Constants
+
+class Fixtures(object):
+
+  ROOT = C.EXT_DATA_ROOT / 'tanks_and_temples_archives'
+
+  EXTERNAL_FIXTURES_ROOT = C.EXTERNAL_TEST_FIXTURES_ROOT / 'tanks_and_temples'
+
+  TRAINING_SCENES = (
+    'Barn',
+    'Caterpillar',
+    'Church',
+    'Courthouse',
+    'Ignatius',
+    'Meetingroom',
+    'Truck',
+  )
+
+  TRAINING_DATA_MASTER_ZIP = 'trainingdata.zip'
+
+  @classmethod
+  def zip_path(cls, scene):
+    return cls.ROOT / f'{scene}.zip'
+
+  ### DSUtil Auto-download ####################################################
+
+  @classmethod
+  def maybe_emplace_psegs_kitti_ext(cls):
+    print('todo')
+    return
+
+###############################################################################
+### StampedDatum Table
+
+class TanksAndTemplesSDTable(StampedDatumTableFactory):
+  
+  FIXTURES = Fixtures
+
+  ## Subclass API
+
+  @classmethod
+  def _get_all_segment_uris(cls):
+    train_segs = [
+      datum.URI(
+            dataset='tanks-and-temples',
+            split='train',
+            segment_id=scene,
+            extra={
+              'tnt.scene': scene,
+            })
+      for scene in cls.FIXTURES.TRAINING_SCENES
+    ]
+    return sorted(train_segs)
+
+  @classmethod
+  def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):
+    assert False, "TODO"
+
+    # ## First build indices (saves several minutes per worker per chunk) ...
+    # class SDBenchmarkToRawMapper(BenchmarkToRawMapper):
+    #   FIXTURES = cls.FIXTURES
+    # SDBenchmarkToRawMapper.setup(spark=spark)
+
+    # ## ... now build a set of tasks to do ...
+    # archive_paths = cls._get_all_archive_paths()
+    # task_rdd = _rdd_of_all_archive_datafiles(spark, archive_paths)
+    # task_rdd = task_rdd.cache()
+    # util.log.info("Discovered %s tasks ..." % task_rdd.count())
+    
+    # ## ... convert to URIs and filter those tasks if necessary ...
+    # if existing_uri_df is not None:
+    #   # Since we keep track of the original archives and file names, we can
+    #   # just filter on those.  We'll collect them in this process b/c the
+    #   # maximal set of URIs is smaller than RAM.
+    #   def to_task(row):
+    #     return (row.extra.get('kitti.archive'),
+    #             row.extra.get('kitti.archive.file'))
+    #   skip_tasks = set(
+    #     existing_uri_df.select('extra').rdd.map(to_task).collect())
+      
+    #   task_rdd = task_rdd.filter(lambda t: t not in skip_tasks)
+    #   util.log.info(
+    #     "Resume mode: have datums for %s datums; dropped %s tasks" % (
+    #       existing_uri_df.count(), len(skip_tasks)))
+    
+    # uri_rdd = task_rdd.map(lambda task: kitti_archive_file_to_uri(*task))
+    # if only_segments:
+    #   util.log.info(
+    #     "Filtering to only %s segments" % len(only_segments))
+    #   uri_rdd = uri_rdd.filter(
+    #     lambda uri: any(
+    #       suri.soft_matches_segment(uri) for suri in only_segments))
+
+    # ## ... run tasks and create stamped datums.
+    # # from oarphpy.spark import cluster_cpu_count
+    # URIS_PER_CHUNK = os.cpu_count() * 64 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make class member so can configure to RAM
+    # uris = uri_rdd.collect()
+    # util.log.info("... creating datums for %s URIs." % len(uris))
+
+    # datum_rdds = []
+    # for chunk in oputil.ichunked(uris, URIS_PER_CHUNK):
+    #   chunk_uri_rdd = spark.sparkContext.parallelize(chunk)
+    #   datum_rdd = chunk_uri_rdd.flatMap(cls._iter_datums_from_uri)
+    #   datum_rdds.append(datum_rdd)
+    #   # if len(datum_rdds) >= 10:
+    #   #   break # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    # return datum_rdds
+  
+  @classmethod
+  def _get_all_archive_paths(cls):
+    archives = []
+    if cls.INCLUDE_OBJECT_BENCHMARK:
+      archives += list(cls.FIXTURES.OBJECT_BENCHMARK_FNAMES)
+      if not cls.INCLUDE_OBJ_PREV_FRAMES:
+        archives = [arch for arch in archives if 'prev' not in arch]
+    if cls.INCLUDE_TRACKING_BENCHMARK:
+      archives += list(cls.FIXTURES.TRACKING_BENCHMARK_FNAMES)
+    archives = [arch for arch in archives if 'calib' not in arch]
+    paths = [cls.FIXTURES.zip_path(arch) for arch in archives]
+    return paths
+
+
+  ## Datum Construction Support
+
+  @classmethod
+  def _get_uris_for_segment_uri(cls, seg_uri):
+    import zipfile
+
+    archive_path = cls.FIXTURES.zip_path(seg_uri.extra['tnt.scene'])
+    names = zipfile.ZipFile(archive_path).namelist()
+    base_uri = seg_uri
+    uris = [
+      datum.URI
+    ]
+    for name in sorted(names):
+      assert False, "TODO"
+
+
+  @classmethod
+  def _get_file_bytes(cls, uri=None, archive=None, entryname=None):
+    """Read and return the bytes of one entry of a fixture zip archive.
+
+    The entry is identified either by `uri`, whose `extra` map must supply
+    'kitti.archive' and 'kitti.archive.file', or by explicit `archive` and
+    `entryname`.  When `uri` is given it overrides the other two arguments.
+
+    Opened `zipfile.ZipFile` handles are cached in the class attribute
+    `_get_file_bytes_archives` (keyed by archive name) and are never closed.
+
+    NOTE(review): the 'kitti.*' key names look carried over from the KITTI
+    extension -- confirm they are what this dataset's URIs actually use.
+
+    Raises:
+      AssertionError: if no archive / entry name could be determined.
+      Exception: if the read fails; `args[0]` is the tuple
+        `(original_error, archive, uri, entryname)` and the original error
+        is chained as `__cause__`.
+    """
+
+    if uri is not None:
+      archive = uri.extra['kitti.archive']
+      entryname = uri.extra['kitti.archive.file']
+    assert archive and entryname, (archive, entryname)
+
+    # Cache the Zipfiles for faster loading
+    if not hasattr(cls, '_get_file_bytes_archives'):
+      cls._get_file_bytes_archives = {}
+    cache = cls._get_file_bytes_archives
+    if archive not in cache:
+      import zipfile
+      cache[archive] = zipfile.ZipFile(cls.FIXTURES.zip_path(archive))
+
+    try:
+      return cache[archive].read(entryname)
+    except Exception as e:
+      # Include `entryname`: `uri` is None when the caller passed the archive
+      # and entry name directly, which would otherwise hide the failing entry.
+      raise Exception((e, archive, uri, entryname)) from e
+
+###############################################################################
+### IDatasetUtil Impl
+
+class DSUtil(IDatasetUtil):
+  """`IDatasetUtil` implementation for the Tanks and Temples dataset.
+
+  Provides interactive set-up of the data (`emplace()`) and a smoke test
+  (`test()`).  All paths and expected file names come from `FIXTURES`.
+  """
+
+  FIXTURES = Fixtures
+
+  @classmethod
+  def all_training_zips(cls):
+    """Return the file names of all expected training archives: the master
+    training-data zip followed by one `<scene>.zip` per training scene."""
+    return [cls.FIXTURES.TRAINING_DATA_MASTER_ZIP] + [
+      f'{scene}.zip' for scene in cls.FIXTURES.TRAINING_SCENES
+    ]
+
+  @classmethod
+  def emplace(cls):
+    """Interactively link the user's Tanks and Temples archives into place
+    and check that all expected archives are present.
+
+    If `FIXTURES.ROOT` does not exist, print download instructions, prompt
+    (via `input()`) for the directory holding the downloaded zip files, and
+    symlink `FIXTURES.ROOT` to that directory.  Then compare the directory
+    listing against `all_training_zips()`.
+
+    Returns:
+      bool: True if every expected archive was found, else False.
+
+    Raises:
+      AssertionError: if the entered path does not exist or is not a
+        directory.
+    """
+    import os
+    from pathlib import Path
+
+    # NOTE(review): this hook is named for the KITTI extension; confirm it is
+    # the intended set-up call for Tanks and Temples.
+    cls.FIXTURES.maybe_emplace_psegs_kitti_ext()
+
+    if not cls.FIXTURES.ROOT.exists():
+      cls.show_md("""
+        The Tanks And Temples data files are offered via Google Drive links
+        at [https://tanksandtemples.org/download/](https://tanksandtemples.org/download/).
+        You must be signed-in with your own Google account in order to download
+        these files.  
+        
+        The authors supply a download script, however this might only work
+        for you if you have a non-headless and authenticated terminal session:
+          https://github.com/isl-org/TanksAndTemples/blob/3c2c2125e9b16f32790c96a8953611de785d91d6/python_toolbox/download_t2_dataset.py#L1
+        
+        We recommend you download the data manually.  Please go to the download
+        page and download at least the following:
+          * `trainingdata.zip`-- The link for this file is embedded in a
+             text comment on the page.  Direct link:
+               https://drive.google.com/file/d/1jAr3IDvhVmmYeDWi0D_JfgiHcl70rzVE
+          * For each Training Data scene, download the "image set" archive;
+             here is a direct link for the Barn sequence:
+               https://drive.google.com/file/d/0B-ePgl6HF260NzQySklGdXZyQzA/
+        
+        You'll want to download all the following zip files (do not decompress
+        them) to a single directory on a local disk (spinning disk OK).
+        Once you've downloaded the archives, we'll need the path to where
+        you put them.  Enter that below, or exit this program.
+
+      """)
+      tnt_root = input(
+        "Please enter the directory containing your TanksAndTemples zip archives; "
+        "PSegs will create a (read-only) symlink to them: ")
+      # Make the path absolute: a relative symlink target would be resolved
+      # relative to the link's own directory and so would dangle.
+      tnt_root = Path(tnt_root.strip()).expanduser().resolve()
+      assert tnt_root.exists(), "Path does not exist: %s" % tnt_root
+      assert tnt_root.is_dir(), "Path is not a directory: %s" % tnt_root
+
+      cls.FIXTURES.ROOT.parent.mkdir(parents=True, exist_ok=True)
+
+      cls.show_md("Symlink: \n%s <- %s" % (tnt_root, cls.FIXTURES.ROOT))
+      os.symlink(tnt_root, cls.FIXTURES.ROOT)
+
+      # Make the symlink itself read-only where the platform can chmod a
+      # link without following it.  Never chmod `tnt_root`: mode 0444 on the
+      # user's directory removes its search (execute) bit, which makes the
+      # archives inside unreachable for non-root users.
+      if os.chmod in os.supports_follow_symlinks:
+        import stat
+        os.chmod(
+          cls.FIXTURES.ROOT,
+          stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH,
+          follow_symlinks=False)
+
+    cls.show_md("Validating TanksAndTemples archives ...")
+    zips_expected = set(cls.all_training_zips())
+    names_on_disk = {entry.name for entry in cls.FIXTURES.ROOT.iterdir()}
+    zips_have = zips_expected & names_on_disk
+    zips_needed = zips_expected - names_on_disk
+    
+    if zips_needed:
+      # Sort so that the report is stable from run to run
+      s_have = \
+        '\n        '.join('  * %s' % fname for fname in sorted(zips_have))
+      s_needed = \
+        '\n        '.join('  * %s' % fname for fname in sorted(zips_needed))
+      cls.show_md("""
+        Missing some expected archives!
+
+        Found:
+        
+        %s
+
+        Missing:
+
+        %s
+      """ % (s_have, s_needed))
+      return False
+    
+    cls.show_md("... all Tanks and Temples archives found!")
+    return True
+
+  @classmethod
+  def test(cls):
+    """Run the `test_tanks_and_temples` pytest selection from `C.PS_ROOT`
+    through `oputil.run_cmd()`; return True once the command has run."""
+    from oarphpy import util as oputil
+    oputil.run_cmd("cd %s && pytest -s -vvv -k test_tanks_and_temples" % C.PS_ROOT)
+    return True
+
+  # @classmethod
+  # def build_table(cls):
+  #   TanksAndTemplesSDTable.build()
+  #   return True
diff --git a/psegs/datum/__init__.py b/psegs/datum/__init__.py
new file mode 100644
index 0000000..2dff8f2
--- /dev/null
+++ b/psegs/datum/__init__.py
@@ -0,0 +1,31 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from psegs.datum.bbox2d import BBox2D
+from psegs.datum.cuboid import Cuboid
+from psegs.datum.camera_image import CameraImage
+from psegs.datum.matched_pair import MatchedPair
+from psegs.datum.pobj import PObj
+from psegs.datum.point_cloud import PointCloud
+from psegs.datum.points2d import Points2D
+from psegs.datum.stamped_datum import Sample
+from psegs.datum.stamped_datum import StampedDatum
+from psegs.datum.stamped_datum import URI_PROTO
+from psegs.datum.stamped_datum import TRANSFORM_PROTO
+from psegs.datum.stamped_datum import POINTCLOUD_PROTO
+from psegs.datum.stamped_datum import CAMERAIMAGE_PROTO
+from psegs.datum.stamped_datum import MATCHED_PAIR_PROTO
+from psegs.datum.stamped_datum import STAMPED_DATUM_PROTO
+from psegs.datum.transform import Transform
+from psegs.datum.uri import URI
diff --git a/psegs/datum/bbox2d.py b/psegs/datum/bbox2d.py
new file mode 100644
index 0000000..e5cbe04
--- /dev/null
+++ b/psegs/datum/bbox2d.py
@@ -0,0 +1,251 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import attr
+import numpy as np
+
+
+@attr.s(slots=True, eq=True, weakref_slot=False)
+class BBox2D(object):
+  """An object in an image; in particular, an (ideally amodal) bounding box
+  surrounding the object.  May include additional context.
+  
+  Note:
+    Why this representation instead of ... ?  This class is:
+     * Agnostic to origin (i.e. lower left or upper left)
+     * Supports boxes of area Zero (width and height of zero)
+     * Inclusivity / exclusivity of bounds are unambiguous
+     * The image size is optionally associated, making it easy to convert to
+       'relative' coordinates (i.e. `{x, y} in [0, 1]`) for the box encoders
+       in detectors like FasterRCNN, SSD, Retinanet, etc.
+     * We provide interop with `((x1, y1), (x2, y2))` corner encoding.
+  """
+  
+  # NB: We explicitly disable `validator`s so that the user may temporarily
+  # use floats
+
+  x = attr.ib(type=int, default=0, validator=None)
+  """int: Base x coordinate in pixels."""
+  
+  y = attr.ib(type=int, default=0, validator=None)
+  """int: Base y coordinate in pixels."""
+  
+  width = attr.ib(type=int, default=0, validator=None)
+  """int: Width of box in pixels."""
+
+  height = attr.ib(type=int, default=0, validator=None)
+  """int: Height of box in pixels."""
+
+  im_width = attr.ib(type=int, default=0, validator=None)
+  """int, optional: Width of enclosing image"""
+
+  im_height = attr.ib(type=int, default=0, validator=None)
+  """int, optional: Height of enclosing image"""
+  
+  category_name = attr.ib(type=str, default="")
+  """str, optional: Class associated with this bounding box."""
+
+  # NOTE(review): this default dict is a single object shared by every
+  # instance that does not pass `extra`; mutate with care.
+  extra = attr.ib(default={}, type=typing.Dict[str, str])
+  """Dict[str, str]: A map for adhoc extra context"""
+
+  def update(self, **kwargs):
+    """Update attributes of this `BBox2D` as specified in `kwargs`; keys
+    that are not attributes of this class are ignored."""
+    for k in self.__slots__:
+      if k in kwargs:
+        setattr(self, k, kwargs[k])
+
+  @staticmethod
+  def of_size(width, height):
+    """Create a `BBox2D` of `width` by `height` anchored at (0, 0) that
+    covers an image of the same size."""
+    return BBox2D(
+            x=0, y=0,
+            width=width, height=height,
+            im_width=width, im_height=height)
+
+  @staticmethod
+  def from_x1_y1_x2_y2(x1, y1, x2, y2):
+    """Create a `BBox2D` from corners `(x1, y1)` and `(x2, y2)` (inclusive)"""
+    b = BBox2D()
+    b.set_x1_y1_x2_y2(x1, y1, x2, y2)
+    return b
+
+  def set_x1_y1_x2_y2(self, x1, y1, x2, y2):
+    """Update this `BBox2D` to have corners `(x1, y1)` and `(x2, y2)`
+    (inclusive)"""
+    self.update(x=x1, y=y1, width=x2 - x1 + 1, height=y2 - y1 + 1)
+
+  def get_x1_y1_x2_y2(self):
+    """Get the corners `(x1, y1)` and `(x2, y2)` (inclusive) of this `BBox2D`"""
+    return self.x, self.y, self.x + self.width - 1, self.y + self.height - 1
+
+  def get_r1_c1_r2_r2(self):
+    """Get the row-major corners `(y1, x1)` and `(y2, x2)` (inclusive) of
+    this `BBox2D`.  (The method name ends in `r2` for historical reasons; the
+    last value returned is the column `x2`.)"""
+    return self.y, self.x, self.y + self.height - 1, self.x + self.width - 1
+
+  def get_x1_y1(self):
+    """Return the origin"""
+    return self.x, self.y
+
+  def get_fractional_xmin_ymin_xmax_ymax(self, clip=True):
+    """Get the corners of this `BBox2D` in image-relative coordinates, i.e.
+    each value is divided by the image size.  The max corner is
+    `(x + width, y + height)`, i.e. the *exclusive* upper bound.  Values are
+    clamped to [0, 1] only if `clip`.  Requires non-zero `im_width` and
+    `im_height`."""
+    xmin = float(self.x) / self.im_width
+    ymin = float(self.y) / self.im_height
+    xmax = float(self.x + self.width) / self.im_width
+    ymax = float(self.y + self.height) / self.im_height
+    if clip:
+      xmin, ymin, xmax, ymax = \
+        map(lambda x: float(np.clip(x, 0, 1)), \
+          (xmin, ymin, xmax, ymax))
+    return xmin, ymin, xmax, ymax
+
+  def add_padding(self, *args):
+    """Extrude this `BBox2D` with the given padding: either a single value
+    in pixels or two values `pad_x, pad_y`.  Raises `ValueError` for any
+    other number of arguments."""
+    if len(args) == 1:
+      px, py = args[0], args[0]
+    elif len(args) == 2:
+      px, py = args[0], args[1]
+    else:
+      raise ValueError(len(args))
+    self.x -= px
+    self.y -= py
+    self.width += 2 * px
+    self.height += 2 * py
+
+  def is_full_image(self):
+    """Does this `BBox2D` cover the whole image?"""
+    return (
+      self.x == 0 and
+      self.y == 0 and
+      self.width == self.im_width and
+      self.height == self.im_height)
+
+  def get_corners(self):
+    """Return all four corners, starting from the origin, in the order
+    `(x, y)`, `(x + w, y)`, `(x + w, y + h)`, `(x, y + h)`.  This order is
+    counter-clockwise when the y axis points up.  NB: the far corners use
+    the exclusive bounds `x + width` / `y + height`."""
+    return (
+      (self.x, self.y),
+      (self.x + self.width, self.y),
+      (self.x + self.width, self.y + self.height),
+      (self.x, self.y + self.height),
+    )
+
+  def get_num_onscreen_corners(self):
+    """Return the number (max four) of corners that are on the image."""
+    return sum(
+      1 for x, y in self.get_corners()
+      if (0 <= x < self.im_width) and (0 <= y < self.im_height))
+
+  def quantize(self):
+    """Creating a `BBox2D` with float values is technically OK; use this
+    method to round to integer values in-place."""
+    ATTRS = ('x', 'y', 'width', 'height', 'im_width', 'im_height')
+    def _to_int(v):
+      return int(round(v)) if v is not None else v
+    for name in ATTRS:
+      setattr(self, name, _to_int(getattr(self, name)))
+
+  def clamp_to_screen(self):
+    """Clamp any out-of-image corners to edges of the image."""
+    def clip_and_norm(v, max_v):
+      return int(np.clip(v, 0, max_v).round())
+    
+    x1, y1, x2, y2 = self.get_x1_y1_x2_y2()
+    x1 = clip_and_norm(x1, self.im_width - 1)
+    y1 = clip_and_norm(y1, self.im_height - 1)
+    x2 = clip_and_norm(x2, self.im_width - 1)
+    y2 = clip_and_norm(y2, self.im_height - 1)
+    self.set_x1_y1_x2_y2(x1, y1, x2, y2)
+    
+  def get_intersection_with(self, other):
+    """Create a new `BBox2D` containing the intersection with `other`.  The
+    result is a deep copy of this box (so it keeps this box's image size,
+    category and extra) with updated geometry.  If the boxes do not overlap
+    along an axis, the result has size zero along that axis."""
+    x1, y1, x2, y2 = self.get_x1_y1_x2_y2()
+    ox1, oy1, ox2, oy2 = other.get_x1_y1_x2_y2()
+    ix1 = max(x1, ox1)
+    ix2 = min(x2, ox2)
+    iy1 = max(y1, oy1)
+    iy2 = min(y2, oy2)
+    
+    import copy
+    intersection = copy.deepcopy(self)
+    # Clamp to zero: disjoint boxes would otherwise get a negative width and /
+    # or height, and two negatives multiply into a bogus positive area.
+    intersection.update(
+      x=ix1, y=iy1,
+      width=max(0, ix2 - ix1 + 1),
+      height=max(0, iy2 - iy1 + 1))
+    return intersection
+
+  def get_union_with(self, other):
+    """Create a new `BBox2D` that is the smallest box enclosing both this
+    box and `other`.  The result is a deep copy of this box with updated
+    geometry."""
+    x1, y1, x2, y2 = self.get_x1_y1_x2_y2()
+    ox1, oy1, ox2, oy2 = other.get_x1_y1_x2_y2()
+    ux1 = min(x1, ox1)
+    ux2 = max(x2, ox2)
+    uy1 = min(y1, oy1)
+    uy2 = max(y2, oy2)
+    
+    import copy
+    union = copy.deepcopy(self)
+    union.set_x1_y1_x2_y2(ux1, uy1, ux2, uy2)
+    return union
+
+  def overlaps_with(self, other):
+    """Does this `BBox2D` overlap with `other`, i.e. is their intersection
+    of positive size along both axes?"""
+    # TODO: faster
+    intersection = self.get_intersection_with(other)
+    return intersection.width > 0 and intersection.height > 0
+
+  def get_area(self):
+    """Area in square pixels"""
+    return self.width * self.height
+
+  def translate(self, *args):
+    """Move the origin of this `BBox2D` by the given `(x, y)` value: either
+    two separate numbers, or a single pair given as a tuple, list or
+    `numpy.ndarray`."""
+    if len(args) == 1:
+      # `asarray` lets plain tuples / lists through as well as arrays
+      x, y = np.asarray(args[0]).tolist()
+    else:
+      x, y = args
+    self.x += x
+    self.y += y
+
+  def get_crop(self, img):
+    """Given the `numpy` array image `img` (HW or HWC), return a crop based
+    on this `BBox2D`; the crop is a slice (view) of `img`."""
+    c, r, w, h = self.x, self.y, self.width, self.height
+    return img[r:r+h, c:c+w]
+
+  def draw_in_image(self, img, color=None, thickness=2, category=None):
+    """Draw this bounding box in `img`.
+
+    Args:
+      img (numpy.ndarray): Draw in this image; its height and width must
+        equal `im_height` and `im_width`.
+      color (tuple): an (r, g, b) tuple specifying the border color; by
+        default use a category-determined color.
+      thickness (int): thickness of the line in pixels.
+      category (str): override the label text drawn for this box; otherwise
+        use the `category_name` attribute.
+    """
+
+    assert self.im_height == img.shape[0], (self.im_height, img.shape)
+    assert self.im_width == img.shape[1], (self.im_width, img.shape)
+
+    category = category or self.category_name
+    if not color:
+      from oarphpy.plotting import hash_to_rbg
+      color = hash_to_rbg(category)
+
+    from psegs.util.plotting import draw_bbox_in_image
+    draw_bbox_in_image(
+      img, self, color=color, thickness=thickness, label_txt=category)
diff --git a/psegs/datum/camera_image.py b/psegs/datum/camera_image.py
new file mode 100644
index 0000000..ef96134
--- /dev/null
+++ b/psegs/datum/camera_image.py
@@ -0,0 +1,1574 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
+
+
+
+"""
+
+TODOs
+
+ * try center crop of wide lenses
+
+video{start,end}datum with (start, end) [will work with time range segjoin?]
+and ffmpeg explode
+
+
+
+
+
+
+
+
+
+
+"""
+
+import copy
+import math
+import typing
+
+import attr
+import numpy as np
+
+from oarphpy.spark import CloudpickeledCallable
+
+from psegs.datum.transform import Transform
+from psegs.util import misc
+from psegs.util import plotting as pspl
+
+
+def l2_normalized(v):
+  """Scale `v` to unit L2 norm.  A 1-d array is normalized as one vector;
+  otherwise each row is normalized (norm taken along axis 1).  Zero-length
+  vectors are not special-cased."""
+  if v.ndim <= 1:
+    return v / np.linalg.norm(v)
+  row_norms = np.linalg.norm(v, axis=1, keepdims=True)
+  return v / row_norms
+
+
+def theta_signed(axis, v):
+  """Return `arctan2(cross(axis, v), dot(axis, v.T))`: for 2-d vectors this
+  is the signed angle (radians) from `axis` to `v`.  Either argument may be
+  a batch of row vectors, subject to numpy's `cross` / `dot` rules."""
+  sin_term = np.cross(axis, v)
+  cos_term = np.dot(axis, v.T)
+  return np.arctan2(sin_term, cos_term)
+
+
+def depth_to_uvcs(hwc_arr):
+  """Flatten an HWC array into one row per pixel: `(u, v, c_0, ..., c_n)`
+  where `u` is the pixel column index, `v` is the pixel row index, and the
+  `c_i` are that pixel's channel values.  Rows are in row-major pixel order;
+  the pixel indices are generated as float32."""
+  h, w, c = hwc_arr.shape[:3]
+  v_idx, u_idx = np.indices((h, w))
+  uv = np.stack([u_idx, v_idx], axis=-1).astype(np.float32)
+  planes = [hwc_arr[:, :, k] for k in range(c)]
+  return np.dstack([uv] + planes).reshape([-1, 2 + c])
+
+
+def uvdcs_to_xyzcs(uvdcs, fx, cx, fy, cy, normed_rays=False):
+  """Back-project rows of `(u, v, d, c_0, ...)` to `(x, y, z, c_0, ...)`
+  using pinhole intrinsics `fx, cx, fy, cy`.
+
+  Each pixel's ray is `((u - cx) / fx, (v - cy) / fy, 1)`; the point is
+  `d * ray`.  With `normed_rays`, rays are first scaled to unit length, so
+  `d` is treated as distance along the ray rather than along z.  Any columns
+  after the third are passed through unchanged.
+  """
+  n = uvdcs.shape[0]
+  rays = np.ones((n, 3))
+  rays[:, 0] = (uvdcs[:, 0] - cx) / fx
+  rays[:, 1] = (uvdcs[:, 1] - cy) / fy
+  if normed_rays:
+    rays /= np.linalg.norm(rays, axis=-1)[:, np.newaxis]
+  xyz = uvdcs[:, 2:3] * rays
+
+  if uvdcs.shape[1] <= 3:
+    return xyz
+  return np.concatenate([xyz, uvdcs[:, 3:]], axis=1)
+
+
+@attr.s(slots=True, eq=False, weakref_slot=False)
+class CameraImage(object):
+  """An image from a camera; typically the camera is calibrated.  The image
+  could also be a depth image."""
+
+  sensor_name = attr.ib(type=str, default='')
+  """str: Name of the camera, e.g. camera_front"""
+
+  image_jpeg = attr.ib(type=bytearray, default=bytearray())
+  """bytearray: Buffer of image JPEG data"""
+
+  image_png = attr.ib(type=bytearray, default=bytearray())
+  """bytearray: Buffer of image PNG data"""
+
+  image_factory = attr.ib(
+      type=CloudpickeledCallable,
+      converter=CloudpickeledCallable,
+      default=None)
+  """CloudpickeledCallable: A serializable factory function that emits an HWC
+  numpy array image"""
+
+  width = attr.ib(type=int, default=0, validator=None)
+  """int: Width of image in pixels"""
+
+  height = attr.ib(type=int, default=0, validator=None)
+  """int: Height of image in pixels"""
+
+  timestamp = attr.ib(type=int, default=0)
+  """int: Timestamp associated with this image; typically a Unix stamp in
+  nanoseconds."""
+
+  ego_pose = attr.ib(type=Transform, default=Transform())
+  """Transform: From world to ego / robot frame at the image's `timestamp`"""
+
+  ego_to_sensor = attr.ib(type=Transform, default=Transform())
+  """Transform: From ego / robot frame to the camera frame (typically a static
+  transform)."""
+
+  K = attr.ib(type=np.ndarray, default=np.eye(3, 3, dtype='float64'))
+  """numpy.ndarray: The 3x3 intrinsic calibration camera matrix"""
+
+  distortion_model = attr.ib(type=str, default="")
+  """str: Optional distortion model, e.g. OPENCV"""
+
+  distortion_kv = attr.ib(default={}, type=typing.Dict[str, float])
+  """Dict[str, float]: A map of distortion parameter name -> distortion paramte
+  value.  E.g. for OPENCV there might be entries for k1, k2, p1, p2."""
+
+  channel_names = attr.ib(default=['r', 'g', 'b'])
+  """List[str]: Semantic names for the channels (or dimensions / attributes)
+  of the image. By default, the `image` member uses `imageio` to read an
+  3-channel RGB image as a HWC array.  (Some PNGs could use an alpha channel
+  to produce an RGBA image).  In the case of depth images, one of the channels
+  (usually the first) decodes as depth in meters."""
+
+  extra = attr.ib(default={}, type=typing.Dict[str, str])
+  """Dict[str, str]: A map for adhoc extra context"""
+
+
+  def __eq__(self, other):
+    """Compare with `other` by delegating to `misc.attrs_eq()` (the class is
+    declared with `eq=False`, so attrs does not generate `__eq__`)."""
+    return misc.attrs_eq(self, other)
+
+  def get_world_to_sensor(self):
+    """Compose `ego_to_sensor[sensor_name, 'ego']` with
+    `ego_pose['ego', 'world']` (via `@`) and return the result.
+
+    FIXME (carried over from the original author): the result may be the
+    inverse of what the method name says; check with colmap data etc.
+    """
+    sensor_ego = self.ego_to_sensor[self.sensor_name, 'ego']
+    ego_world = self.ego_pose['ego', 'world']
+    return sensor_ego @ ego_world
+
+  @classmethod
+  def create_world_frame_ci(cls, sensor_name='', **kwargs):
+    """Alternate constructor: build an instance whose `ego_to_sensor` and
+    `ego_pose` are default `Transform`s that carry only frame names
+    (`sensor_name` -> 'ego' and 'ego' -> 'world').  An empty `sensor_name`
+    becomes 'world_frame_camera_image'; `kwargs` are forwarded to the
+    constructor."""
+    if not sensor_name:
+      sensor_name = 'world_frame_camera_image'
+    return cls(
+            sensor_name=sensor_name,
+            ego_to_sensor=Transform(src_frame=sensor_name, dest_frame='ego'),
+            ego_pose=Transform(src_frame='ego', dest_frame='world'),
+            **kwargs)
+
+  @property
+  def image(self):
+    """Decode and return the image.
+
+    The encoded buffer (`image_buffer`) takes precedence and is decoded with
+    `imageio`; only when there is no buffer is `image_factory` invoked.
+
+    Returns
+      numpy.ndarray: An HWC image (values in [0, 255] for typical 8-bit
+        images).
+
+    Raises
+      ValueError: if there is neither an encoded buffer nor a factory.
+    """
+    encoded = self.image_buffer
+    if encoded:
+      from io import BytesIO
+      import imageio
+      return imageio.imread(BytesIO(encoded))
+
+    if self.image_factory != CloudpickeledCallable.empty():
+      return self.image_factory()
+    raise ValueError("No image data!")
+  
+  @property
+  def image_buffer(self):
+    """Return the byte buffer storing the wrapped image (if any):
+    `image_jpeg` when it is non-empty, otherwise `image_png`.
+
+    Returns
+      bytearray: Raw image bytes; might be JPEG, PNG, etc.
+    """
+    if self.image_jpeg:
+      return self.image_jpeg
+    return self.image_png
+
+  def get_fov(self):
+    """Return the horizontal and vertical Fields of View in radians,
+    `(FoV_h, FoV_v)`, computed from the focal lengths `K[0, 0]`, `K[1, 1]`
+    and the image `width` / `height` as `2 * atan(size / (2 * f))`."""
+    focal_x, focal_y = self.K[0, 0], self.K[1, 1]
+    half_w, half_h = .5 * self.width, .5 * self.height
+    return 2. * math.atan(half_w / focal_x), 2. * math.atan(half_h / focal_y)
+
+  def get_chan(self, channel_name):
+    """Return the HW slice of the decoded image for the first channel named
+    `channel_name`, or `None` if there is no such channel.  The image is
+    decoded only when the channel exists."""
+    chan_idx = next(
+      (i for i, name in enumerate(self.channel_names) if name == channel_name),
+      None)
+    if chan_idx is None:
+      return None
+    return self.image[:, :, chan_idx]
+
+  def has_depth(self):
+    """Is there a channel named 'depth' in `channel_names`?"""
+    return 'depth' in self.channel_names
+
+  def get_depth(self):
+    """Return the 'depth' channel via `get_chan()`, or `None` if this image
+    has no such channel."""
+    return self.get_chan('depth')
+
+  def get_P(self, from_world=True):
+    """Return the 3x4 projection matrix `K @ RT_e2c @ RT_w2e` (top three
+    rows of the homogeneous product).
+
+    `RT_e2c` is the homogeneous matrix of `ego_to_sensor['ego', sensor_name]`.
+    `RT_w2e` is that of `ego_pose['world', 'ego']` when `from_world`, else
+    the identity (i.e. project from the ego frame).
+    """
+    RT_w2e = np.eye(4)
+    if from_world:
+      world_ego = self.ego_pose['world', 'ego']
+      RT_w2e = world_ego.get_transformation_matrix(homogeneous=True)
+
+    ego_cam = self.ego_to_sensor['ego', self.sensor_name]
+    RT_e2c = ego_cam.get_transformation_matrix(homogeneous=True)
+
+    # Embed K in a 4x4 so that it composes with the homogeneous transforms
+    K_h = np.eye(4)
+    K_h[:3, :3] = self.K
+    return (K_h @ RT_e2c @ RT_w2e)[:3, :4]
+
+  def has_rgb(self):
+    """Does `channel_names` include all of 'r', 'g' and 'b'?"""
+    return {'r', 'g', 'b'}.issubset(self.channel_names)
+
+  def get_opencv_distcoeffs(self):
+    """Return the distortion coefficients from `distortion_kv` as the
+    positional vector OpenCV expects, or `None` if none of the known keys
+    has a value.
+
+    OpenCV reads coefficients by position, `(k1, k2, p1, p2[, k3[, k4, k5,
+    k6]])`, and accepts only certain vector lengths.  So the result is
+    padded to the shortest supported length (4, 5 or 8) that covers every
+    key present, and any missing coefficient in that range is filled with 0
+    (rather than dropped, which would shift later values into the wrong
+    slot).
+    """
+    # fmt: off
+    KEYS = (
+      # Base model
+      'k1', 'k2', 'p1', 'p2',
+      # Full model
+      'k3', 'k4', 'k5', 'k6', 
+      # TODO support othermodels
+    )
+    # fmt: on
+    SUPPORTED_LENGTHS = (4, 5, 8)
+
+    raw = [self.distortion_kv.get(k) for k in KEYS]
+    present = [i for i, v in enumerate(raw) if v is not None]
+    if not present:
+      return None
+
+    n_needed = present[-1] + 1
+    n = next(length for length in SUPPORTED_LENGTHS if length >= n_needed)
+    return np.array([0. if v is None else v for v in raw[:n]])
+
+  def to_cv_undistorted_ci(self, alpha=0.):
+    """Return a copy of this `CameraImage` whose image is undistorted with
+    OpenCV.  Uses cache-friendly image_factory: the pixels are computed
+    lazily, when the copy's `image` is read.
+
+    The copy gets the new camera matrix from
+    `cv2.getOptimalNewCameraMatrix()`, no distortion parameters, and the
+    width / height of the valid-pixel ROI that OpenCV reports (the
+    undistorted image is cropped to that ROI).
+
+    Args:
+      alpha (float): Passed through to `cv2.getOptimalNewCameraMatrix()`.
+
+    Raises:
+      AssertionError: if this image lacks RGB channels or has a depth
+        channel (depth is not supported yet).
+    """
+    import cv2
+
+    assert self.has_rgb()
+    assert not self.has_depth(), 'TODO e.g. cv2.projectPoints for depth images'
+
+    dist = self.get_opencv_distcoeffs()
+
+    cur_wh = (self.width, self.height)
+    newK, roi = cv2.getOptimalNewCameraMatrix(self.K, dist, cur_wh, alpha)
+    rx, ry, rw, rh = roi
+
+    def _get_undistorted():
+      import cv2
+      # NB: cv2.remap() might be faster for latency-critical use cases
+      undistorted = cv2.undistort(self.image, self.K, dist, None, newK)
+      # TODO support cv2.fisheye.undistortImage
+      
+      undistorted = undistorted[ry : ry + rh, rx : rx + rw]
+      return undistorted
+
+    undistorted_ci = copy.deepcopy(self)
+    # Drop the copied encoded bytes: the `image` property prefers them over
+    # `image_factory`, so keeping them would make the copy decode to the
+    # original, still-distorted pixels.
+    undistorted_ci.image_jpeg = bytearray()
+    undistorted_ci.image_png = bytearray()
+    undistorted_ci.image_factory = CloudpickeledCallable(lambda: _get_undistorted())
+    undistorted_ci.K = newK
+    undistorted_ci.distortion_kv = {}
+    undistorted_ci.distortion_model = ''
+    undistorted_ci.width = rw
+    undistorted_ci.height = rh
+    return undistorted_ci
+
+  def to_resized_ci(
+        self,
+        target_h=None,
+        scale=1.0,
+        interpolate='',
+        resize_dtype='float32',
+        final_dtype='uint8'):
+    """Return a copy of this `CameraImage` resized with `cv2.resize()`.  Uses
+    cache-friendly image_factory: the pixels are computed lazily, when the
+    copy's `image` is read.
+
+    Args:
+      target_h (int): Desired height in pixels; if given, it determines the
+        scale and `scale` is ignored.
+      scale (float): Uniform scale factor, used when `target_h` is None.
+      interpolate (str): Name of a `cv2.INTER_*` constant; by default
+        'INTER_AREA' when shrinking (scale <= 1), else 'INTER_CUBIC'.
+      resize_dtype (str): If non-empty, cast the image to this dtype before
+        resizing.
+      final_dtype (str): If non-empty, cast the resized image to this dtype.
+
+    Returns:
+      A copy with the new `width` / `height` (truncated to ints) and with
+      the first two rows of `K` multiplied by the scale.
+    """
+
+    if target_h is not None:
+      scale = float(target_h) / self.height      
+    else:
+      assert scale is not None
+    tw = int(self.width * scale)
+    th = int(self.height * scale)
+
+    if not interpolate:
+      interpolate = 'INTER_AREA' if scale <= 1 else 'INTER_CUBIC'
+
+    def _get_resized():
+      import cv2
+
+      assert hasattr(cv2, interpolate), \
+        (interpolate, [x for x in dir(cv2) if 'INTER_' in x])
+      cv2_interp = getattr(cv2, interpolate)
+
+      image = self.image
+      if resize_dtype:
+        image = image.astype(resize_dtype)
+      resized = cv2.resize(image, (tw, th), interpolation=cv2_interp)
+      if final_dtype:
+        resized = resized.astype(final_dtype)
+      return resized
+
+    # NB: K is scaled by the requested `scale`, not by the per-axis ratios
+    # tw / width and th / height, which differ slightly from `scale` once the
+    # target size is truncated to whole pixels.
+    newK = self.K.copy()
+    newK[:2, :] *= scale
+
+    resized_ci = copy.deepcopy(self)
+    # Drop the copied encoded bytes: the `image` property prefers them over
+    # `image_factory`, so keeping them would make the copy decode to the
+    # original, un-resized pixels.
+    resized_ci.image_jpeg = bytearray()
+    resized_ci.image_png = bytearray()
+    resized_ci.image_factory = CloudpickeledCallable(lambda: _get_resized())
+    resized_ci.K = newK
+    resized_ci.width = tw
+    resized_ci.height = th
+    return resized_ci
+
+  def to_grayscale_ci(self, final_dtype='uint8'):
+    """Return a copy of this `CameraImage` whose image is converted from RGB
+    to a single 'grayscale' channel with `cv2.cvtColor()`.  The pixels are
+    computed lazily, when the copy's `image` is read, and have shape
+    (H, W, 1).
+
+    Args:
+      final_dtype (str): If non-empty, cast the gray image to this dtype.
+    """
+    def _get_gray():
+      import cv2
+      image = self.image
+
+      gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+      if final_dtype:
+        gray = gray.astype(final_dtype)
+      gray = gray.reshape([image.shape[0], image.shape[1], 1])
+      return gray
+    
+    gray_ci = copy.deepcopy(self)
+    # Drop the copied encoded bytes: the `image` property prefers them over
+    # `image_factory`, so keeping them would make the copy decode to the
+    # original color pixels.
+    gray_ci.image_jpeg = bytearray()
+    gray_ci.image_png = bytearray()
+    gray_ci.image_factory = CloudpickeledCallable(lambda: _get_gray())
+    gray_ci.channel_names = ['grayscale']
+    return gray_ci
+
+  def depth_image_to_point_cloud(self):
+    """Create and return a datum.PointCloud instance if this image is
+    a depth image (and None otherwise).
+
+    Each pixel with depth > 0 becomes one point `(x, y, z, ...)`,
+    back-projected with the focal lengths and principal point read from `K`;
+    the remaining image channels are carried along as extra cloud columns.
+    If this image has an `image_factory`, the cloud is produced lazily via a
+    `cloud_factory`; otherwise it is computed right away from `self.image`.
+    The returned cloud is named `sensor_name + '|point_cloud'` and gets deep
+    copies of this image's `ego_pose`, `ego_to_sensor` and `extra`.
+    """
+    
+    if not any(c == 'depth' for c in self.channel_names):
+      return None
+
+    # Re-order so that depth is always the first channel / column
+    # (`axes` is the channel permutation that swaps depth with channel 0, and
+    # `cs_names` is `channel_names` with the same swap applied)
+    cs_names = list(self.channel_names)
+    axes = list(range(len(self.channel_names)))
+    for i, c in enumerate(self.channel_names):
+      if c == 'depth' and i != 0:
+        axes[0] = i
+        axes[i] = 0
+        cs_names[0] = 'depth'
+        cs_names[i] = self.channel_names[0]
+        break
+    # Depth is consumed to make x, y, z; other channels keep their names
+    cloud_colnames = ['x', 'y', 'z'] + cs_names[1:]
+
+    fx = self.K[0, 0]
+    cx = self.K[0, 2]
+    fy = self.K[1, 1]
+    cy = self.K[1, 2]
+    if self.image_factory != CloudpickeledCallable.empty():
+      # Lazy path: defer decoding and back-projection to `cloud_factory`
+
+      def _to_cloud(image_factory, axes, intrinsics):
+        fx, cx, fy, cy = intrinsics
+        depth_image = image_factory()
+        depth_image = depth_image[:, :, axes]
+        uvdcs = depth_to_uvcs(depth_image)
+        
+        # Ignore depth 0
+        idx = np.where(uvdcs[:, 2] > 0)
+        uvdcs = uvdcs[idx[0]]
+        
+        xyzcs = uvdcs_to_xyzcs(uvdcs, fx, cx, fy, cy)
+        return xyzcs
+      
+      # Bind the factory to a local so that the lambda below captures only
+      # the factory and not `self`
+      depth_factory = self.image_factory
+      cloud_factory = lambda: _to_cloud(depth_factory, axes, (fx, cx, fy, cy))
+      cloud = None
+    else:
+      # Eager path: same steps as `_to_cloud()` above, run immediately
+      depth_image = np.copy(self.image)
+      depth_image = depth_image[:, :, axes]
+      uvdcs = depth_to_uvcs(depth_image)
+
+      # Ignore depth 0
+      idx = np.where(uvdcs[:, 2] > 0)
+      uvdcs = uvdcs[idx[0]]
+
+      cloud = uvdcs_to_xyzcs(uvdcs, fx, cx, fy, cy)
+      cloud_factory = None
+
+    from psegs.datum.point_cloud import PointCloud
+    pc = PointCloud(
+          sensor_name=self.sensor_name + '|point_cloud',
+          timestamp=self.timestamp,
+          cloud_colnames=cloud_colnames,
+          cloud_factory=cloud_factory,
+          cloud=cloud,
+          ego_pose=copy.deepcopy(self.ego_pose),
+          ego_to_sensor=copy.deepcopy(self.ego_to_sensor),
+          extra=copy.deepcopy(self.extra))
+    return pc
+
+
+  def get_debug_image(self, clouds=None, cuboids=None, period_meters=10.):
+    """Create and return a debug image showing the given content projected
+    onto this `CameraImage`.
+
+    If this image has a 'depth' channel, the base of the debug image is a
+    black canvas with the non-zero depth values drawn in; otherwise the base
+    is a copy of `self.image`.
+
+    Args:
+      clouds (List[:class:`~psegs.datum.point_cloud.PointCloud`]): Draw these 
+        PointClouds in the given debug image.
+      cuboids (List[:class:`~psegs.datum.cuboid.Cuboid`]): Draw these 
+        cuboids in the given debug image.
+      period_meters (float): Choose a distinct hue every `period_meters` and
+        interpolate between hues.
+
+    Returns:
+      np.array: A HWC RGB debug image.
+    """
+
+    if any(c == 'depth' for c in self.channel_names):
+      depth = self.get_depth()
+      assert depth is not None, (self.channel_names, self.image.shape)
+      # NB: `np.zeros` defaults to float64, unlike the copied-image branch
+      debug_img = np.zeros((self.height, self.width, 3))
+      uvd = depth_to_uvcs(depth.reshape([self.height, self.width, 1]))
+
+      # Ignore depth 0
+      idx = np.where(uvd[:, 2] > 0)
+      uvd = uvd[idx[0]]
+
+      from psegs.util.plotting import draw_xy_depth_in_image
+      draw_xy_depth_in_image(
+        debug_img, uvd, alpha=0.25, period_meters=period_meters)
+    else:
+      debug_img = np.copy(self.image)
+    
+    for pc in clouds or []:
+      cloud_raw = pc.get_cloud()
+      # NOTE(review): the original author questioned why the inverse of the
+      # cloud's `ego_to_sensor` is the right transform here -- confirm.
+      xyz = pc.ego_to_sensor.get_inverse().apply(cloud_raw[:, :3]).T # err why inv~~~~
+      uvd = self.project_ego_to_image(xyz, omit_offscreen=True)
+      pspl.draw_xy_depth_in_image(
+        debug_img, uvd, marker_radius=4, alpha=0.9, period_meters=period_meters)
+    
+    for c in cuboids or []:
+      # box_xyz = self.ego_to_sensor.apply(c.get_box3d()).T
+      box_uvd = self.project_ego_to_image(c.get_box3d(), omit_offscreen=False)
+      # Skip cuboids with no corner at a positive depth
+      if (box_uvd[:, 2] <= 1e-6).all():
+        continue
+      
+      from oarphpy.plotting import hash_to_rbg
+      # NOTE(review): `color` is computed but never used; the draw call below
+      # passes the un-converted hash color instead.
+      color = pspl.color_to_opencv(
+        np.array(hash_to_rbg(c.category_name)))
+
+      pspl.draw_cuboid_xy_in_image(
+        debug_img,
+        box_uvd[:, :2],
+        np.array(hash_to_rbg(c.category_name)),
+        alpha=0.3)
+    
+    return debug_img
+
+  def project_ego_to_image(self, pts, omit_offscreen=True):
+    """Project the given points into the image plane.
+
+    Args:
+      pts (numpy.ndarray): An n-by-3 array of points `(x, y, z)` in the **ego
+        frame**.
+      omit_offscreen (bool): Drop points whose direction is further from the
+        camera's +Z axis than half the field of view plus a padding of pi/8
+        radians (checked separately in the X-Z and Y-Z planes).  NB: this is
+        an angular pre-filter; kept points may still land slightly outside
+        the image bounds.
+    
+    Returns:
+      numpy.ndarray: An m-by-3 array of points `(x, y, d)` in the image plane
+        where `(x, y)` is a pixel location and `d` is depth in meters from
+        the focal plane.  `m == n` unless `omit_offscreen` dropped points.
+    """
+    # Transposed so that there is one point per row, in the camera frame
+    pts_in_cam = self.ego_to_sensor.apply(pts).T
+
+    if omit_offscreen:
+      fov_h, fov_v = self.get_fov()
+      half_fov_h, half_fov_v = .5 * fov_h, .5 * fov_v
+
+      Z_HAT = np.array([0, 1]) # Principal axis in X-Z and Y-Z planes
+      # Signed angle between each point's direction and +Z, per plane
+      pts_xz = pts_in_cam[:, (0, 2)]
+      theta_h = theta_signed(l2_normalized(pts_xz), Z_HAT)
+      pts_yz = pts_in_cam[:, (1, 2)]
+      theta_v = theta_signed(l2_normalized(pts_yz), Z_HAT)
+
+      PADDING_RADIANS = math.pi / 8
+      idx_ = np.where(
+              np.logical_and.reduce((
+                # Filter off-the-edge points
+                np.abs(theta_h) <= half_fov_h + PADDING_RADIANS,
+                np.abs(theta_v) <= half_fov_v + PADDING_RADIANS)))
+                # # Filter behind-screen points
+                # uv[2, :] > 0)))
+      idx_ = idx_[0]
+      pts_in_cam = pts_in_cam[idx_, :]
+
+    # Pinhole projection: apply K, then divide x and y by the third row
+    # (no guard against a zero third-row value)
+    uvd = self.K.dot(pts_in_cam.T)
+    uvd[0:2, :] /= uvd[2, :]
+    uvd = uvd.T
+
+    return uvd
+
+  def _has_edge_in_fov(self, cuboid):
+    """Does the angular extent of `cuboid.box3d`, measured about
+    `self.principal_axis_in_ego`, overlap this camera's field of view in
+    both the x-y (horizontal) and x-z (vertical) planes?"""
+    fov_h, fov_v = self.get_fov()
+    corners = cuboid.box3d
+    pov = self.principal_axis_in_ego
+
+    def angular_extent_in_fov(plane_axes, fov):
+      # Signed angles from the camera axis to each corner, within the plane
+      corners_hat = l2_normalized(corners[:, plane_axes])
+      pov_hat = l2_normalized(pov[plane_axes])
+      theta = theta_signed(pov_hat, corners_hat)
+      # Closed intervals [-fov/2, fov/2] and [theta.min, theta.max] overlap
+      # iff the larger start is not past the smaller end
+      return max(-.5 * fov, theta.min()) <= min(.5 * fov, theta.max())
+
+    # Check in x-y (horizontal) plane
+    is_in_fov_h = angular_extent_in_fov(np.array([0, 1]), fov_h)
+    # Check in x-z (vertical) plane
+    is_in_fov_v = angular_extent_in_fov(np.array([0, 2]), fov_v)
+    return is_in_fov_h and is_in_fov_v
+
+  def project_cuboid_to_bbox(self, cuboid):
+    """Project `cuboid` into this camera and return a populated `BBox`.
+
+    The cuboid corners (`cuboid.box3d`, ego frame) are moved into the camera
+    frame with `self.cam_from_ego` and projected through `self.K`.  Corners
+    with non-positive depth are re-projected to a far off-screen location
+    ("pseudo-infinity") so the 2D box still bounds the part of the object
+    that lies in front of the camera.
+
+    Args:
+      cuboid: Object providing `box3d`, `category_name`, `au_category` and
+        `obj_from_ego` (with `translation` and `rotation`).
+
+    Returns:
+      BBox: With `cuboid_in_cam`, `cuboid_pts`, `cuboid_center`, the 2D
+        extents (clamped to the screen), `has_offscreen`, `is_visible`,
+        `cuboid_from_cam` and `ypr_camera_local` filled in.
+
+    NOTE(review): the angle-based locals (`theta_h`, `to_pixel`, ...) are
+    computed but do not affect the returned box; kept for reference.
+    """
+    bbox = BBox(
+            im_width=self.width,
+            im_height=self.height,
+            category_name=cuboid.category_name,
+            au_category=cuboid.au_category,
+            cuboid=cuboid)
+
+    ## Fill Points
+    centroid = np.mean(cuboid.box3d, axis=0)
+    pts_in_cam = self.cam_from_ego.apply(cuboid.box3d).T
+    bbox.cuboid_in_cam = pts_in_cam
+    centroid_in_cam = self.cam_from_ego.apply(centroid[np.newaxis, :]).T
+
+    # NOTE: legacy angle-based approach; the bbox returned comes from `uvd`.
+    # Since the cuboid could be behind or alongside the camera, not all
+    # of the cuboid faces may be visible.  If the object is very large,
+    # perhaps only a single edge is visible.  To find the image-space
+    # 2-D axis-aligned bounding box that bounds all cuboid points, we find
+    # the horizontal and vertical angles relative to the camera principal
+    # axis (Z in the camera frame) that fits all cuboid points.  Then
+    # if the object is partially out of view (or even behind the camera),
+    # it is easy to clip the bounding box to the camera field of view.
+
+    def l2_normalized(v):
+      if len(v.shape) > 1:
+        # Normalize row-wise
+        return v / np.linalg.norm(v, axis=1)[:, np.newaxis]
+      else:
+        return v / np.linalg.norm(v)
+
+    def to_0_2pi(thetas):
+      return (thetas + 2 * math.pi) % (2 * math.pi)
+
+    def theta_signed(cam_h, cuboid_h):
+      thetas = np.arctan2(np.cross(cam_h, cuboid_h), np.dot(cam_h, cuboid_h.T))
+      return thetas
+      # return to_0_2pi(thetas)
+
+    # Legacy (unused for the result): signed angles of each corner from +Z.
+    Z_HAT = np.array([0, 1]) # Principal axis in X-Z and Y-Z planes
+    pts_xz = pts_in_cam[:, (0, 2)]
+    theta_h = theta_signed(l2_normalized(pts_xz), Z_HAT)
+    pts_yz = pts_in_cam[:, (1, 2)]
+    theta_v = theta_signed(l2_normalized(pts_yz), Z_HAT)
+
+    # center_h = theta_signed(Z_HAT, l2_normalized(centroid[(0, 2)]))
+    # center_v = theta_signed(Z_HAT, l2_normalized(centroid[(1, 2)]))
+
+    f_x = self.K[0, 0]
+    f_y = self.K[1, 1]
+    c_x = self.K[0, 2]
+    c_y = self.K[1, 2]
+    fov_h, fov_v = self.get_fov()
+
+    t_h_min, t_h_max = theta_h.min(), theta_h.max()
+    t_v_min, t_v_max = theta_v.min(), theta_v.max()
+
+    def to_pixel(theta, fov, length):
+      half_fov = .5 * fov
+      # p = np.clip(theta, -half_fov, half_fov) / half_fov
+      p = theta / half_fov
+      p = (p + 1) / 2
+      return length * p
+
+    x1 = to_pixel(t_h_min, fov_h, self.width)
+    x2 = to_pixel(t_h_max, fov_h, self.width)
+    y1 = to_pixel(t_v_min, fov_v, self.height)
+    y2 = to_pixel(t_v_max, fov_v, self.height)
+
+    focal_pixel_h = (.5 * self.width) / math.tan(fov_h * .5)
+    focal_pixel_v = (.5 * self.height) / math.tan(fov_v * .5)
+
+    # Pinhole projection; after the divide each row is (u, v, depth).
+    uvd = self.K.dot(pts_in_cam.T)
+    uvd[0:2, :] /= uvd[2, :]
+    uvd = uvd.T
+
+    # Same projection for the cuboid centroid.
+    centroid_uvd = self.K.dot(centroid_in_cam.T)
+    centroid_uvd[0:2, :] /= centroid_uvd[2, :]
+    centroid_uvd = centroid_uvd.T[0, :]
+
+    # Handling corners behind the camera:
+    # Suppose the camera is side-by-side with a long pole, where part of the
+    # pole extends in front of and part behind the camera. We need to project
+    # points that are behind the camera to a place that makes sense
+    # geometrically. As points in front of the camera get closer, they get 
+    # projected to the infinite horizon beyond the left and right edges
+    # of the image.
+    # We choose to project these points as follows:
+    #   First, pretend these points are actually in front of the camera, and
+    #   compute the angle they make between their projection onto the principal
+    #   plane and the focal center.
+    #   Second, define the ray in the principal plane that has this angle. Now
+    #   follow that ray off-screen to "pseudo-infinity" (based upon focal 
+    #   length of the camera) to plot the final projected point.
+    pts_xy = pts_in_cam[:, :2]
+    theta_xy = np.arctan2(pts_xy[:, 1], pts_xy[:, 0])
+    # Large multiplier standing in for "infinity" (1 / 0.001 == 1000).
+    PSEUDO_INF = 1 / 0.001
+    uvt = np.stack([
+      np.cos(theta_xy) * f_x * PSEUDO_INF,
+      np.sin(theta_xy) * f_y * PSEUDO_INF,
+      uvd[:,2],
+    ]).T
+
+    # Corners at or behind the focal plane (depth <= 0) cannot be projected
+    # with K; substitute their pseudo-infinity locations instead.  Use a mask
+    # rather than a hard-coded corner count.
+    behind_cam = uvd[:, 2] <= 0
+    uvd[behind_cam, :] = uvt[behind_cam, :]
+
+    # Record the projected corners (with substitutions) and the centroid.
+    bbox.cuboid_pts = uvd
+    bbox.cuboid_center = centroid_uvd
+
+    # Axis-aligned 2D box bounding all (possibly re-projected) corners.
+    x1, x2 = np.min(uvd[:, 0]), np.max(uvd[:, 0])
+    y1, y2 = np.min(uvd[:, 1]), np.max(uvd[:, 1])
+    bbox.set_x1_y1_x2_y2(x1, y1, x2, y2)
+
+    # `z` is the depth of the farthest corner; z <= 0 means every corner is
+    # at or behind the focal plane.
+    z = float(np.max(uvd[:, 2]))
+    num_onscreen = bbox.get_num_onscreen_corners()
+    bbox.has_offscreen = ((z <= 0) or (num_onscreen < 4))
+
+    # While none of the points or cuboid points may be onscreen, if the object
+    # is very close to the camera then a single edge of the cuboid or bbox
+    # may intersect the screen.  TODO: proper frustum clipping for objects
+    # that are beyond FoV and yet very slightly in front of the image plane.
+    bbox.is_visible = (z > 0 and self._has_edge_in_fov(cuboid))
+
+    bbox.clamp_to_screen()
+
+    ## Fill Pose
+    # NOTE(review): assumes both translations are expressed in the same frame.
+    bbox.cuboid_from_cam = \
+      cuboid.obj_from_ego.translation - self.cam_from_ego.translation
+
+    cuboid_from_cam_hat = \
+      bbox.cuboid_from_cam / np.linalg.norm(bbox.cuboid_from_cam)
+
+    cuboid_from_cam_hat = cuboid_from_cam_hat.reshape(3)
+
+    # Axis-angle rotation taking the camera->object ray onto the object's X
+    # axis (`obj_from_ego.rotation` applied to X_HAT), as 'zxy' Euler angles.
+    from scipy.spatial.transform import Rotation as R
+    X_HAT = np.array([1, 0, 0])
+    obj_normal = cuboid.obj_from_ego.rotation.dot(X_HAT)
+    cos_theta = cuboid_from_cam_hat.dot(obj_normal.reshape(3))
+    rot_axis = np.cross(cuboid_from_cam_hat, obj_normal)
+    rot_axis_norm = np.linalg.norm(rot_axis)
+    if rot_axis_norm < 1e-12:
+      # (Anti-)parallel vectors: the cross product vanishes, so pick any axis
+      # orthogonal to the ray; the angle (0 or pi) is still well defined.
+      ref_axis = np.eye(3)[np.argmin(np.abs(cuboid_from_cam_hat))]
+      rot_axis = np.cross(cuboid_from_cam_hat, ref_axis)
+      rot_axis_norm = np.linalg.norm(rot_axis)
+    angle = math.acos(np.clip(cos_theta, -1., 1.))  # clip guards round-off
+    obj_from_ray = R.from_rotvec(angle * rot_axis / rot_axis_norm)
+    bbox.ypr_camera_local = obj_from_ray.as_euler('zxy')
+
+    return bbox
+
+  NO_RV_SMOOTHING = -1  
+  @staticmethod
+  def get_cloud_rv_simple(
+          im_size,
+          uvd,
+          ptvs=None,
+          depth_soft_horizon_meters=50):
+    """Return a greyscale Pointcloud Range-View image given a set of points.
+    Points with a value (e.g. normalized depth) of 0 are drawn black and
+    points with a value of 1 are drawn white. Optionally smooth cloud sparsity
+    using markers that are scaled inversely with depth according to
+    `depth_soft_horizon_meters`.
+
+    Args:
+      im_size: tuple of image size (height, width) in pixels.
+      uvd: Array of n-by-3 points containing (pixel x, pixel y, depth meters)
+        values for all cloud points.
+      ptvs: Array of n values; use these values for plot intensity.  By
+        default, use depth from `uvd`, tanh-normalized (or divided by 128 when
+        smoothing is disabled). Values are clipped to [0, 1].
+      depth_soft_horizon_meters: If positive, smooth cloud sparsity using
+        markers scaled by the depth values in `uvd`. We use tanh smoothing
+        to create larger markers for points at most `depth_soft_horizon_meters`
+        away from the camera; points farther than this threshold will obtain
+        less smoothing.
+
+    Returns:
+      An `im_size`-sized 1-channel uint8 image with shape (height, width, 1).
+    """
+    
+    im_h, im_w = im_size
+    uvd = np.copy(uvd)
+
+    if ptvs is None:
+      if depth_soft_horizon_meters > 0:
+        ptvs = np.tanh(uvd[:, 2] / depth_soft_horizon_meters)
+      else:
+        ptvs = uvd[:, 2] / 128.
+    ptvs = np.clip(ptvs, 0, 1)
+
+    # First, re-order points by depth ascending so that farther points (which
+    # may be smaller and brighter) are drawn over nearer points (which might
+    # have larger markers via smoothing)
+    order = (uvd[:,2]).argsort()
+    uvd = uvd[order]
+    ptvs = ptvs[order]
+    
+    # Decide on marker sizes and colors
+    def marker_size(depth):
+      if depth_soft_horizon_meters <= 0:
+        return 0.05 * im_h * im_w
+      else:
+        def unitized_depth(z):
+          return np.tanh(z / depth_soft_horizon_meters)
+        v = 1 - unitized_depth(depth)
+        pt_scale = 0.5 * im_w * im_h
+        s = pt_scale * v
+        return s
+    marker_sizes = [marker_size(z) for z in uvd[:,2]]
+    
+    # Convert to RGB; if we don't, then Matplotlib uses weird greyscale
+    # mapping.
+    colors = [(v, v, v) for v in ptvs]
+
+    # We use matplotlib for image rendering, as it's faster and more flexible
+    # than direct numpy.
+    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
+    from matplotlib.figure import Figure
+
+    fig = Figure(figsize=(im_w, im_h), dpi=1)
+    fig.set_facecolor((0, 0, 0))
+    canvas = FigureCanvas(fig)
+    
+    ax = fig.gca()
+    ax.scatter(
+        uvd[:,0], im_h - 1 - uvd[:,1],   # x, y
+        s=marker_sizes, c=colors)
+    
+    # Crop plot so that it has 1-to-1 pixel correspondence with the camera
+    # image.
+    ax.axis('off')
+    ax.set_xlim(0, im_w)
+    ax.set_ylim(0, im_h)
+    fig.tight_layout()
+    fig.subplots_adjust(bottom=0, top=1, left=0, right=1)
+
+    # Render!
+    canvas.draw()
+    img_str, (width, height) = canvas.print_to_buffer()
+    # frombuffer gives a read-only view of the bytes; copy to stay writable.
+    rv_img = np.frombuffer(img_str, np.uint8).reshape((im_h, im_w, 4)).copy()
+
+    # Release memory
+    fig.clear()
+    canvas.get_renderer().clear()
+
+    return rv_img[:, :, :1] # Return greyscale
+
+  @staticmethod
+  def get_cloud_rv_delaunay_smoothing(
+          im_size,
+          uvd,
+          cloud,
+          principal_axis):
+    """Return an RGB Range-View image given a set of N points projected
+    into the camera frame.  Use delaunay triangle smoothing: compute
+    a delaunay triangulation of the 2D (x, y) projection of the points onto
+    the image plane, and use these triangles to interpolate depth values
+    for pixels that don't have point returns.
+
+    The returned image is RGB, but can be interpreted as follows as HSV:
+     * Hue: encodes depth; we hash depth to a hue which changes in 10-meter
+        buckets, and measurements between these buckets are hue-interpolated.
+        See `plotting.rgb_for_distance()`.
+     * Saturation: (unused)
+     * Value: encodes the normal of the triangle relative to the camera 
+        perspective.  Normals orthogonal to the camera perspective have
+        low brightness ("value")
+
+    TODO: Create delaunay-based mesh from pointsensor perspective and render
+    in camera view using a raytracer like pyrender.  Important when the 
+    pointsensor has a very different vantage point from the camera and
+    returns points that are behind the camera PoV-- in these cases, our
+    delaunay smoothing will improperly mesh together foreground and
+    background points.
+
+    `uvd` and `cloud` must be row-aligned: row i of `cloud` is the 3D point
+    whose projection is row i of `uvd`.  Triangles spanning more than 5 units
+    along any `cloud` axis are rendered black in the normals overlay, which
+    blacks them out in the final (multiplied) image.
+
+    Args:
+      im_size: tuple of image size (height, width) in pixels.
+      uvd: Array of n-by-3 points containing (pixel x, pixel y, depth meters)
+        values for all cloud points.
+      cloud: Array of n-by-3 points containing (x, y, z) values
+        of all points.  Used to determine triangle normals.
+      principal_axis: 3-vector representing the camera's perspective.
+        Used to determine triangle normals.  Must be in same frame as `cloud`
+        (e.g. both in ego frame).
+
+    Returns:
+      An `im_size`-sized RGB uint8 image.
+    """
+
+    uvd = np.copy(uvd)
+
+    # We use matplotlib for image rendering, as it's faster and more flexible
+    # than direct numpy.
+    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
+    from matplotlib.figure import Figure
+    from matplotlib.colors import ListedColormap
+    from matplotlib.colors import Normalize
+    
+    img_h, img_w = im_size
+
+    ###
+    ### Create Hue / Depth-Colored image
+    ###
+    figd = Figure(figsize=(img_w, img_h), dpi=1)
+    figd.set_facecolor((0, 0, 0))
+    canvasd = FigureCanvas(figd)
+    axd = figd.gca()
+
+    MAX_DEPTH_METERS = 1000
+    from au.plotting import rgb_for_distance
+    zcolors = [
+      (np.array(rgb_for_distance(z)) / 255).tolist()
+      for z in np.arange(0, MAX_DEPTH_METERS, 0.1)
+    ]
+    cmap = ListedColormap(zcolors)
+    cmap.set_under("black")
+    cmap.set_over("white")
+    norm = Normalize(vmin=0, vmax=MAX_DEPTH_METERS)
+
+    import matplotlib.tri as mtri
+    triang = mtri.Triangulation(uvd[:,0], img_h - 1 - uvd[:,1])
+
+    tri_vertices = list(map(lambda index: uvd[index], triang.triangles))
+
+    def maptocolor(tri):
+      zval = np.mean(tri[:,2])
+      return zval
+    axd.tripcolor(
+        triang,
+        facecolors=np.array([maptocolor(tri) for tri in tri_vertices]),
+        cmap=cmap, norm=norm)
+
+    axd.axis('off')
+    axd.set_xlim(0, img_w)
+    axd.set_ylim(0, img_h)
+    figd.tight_layout()
+    figd.subplots_adjust(bottom=0, top=1, left=0, right=1)
+
+    canvasd.draw()
+    img_str_d, (width, height) = canvasd.print_to_buffer()
+
+    depth_image = np.frombuffer(img_str_d, np.uint8).reshape((height, width, 4))
+    
+    ###
+    ### Create Normals Overlay
+    ###
+
+    def maptocolor_norm(tri):
+      # Reject triangles that span more than 5 units along any cloud axis.
+      xspan = abs(tri[:,0].max() - tri[:,0].min())
+      yspan = abs(tri[:,1].max() - tri[:,1].min())
+      zspan = abs(tri[:,2].max() - tri[:,2].min())
+      if any(v > 5 for v in (xspan, yspan, zspan)):
+        return float('inf')
+      tri = tri[:,:3]
+      tri_norm = np.cross(tri[0] - tri[1], tri[0] - tri[2])
+      tri_norm /= np.linalg.norm(tri_norm)
+      return 1. - abs(principal_axis.dot(tri_norm))
+
+
+    fign = Figure(figsize=(img_w, img_h), dpi=1)
+    fign.set_facecolor((0, 0, 0))
+    canvasn = FigureCanvas(fign)
+    axn = fign.gca()
+    zcolors_max = 1
+    zcolors_norm = [
+      (1-(np.array([z, z, z]) / zcolors_max)).tolist()
+      for z in np.arange(0, zcolors_max, 0.01)
+    ]
+    cmap_norm = ListedColormap(zcolors_norm)
+    cmap_norm.set_under("white")
+    cmap_norm.set_over("black")
+    norm_n = Normalize(vmin=0, vmax=zcolors_max + 1)
+
+    xyzd = np.concatenate([cloud[:, 0:3], uvd[:, 2:]], axis=-1)
+    fused_vertices = list(map(lambda index: xyzd[index], triang.triangles))
+    axn.tripcolor(
+        triang,
+        facecolors=np.array([maptocolor_norm(tri) for tri in fused_vertices]),
+        cmap=cmap_norm, norm=norm_n)
+
+    axn.axis('off')
+    axn.set_xlim(0, img_w)
+    axn.set_ylim(0, img_h)
+    fign.tight_layout()
+    fign.subplots_adjust(bottom=0, top=1, left=0, right=1)
+
+    canvasn.draw()       # draw the canvas, cache the renderer
+    img_str_n, (width, height) = canvasn.print_to_buffer()
+
+    normals_image = np.frombuffer(img_str_n, np.uint8)
+    normals_image = normals_image.reshape((height, width, 4))
+    
+    final_image = (
+      depth_image.astype(float) * (normals_image.astype(float) / 255))
+    
+    # Release memory
+    fign.clear()
+    canvasn.get_renderer().clear()
+    figd.clear()
+    canvasd.get_renderer().clear()
+    
+    return final_image.astype(np.uint8)[:, :, :3]
+
+
+  ALL_RV_IMG_TYPES = (
+    'depth_delaunay_smoothed',
+    'depth_smoothed',
+    'depth',
+    'height_smoothed',
+    'height',
+  )
+  def get_cloud_rv_images(self, img_types):
+    """Render range-view images of `self.clouds` as seen by this camera.
+
+    Returns a dict mapping each requested type in `img_types` (see
+    `ALL_RV_IMG_TYPES`) to its image; an empty dict if no types or clouds.
+    """
+    if not img_types:
+      return {}
+    clouds = [pc.cloud for pc in (self.clouds or [])]
+    if not clouds:
+      return {}
+    fused_cloud = np.concatenate(clouds)
+
+    # Project points to cam
+    pts_in_cam = self.cam_from_ego.apply(fused_cloud).T
+    uvd = self.K.dot(pts_in_cam.T)
+    uvd[0:2, :] /= uvd[2, :]
+
+    # Only keep onscreen points (`uvd` is 3-by-n at this point)
+    indices = np.where(
+              np.logical_and.reduce((
+                # Filter offscreen points
+                0 <= uvd[0, :], uvd[0, :] < self.width - 1.0,
+                0 <= uvd[1, :], uvd[1, :] < self.height - 1.0,
+                # Filter behind-screen points
+                uvd[2, :] > 0)))
+    indices = indices[0]
+    uvd = uvd[:, indices].T
+
+    # Compute height data and normalize
+    ego_z = fused_cloud[indices, 2]
+    ego_z = np.tanh((ego_z + 1) / 5)
+
+    # Ego-frame points of the same onscreen subset (row-aligned with `uvd`)
+    unfiltered_fused_cloud = fused_cloud[indices, :]
+
+    im_size = (self.height, self.width)
+    NO_SMOOTH = CameraImage.NO_RV_SMOOTHING
+
+    rv_images = {}
+    if 'depth' in img_types:
+      rv_images['depth'] = CameraImage.get_cloud_rv_simple(
+                              im_size,
+                              uvd,
+                              depth_soft_horizon_meters=NO_SMOOTH)
+    if 'depth_smoothed' in img_types:
+      rv_images['depth_smoothed'] = CameraImage.get_cloud_rv_simple(
+                                        im_size, uvd)
+    
+    if 'height' in img_types:
+      rv_images['height'] = CameraImage.get_cloud_rv_simple(
+                              im_size,
+                              uvd,
+                              ptvs=ego_z,
+                              depth_soft_horizon_meters=NO_SMOOTH)
+    if 'height_smoothed' in img_types:
+      rv_images['height_smoothed'] = CameraImage.get_cloud_rv_simple(
+                                        im_size, uvd, ptvs=ego_z)
+
+    if 'depth_delaunay_smoothed' in img_types:
+      rv_images['depth_delaunay_smoothed'] = (
+        CameraImage.get_cloud_rv_delaunay_smoothing(
+          im_size,
+          uvd,
+          unfiltered_fused_cloud,
+          self.principal_axis_in_ego))
+
+    return rv_images
+    
+
+
+
+
+    
+
+    # PT_RADIUS_PIXELS = 10
+    # if channel == 'depth':
+    #   print('start depth')
+    #   pts_in_cam = self.cam_from_ego.apply(fused_cloud).T
+    #   uvd = self.K.dot(pts_in_cam.T)
+    #   uvd[0:2, :] /= uvd[2, :]
+
+    #   uvd_out = None
+    #   MAX_DEPTH_METERS = 80 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    #   D_STEP = 10
+    #   r = PT_RADIUS_PIXELS
+    #   for dlo in range(0, MAX_DEPTH_METERS, D_STEP):
+    #     dhi = dlo + D_STEP
+    #     indices = np.where(
+    #                 np.logical_and.reduce((
+    #                   dlo <= uvd[2, :], uvd[2, :] < dhi)))
+    #     indices = indices[0]
+    #     uvd_bucket = uvd[:, indices]
+    #     for rx in range(-r, r+1):
+    #       for ry in range(-r, r+1):
+    #         added = uvd_bucket + np.array([[rx], [ry], [0]])
+    #         if uvd_out is None:
+    #           uvd_out = added
+    #         else:
+    #           uvd_out = np.concatenate([uvd_out, added], axis=1)
+      
+    #   # Only keep onscreen points
+    #   indices = np.where(
+    #               np.logical_and.reduce((
+    #                 # Filter offscreen points
+    #                 0 <= uvd_out[0, :], uvd_out[0, :] < self.width - 1.0,
+    #                 0 <= uvd_out[1, :], uvd_out[1, :] < self.height - 1.0,
+    #                 # Filter behind-screen points
+    #                 uvd_out[2, :] > 0)))
+    #   indices = indices[0]
+    #   uvd_out = uvd_out[:, indices].T
+
+    #   # map depth -> pixel color
+    #   uvd_out[2, :] = np.clip(255 * (uvd_out[2, :] / MAX_DEPTH_METERS), 0, 255)
+
+    #   uvd_out = uvd_out.T
+    #   np.sort(uvd_out, axis=-1)
+    #   idx = np.floor(uvd_out.T[:2,:].T).astype(int)
+    #   img_out[idx[:,1],idx[:0]] = uvd_out[:,2][:,np.newaxis]
+
+    #   # uvd = self.project_ego_to_image(fused_cloud, omit_offscreen=True)
+
+    #   # # TODO try to use a faster np array assign:
+    #   # # idx = np.floor(uvd.T[:2,:].T).astype(int)
+    #   # # img_out[idx[:,1],idx[:0]] = uvd[:,2][:,np.newaxis]
+    #   # for pt in uvd.tolist():
+    #   #   u, v, d = pt
+    #   #   if 0 <= v < self.height and 0 <= u < self.width:
+    #   #     MAX_DEPTH_METERS = 80 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    #   #     d = np.clip(255 * (float(d) / MAX_DEPTH_METERS), 0, 255)
+    #   #     radius = int(np.clip((1. - (d / 255)) * PT_RADIUS_PIXELS, 2, PT_RADIUS_PIXELS))
+    #   #     for rr in range(-radius, radius+1):
+    #   #       for rc in range(-radius, radius + 1):
+    #   #         r = int(v + rr); c = int(u + rc)
+    #   #         if 0 <= r < self.height and 0 <= c < self.width:
+    #   #           img_out[r, c, :] = max(img_out[r, c, :], d)
+    # elif channel == 'height':
+    #   cloud_z = fused_cloud[:, 2]
+    #   pts_in_cam = self.cam_from_ego.apply(fused_cloud).T
+    #   uvd = self.K.dot(pts_in_cam.T)
+    #   uvd[0:2, :] /= uvd[2, :]
+
+    #   # Only keep onscreen points
+    #   indices = np.where(
+    #               np.logical_and.reduce((
+    #                 # Filter offscreen points
+    #                 0 <= uvd[0, :], uvd[0, :] < self.width - 1.0,
+    #                 0 <= uvd[1, :], uvd[1, :] < self.height - 1.0,
+    #                 # Filter behind-screen points
+    #                 uvd[2, :] > 0)))
+    #   indices = indices[0]
+
+    #   uvh = uvd[:, indices].T
+    #   uvh[:, 2] = cloud_z[indices]
+
+    #   for pt in uvh.tolist():
+    #     u, v, h = pt
+    #     if 0 <= v < self.height and 0 <= u < self.width:
+    #       MAX_HEIGHT_METERS = 10 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    #       h = np.clip(255 * (float(h) / MAX_HEIGHT_METERS), 0, 255)
+    #       # u = int(u)
+    #       # v = int(v)
+    #       # img_out[v, u, :] = max(img_out[v, u, :], h)
+    #       radius = int(np.clip((1. - (h / 255)) * PT_RADIUS_PIXELS, 2, PT_RADIUS_PIXELS))
+    #         # makes less sense ....
+    #       for rr in range(-radius, radius+1):
+    #         for rc in range(-radius, radius + 1):
+    #           r = int(v + rr); c = int(u + rc)
+    #           if 0 <= r < self.height and 0 <= c < self.width:
+    #             img_out[r, c, :] = max(img_out[r, c, :], h)
+    # else:
+    #   ValueError(channel) # TODO: BEV mebbe
+    
+    # return img_out
+
+  def to_html(self):
+    """Return an HTML debug report (attributes, image, clouds, RV, boxes)."""
+    import tabulate
+    from au import plotting as aupl
+    table = [
+      [attr, to_preformatted(getattr(self, attr))]
+      for attr in (
+        'camera_name',
+        'timestamp',
+        'cam_from_ego',
+        'K',
+        'principal_axis_in_ego')
+    ]
+    html = tabulate.tabulate(table, tablefmt='html')
+
+    image = self.image
+    if util.np_truthy(image):
+      table = [
+        ['<b>Image</b>'],
+        [aupl.img_to_img_tag(image, display_viewport_hw=(1000, 1000))],
+      ]
+      html += tabulate.tabulate(table, tablefmt='html')
+
+    if self.clouds:
+      debug_img = np.copy(self.image)
+      for pc in self.clouds:
+        cloud = self.project_ego_to_image(pc.cloud, omit_offscreen=True)
+        aupl.draw_xy_depth_in_image(debug_img, cloud, alpha=0.7)
+      table = [
+        ['<b>Image With Clouds</b>'],
+        [aupl.img_to_img_tag(debug_img, display_viewport_hw=(1000, 1000))],
+      ]
+      html += tabulate.tabulate(table, tablefmt='html')
+
+      # NB: the loop variable must not shadow `image`; box crops below use it.
+      rv_images = self.get_cloud_rv_images(self.ALL_RV_IMG_TYPES)
+      for img_type, rv_image in rv_images.items():
+        rv_image = rv_image.astype(np.uint8)
+        table = [
+          ['<b>RV Image: %s</b>' % img_type],
+          [aupl.img_to_img_tag(rv_image, display_viewport_hw=(1000, 1000))],
+        ]
+        html += tabulate.tabulate(table, tablefmt='html')
+    
+    if self.bboxes:
+      debug_img = np.copy(self.image)
+      for bbox in self.bboxes:
+        bbox.draw_in_image(debug_img)
+      table = [
+        ['<b>Image With Boxes</b>'],
+        [aupl.img_to_img_tag(debug_img, display_viewport_hw=(1000, 1000))],
+      ]
+      html += tabulate.tabulate(table, tablefmt='html')
+
+      html += '<br /><b>Boxes</b><br />'
+      table = [
+        [aupl.img_to_img_tag(
+            bbox.get_crop(image),
+            image_viewport_hw=(300, 300)),
+         bbox.to_html() + '<br /><hr />']
+        for bbox in self.bboxes
+      ]
+      html += tabulate.tabulate(table, tablefmt='html')
+
+    return html
+
+
+  def to_plotly_world_frame_3d(self, frustum_size_meters=0.1):
+    """Return a plotly `Scatter3d` line trace of the camera frustum and axes."""
+    corners = np.array([
+      [0, 0],
+      [self.width, 0],
+      [self.width, self.height],
+      [0, self.height],
+    ])
+
+    f_x = self.K[0, 0]
+    f_y = self.K[1, 1]
+    c_x = self.K[0, 2]
+    c_y = self.K[1, 2]
+
+    rays_xy_cam = (corners - np.array([c_x, c_y])) / np.array([f_x, f_y])
+    rays_xyz_cam = np.hstack([rays_xy_cam, np.ones((4, 1))])
+    rays_xyz_cam *= frustum_size_meters
+
+    cam_pts_cam = np.vstack([rays_xyz_cam, np.array([0, 0, 0])])
+    cam_frame_pts_cam = np.array([
+      [1., 0., 0.], # x hat
+      [0., 1., 0.], # y hat
+      [0., 0., 1.], # z hat
+    ])
+    cam_frame_pts_cam *= frustum_size_meters
+
+    T_ego_from_sensor = self.ego_to_sensor[self.sensor_name, 'ego']
+    cam_pts_ego = T_ego_from_sensor.apply(cam_pts_cam).T
+    cam_frame_pts_ego = T_ego_from_sensor.apply(cam_frame_pts_cam).T
+
+    T_world_from_ego = self.ego_pose['ego', 'world']
+    cam_pts_world = T_world_from_ego.apply(cam_pts_ego).T
+    cam_frame_pts_world = T_world_from_ego.apply(cam_frame_pts_ego).T
+
+    frustum_corners = [
+      cam_pts_world[0, :],
+      cam_pts_world[1, :],
+      cam_pts_world[2, :],
+      cam_pts_world[3, :],
+    ]
+    cam_frame_x_hat = cam_frame_pts_world[0, :]
+    cam_frame_y_hat = cam_frame_pts_world[1, :]
+    cam_frame_z_hat = cam_frame_pts_world[2, :]
+    cam_center = cam_pts_world[-1, :]
+
+    lines = []
+    colors = []
+    # Each line is a closed loop of points, bracketed by `None` entries.
+    def make_line(pts):
+      return [None] + [list(p) for p in (pts + [pts[0]])] + [None]
+    def to_css_color(rgb):
+      r, g, b = np.clip(rgb, 0, 255).astype(int).tolist()
+      return 'rgb(%s,%s,%s)' % (r, g, b)
+    def add_color(c, n):
+      colors.extend(['rgb(0,0,0)'] + (n-2) * [to_css_color(c)] + ['rgb(0,0,0)'])
+
+    # lines from cam center to frustum corners
+    for corner in frustum_corners:
+      l = make_line([cam_center, corner])
+      lines.append(l)
+      add_color((255, 255, 0), len(l))
+    
+    # lines around square of frustum
+    for i in range(4):
+      start = frustum_corners[i]
+      end = frustum_corners[(i + 1) % 4]
+
+      l = make_line([start, end])
+      lines.append(l)
+      add_color((125, 125, 0), len(l))
+    
+    # R, G, B lines to show cam x-hat, y-hat, z-hat
+    l = make_line([cam_center, cam_frame_x_hat])
+    lines.append(l)
+    add_color((255, 0, 0), len(l))
+    l = make_line([cam_center, cam_frame_y_hat])
+    lines.append(l)
+    add_color((0, 255, 0), len(l))
+    l = make_line([cam_center, cam_frame_z_hat])
+    lines.append(l)
+    add_color((0, 0, 255), len(l))
+
+    import plotly
+    import plotly.graph_objects as go
+    def to_line_vals(idx, lines):
+      import itertools
+      ipts = itertools.chain.from_iterable(lines)
+      return [(pt[idx] if pt is not None else pt) for pt in ipts]
+    lines_plot = go.Scatter3d(
+                    name=str(self.timestamp),
+                    x=to_line_vals(0, lines),
+                    y=to_line_vals(1, lines),
+                    z=to_line_vals(2, lines),
+                    mode='lines',
+                    line=dict(width=3, color=colors))
+
+    return lines_plot
+
+  def to_trimeshes_world_frame(
+         self,
+         frustum_meters=.1,
+         include_thumnail=True,
+         thumb_height_pixels=128,
+         thumb_height_meters=0.1,
+         thumb_offset_meters=0.1,
+         thumb_thickness_meters=0.001,
+         thumb_alpha=0.75):
+    """Return trimesh meshes for a camera marker and an optional thumbnail."""
+    import io
+    import trimesh
+    import shapely # Transitive requirement for frustum marker
+
+    T_ego_from_sensor = self.ego_to_sensor[self.sensor_name, 'ego']
+    T_world_from_ego = self.ego_pose['ego', 'world']
+    
+    # NOTE(review): by operand names this is world-from-sensor, not "w2c".
+    w2c = T_world_from_ego @ T_ego_from_sensor
+    w2c = w2c.get_transformation_matrix(homogeneous=True)
+
+    meshes = []
+
+    # Create camera marker
+    fov_h, fov_v = self.get_fov()
+    fov_h_deg = fov_h * 180. / math.pi
+    fov_v_deg = fov_v * 180. / math.pi
+    cam = trimesh.creation.camera_marker(
+                  trimesh.scene.Camera(
+                      fov=(fov_h_deg, fov_v_deg)),
+                  marker_height=frustum_meters) # Actually also frustum depth
+    # cam[1].colors = [[.5, .5, .5, 1.]] * 5
+      # Actually frustum color
+
+    for m in cam:
+      m.apply_transform(w2c)
+      meshes.append(m)
+
+    if include_thumnail:
+      # Create the thumbnail image and texture
+      from PIL import Image
+      import imageio
+      import cv2
+
+      debug = self.image
+      debug = debug.astype('uint8')
+      
+      h = thumb_height_pixels
+      aspect = debug.shape[1] / debug.shape[0]
+      w = int(aspect * h)
+
+      debug = cv2.resize(debug, (w, h))
+      
+      buf = io.BytesIO()
+      imageio.imwrite(buf, debug, format='jpg', quality=75)
+      buf.seek(0)
+      pil_img = Image.open(buf)
+      # thumb_material = trimesh.visual.material.PBRMaterial(
+      #               baseColorTexture=pil_img.copy(),
+      #               baseColorFactor=[1.0, 1.0, 1.0, thumb_alpha],
+      #               alphaMode="BLEND",
+      #               doubleSided=True,
+      #               alphaCutoff=0.01)
+      thumb_material = trimesh.visual.material.SimpleMaterial(
+                    image=pil_img.copy(),
+                    # baseColorFactor=[1.0, 1.0, 1.0, thumb_alpha],
+                    # alphaMode="BLEND",
+                    doubleSided=True)
+                    # ,
+                    # alphaCutoff=0.01)
+      
+      # Create thumbnail mesh: a thin box offset along -z of the camera frame
+      thumb_h = thumb_height_meters
+      thumb_w = aspect * thumb_h
+      thumb_RT = np.eye(4, 4)
+      thumb_RT[2, 3] -= thumb_offset_meters
+      thumb_mesh = trimesh.creation.box(
+                      extents=[thumb_w, thumb_h, thumb_thickness_meters],
+                      transform=thumb_RT)
+      thumb_mesh.visual.face_colors = (1., 1., 1.)
+
+      thumb_mesh.visual.material = thumb_material
+      thumb_mesh.visual.uv = np.zeros((len(thumb_mesh.vertices), 2))
+
+      # -z face
+      thumb_mesh.visual.uv[0] = [0, 1] # include a ud flip
+      thumb_mesh.visual.uv[4] = [1, 1]
+      thumb_mesh.visual.uv[2] = [0, 0]
+      thumb_mesh.visual.uv[6] = [1, 0]
+
+      # +z face
+      thumb_mesh.visual.uv[1] = [0, 1] # include a ud flip
+      thumb_mesh.visual.uv[5] = [1, 1]
+      thumb_mesh.visual.uv[3] = [0, 0]
+      thumb_mesh.visual.uv[7] = [1, 0]
+
+      thumb_mesh.apply_transform(w2c)
+      meshes.append(thumb_mesh)
+  
+    return meshes
+
+
+# show_html(html)
+
+# import numpy as np
+# import plotly.graph_objects as go
+# import skimage.io as sio
+
+# x = np.linspace(-2,2, 128)
+# x, z = np.meshgrid(x,x)
+# y = np.sin(x**2*z)
+
+# fig = go.Figure(go.Surface(x=x, y=y, z=z,
+#                            colorscale='RdBu', 
+#                            showscale=False))
+# image = sio.imread ("https://raw.githubusercontent.com/empet/Discrete-Arnold-map/master/Images/cat-128.jpg") 
+# print(image.shape)
+# img = imag[:,:, 1] 
+# Y = 0.5 * np.ones(y.shape)
+# fig.add_surface(x=x, y=Y, z=z, 
+#                 surfacecolor=np.flipud(img), 
+#                 colorscale='matter_r', 
+#                 showscale=False)
+# fig.update_layout(width=600, height=600, 
+#                   scene_camera_eye_z=0.6, 
+#                   scene_aspectratio=dict(x=0.9, y=1, z=1));
+# fig.show()
+
+
+# @attr.s(slots=True, eq=False, weakref_slot=False)
+# class CameraVideo(object):
+#   """A video file event from a camera; the camera could be calibrated.  The video
+#   might also have a depth channel (i.e. RGB-D video)."""
+
+#   sensor_name = attr.ib(type=str, default='')
+#   """str: Name of the camera, e.g. camera_front"""
+
+#   video_bytes = attr.ib(type=bytearray, default=bytearray())
+#   """bytearray: Buffer of video data (rare; use `imageio` to sniff for
+#   video type)"""
+
+#   video_uri = attr.ib(type=str, default='')
+
+#   iter_image_factory = attr.ib(
+#       type=CloudpickeledCallable,
+#       converter=CloudpickeledCallable,
+#       default=None)
+#   """CloudpickeledCallable: A serializable factory function that emits a
+#   stream of HWC numpy array images"""
+
+#   width = attr.ib(type=int, default=0, validator=None)
+#   """int: Width of images in pixels"""
+
+#   height = attr.ib(type=int, default=0, validator=None)
+#   """int: Height of images in pixels"""
+
+#   start_timestamp = attr.ib(type=int, default=0)
+#   """int: Timestamp associated with the start of this video; typically a Unix stamp in
+#   nanoseconds."""
+
+#   end_timestamp = attr.ib(type=int, default=0)
+#   """int: Timestamp associated with the end of this video; typically a Unix stamp in
+#   nanoseconds."""
+
+#   K = attr.ib(type=np.ndarray, default=np.eye(3, 3))
+#   """numpy.ndarray: The 3x3 intrinsic calibration camera matrix"""
+
+#   distortion_model = attr.ib(type=str, default="")
+#   """str: Optional distortion model, e.g. OPENCV"""
+
+#   distortion_kv = attr.ib(default={}, type=typing.Dict[str, float])
+#   """Dict[str, float]: A map of distortion parameter name -> distortion parameter
+#   value.  E.g. for OPENCV there might be entries for k1, k2, p1, p2."""
+
+#   channel_names = attr.ib(default=['r', 'g', 'b'])
+#   """List[str]: Semantic names for the channels (or dimensions / attributes)
+#   of the image. By default, the `image` member uses `imageio` to read an
+#   3-channel RGB image as a HWC array.  (Some PNGs could use an alpha channel
+#   to produce an RGBA image).  In the case of depth images, one of the channels
+#   (usually the first) decodes as depth in meters."""
+
+#   extra = attr.ib(default={}, type=typing.Dict[str, str])
+#   """Dict[str, str]: A map for adhoc extra context"""
+
+
+
+
+
+# @attr.s(slots=True, eq=False, weakref_slot=False)
+# class CameraVideoEvent(object):
+#   """A video file event from a camera; the camera could be calibrated.  The video
+#   might also have a depth channel (i.e. RGB-D video).  A segment may have
+#   *two* datums for video: one at the start and one at the end."""
+
+#   is_start = attr.ib(type=bool, default=True)
+#   """bool: This datum denotes the start of a video sequence; there might be
+#   a separate datum at the end."""
+
diff --git a/psegs/datum/cuboid.py b/psegs/datum/cuboid.py
new file mode 100644
index 0000000..b105080
--- /dev/null
+++ b/psegs/datum/cuboid.py
@@ -0,0 +1,361 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import attr
+import numpy as np
+
+from psegs.datum import datumutils as du
+from psegs.datum.transform import Transform
+
+
+@attr.s(slots=True, eq=True, weakref_slot=False)
+class Cuboid(object):
+  """An 8-vertex cuboid: a box of a given size posed relative to the ego /
+  robot frame, plus labeling context (track, category, timestamp)."""
+
+  ## Context
+
+  track_id = attr.ib(type=str, default='')
+  """str: String identifier; same object across many frames has same
+  track_id"""
+
+  category_name = attr.ib(type=str, default='')
+  """str: Category of the cuboid, can be using the dataset category domain"""
+
+  ps_category = attr.ib(type=str, default='')
+  """str: `psegs` Category (typically coarser than `category_name`)"""
+
+  timestamp = attr.ib(type=int, default=0)
+  """int: Timestamp associated with this cuboid; typically a Unix stamp in
+  nanoseconds.  Probably a Lidar timestamp."""
+
+  # NB: mutable defaults use a factory so that instances never share (and
+  # accidentally mutate) a single default object.
+  extra = attr.ib(default=attr.Factory(dict), type=typing.Dict[str, str])
+  """Dict[str, str]: A map for adhoc extra context"""
+
+  ## Cuboid orientation and size
+
+  length_meters = attr.ib(type=float, default=0.)
+  """float: Length in ego frame, where +x is forward"""
+
+  width_meters = attr.ib(type=float, default=0.)
+  """float: Width in ego frame, where +y is left"""
+
+  height_meters = attr.ib(type=float, default=0.)
+  """float: Height in ego frame, where +z is up"""
+
+  obj_from_ego = attr.ib(type=Transform, default=attr.Factory(Transform))
+  """Transform: From center of cuboid frame to ego / robot frame"""
+
+  ego_pose = attr.ib(type=Transform, default=attr.Factory(Transform))
+  """Transform: From world to ego / robot frame at the cuboid's `timestamp`"""
+
+  @classmethod
+  def merge_extras(cls, e1, e2):
+    """Return a new dict merging the `extra` maps `e1` and `e2`.
+
+    Entries of `e2` override those of `e1`, except for `motion_corrected`,
+    which becomes the string form of the logical OR of both values.
+    """
+
+    def is_set(value):
+      # `extra` values are strings and `bool('False')` is True, so flags
+      # must be parsed rather than cast.
+      if isinstance(value, str):
+        return value.strip().lower() not in ('', '0', 'false', 'no', 'none')
+      return bool(value)
+
+    merged = dict(e1)
+    for k, v in e2.items():
+      if k == 'motion_corrected':
+        merged[k] = str(is_set(merged.get(k)) or is_set(v))
+      else:
+        merged[k] = v
+    return merged
+
+  def get_box3d(self):
+    """Return the 3d box in ego / robot frame defining the cuboid.
+        Given in order:
+            (+x +y +z)  [Front face CW about +x axis]
+            (+x -y +z)
+            (+x -y -z)
+            (+x +y -z)
+            (-x +y +z)  [Rear face CW about +x axis]
+            (-x -y +z)
+            (-x -y -z)
+            (-x +y -z)
+    """
+    l, w, h = self.length_meters, self.width_meters, self.height_meters
+    corners_in_cube_frame = .5 * np.array([
+                  [ l,  w,  h],  # Front
+                  [ l, -w,  h],
+                  [ l, -w, -h],
+                  [ l,  w, -h],
+
+                  [-l,  w,  h],  # Back
+                  [-l, -w,  h],
+                  [-l, -w, -h],
+                  [-l,  w, -h],
+    ])
+
+    to_ego = self.obj_from_ego['ego', 'obj']
+    corners_in_ego = to_ego.apply(corners_in_cube_frame)
+    return corners_in_ego.T
+
+  def to_html(self):
+    """Return an HTML table with one row per attribute of this cuboid."""
+    import tabulate
+    table = [
+      [name, du.to_preformatted(getattr(self, name))]
+      for name in self.__slots__
+    ]
+    return tabulate.tabulate(table, tablefmt='html')
+
+  @classmethod
+  def get_merged(cls, c1, c2, mode='union', alpha=None):
+    """Return a new cuboid via merging `c1` and `c2`.
+
+    Args:
+      c1 (Cuboid): Merge this cuboid with `c2`. Retain category and other
+        context of `c1`.
+      c2 (Cuboid): Merge this cuboid with `c1`.
+      mode (str): Merging mode. Choices:
+        `union`: Pick a mean position and orientation and scale to fit points
+        of both `c1` and `c2`.  Use to merge two objects (e.g. bicycle and its
+        rider).  Any given `alpha` is ignored (0.5 is used).
+        `interpolate`: Interpolate (using `alpha`) between the positions and
+        orientations of `c1` and `c2` and use the size of `c1`.  Use to
+        compute the interpolated position / cuboid of a track between positions
+        at time `c1.timestamp` and `c2.timestamp`.
+      alpha (float, optional): For interpolation, weight `c1` with 1-`alpha`
+        and `c2` with `alpha`, where `alpha in [0, 1]`
+
+    Returns:
+      Cuboid: The merged cuboid.  Its `timestamp` is that of `c1` for
+        `union`, and is interpolated with the same `alpha` weighting as the
+        pose for `interpolate`.  Its `ego_pose` is not set (left at the
+        default).
+
+    Raises:
+      ValueError: If `mode` is unknown, or if `mode` is `interpolate` and no
+        `alpha` is given.
+    """
+
+    # Validate up front so that a bad call fails before any work is done
+    if mode == 'union':
+      alpha = 0.5
+    elif mode == 'interpolate':
+      if alpha is None:
+        raise ValueError("mode 'interpolate' requires `alpha`")
+    else:
+      raise ValueError(mode)
+
+    from scipy.spatial.transform import Rotation as R
+    from scipy.spatial.transform import Slerp
+
+    ## Step 1: Blend the poses of the two cuboids
+    c1_obj_from_ego = c1.obj_from_ego['ego', 'obj']
+    c2_obj_from_ego = c2.obj_from_ego['ego', 'obj']
+
+    merged_translation = (
+      (1 - alpha) * c1_obj_from_ego.translation +
+      alpha * c2_obj_from_ego.translation)
+        # NB: use alpha blend consistent with the definition of Slerp
+
+    rots = R.from_matrix([
+      c1_obj_from_ego.rotation,
+      c2_obj_from_ego.rotation,
+    ])
+    slerp = Slerp([0, 1], rots)
+    # Querying with a 1-element list yields a stack of one rotation; take
+    # the single 3x3 matrix out of it.
+    merged_rot = slerp([alpha]).as_matrix()[0]
+
+    merged_transform = Transform(
+      rotation=merged_rot,
+      translation=merged_translation,
+      dest_frame='obj',
+      src_frame='ego',
+    )
+
+    ## Step 2: Compute cuboid bounds given new pose
+    if mode == 'union':
+      # Project all the points of the cubes `c1` and `c2` into the new
+      # merged frame and take the extent along each axis
+      all_pts_in_ego = np.concatenate((c1.get_box3d(), c2.get_box3d()))
+      to_merged = merged_transform['obj', 'ego']
+      all_pts_in_merged = to_merged.apply(all_pts_in_ego).T
+
+      lwh = all_pts_in_merged.max(axis=0) - all_pts_in_merged.min(axis=0)
+      length, width, height = lwh.tolist()
+    else:
+      # Interpolation: just fit the box from the first cuboid; assume the
+      # track is not deformable
+      length = c1.length_meters
+      width = c1.width_meters
+      height = c1.height_meters
+
+    ## Step 3: Timestamp, blended with the same weights as the pose
+    timestamp = c1.timestamp
+    if mode == 'interpolate':
+      timestamp += int(round(alpha * (c2.timestamp - c1.timestamp)))
+
+    return cls(
+      track_id=c1.track_id + '-' + mode + '-' + c2.track_id,
+      category_name=c1.category_name,
+      ps_category=c1.ps_category,
+      timestamp=timestamp,
+      length_meters=float(length),
+      width_meters=float(width),
+      height_meters=float(height),
+      obj_from_ego=merged_transform,
+      extra=cls.merge_extras(c1.extra, c2.extra),
+    )
+
+  @classmethod
+  def get_interpolated(cls, cuboids, target_timestamp, allow_future=False):
+    """For each distinct track in `cuboids`, return a single cuboid
+    interpolated to have estimated pose at time `target_timestamp` based
+    on cuboids before and after that target.  If the track does not have
+    cuboids that straddle `target_timestamp`, then return the most
+    recent cuboid, if there is one.  If `allow_future`, then return the
+    cuboid closest in time, even if it's in the future (i.e. `cuboid.timestamp`
+    is after [greater than] `target_timestamp`).
+
+    Args:
+      cuboids (Iterable[Cuboid]): Cuboids of any number of tracks.
+      target_timestamp (int): Interpolate each track to this time.
+      allow_future (bool): Allow returning a cuboid stamped after
+        `target_timestamp` for tracks that have no earlier cuboid.
+
+    Returns:
+      List[Cuboid]: At most one cuboid per track.
+    """
+
+    track_id_to_cuboids = {}
+    for cuboid in cuboids:
+      track_id_to_cuboids.setdefault(cuboid.track_id, []).append(cuboid)
+
+    cuboids_out = []
+    for track in track_id_to_cuboids.values():
+      # `before`: latest cuboid stamped at or before the target.
+      # `after`: earliest cuboid stamped strictly after the target.
+      past = [c for c in track if c.timestamp <= target_timestamp]
+      future = [c for c in track if c.timestamp > target_timestamp]
+      before = max(past, key=lambda c: c.timestamp) if past else None
+      after = min(future, key=lambda c: c.timestamp) if future else None
+
+      ## Nothing straddles the target
+      if before is None:
+        if allow_future:
+          cuboids_out.append(after)
+        continue
+      if after is None:
+        cuboids_out.append(before)
+        continue
+
+      ## Interpolate!
+      # `alpha` is 0 at `before` and approaches 1 at `after`, which matches
+      # how `get_merged` weights its first and second arguments.
+      alpha = (
+        float(target_timestamp - before.timestamp) /
+          (after.timestamp - before.timestamp))
+      interpolated = cls.get_merged(
+                        before, after, mode='interpolate', alpha=alpha)
+      cuboids_out.append(interpolated)
+    return cuboids_out
diff --git a/psegs/datum/datumutils.py b/psegs/datum/datumutils.py
new file mode 100644
index 0000000..126994a
--- /dev/null
+++ b/psegs/datum/datumutils.py
@@ -0,0 +1,123 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
+import numpy as np
+
+from psegs.util import misc
+
+
+##############################################################################
+## Misc
+
+def maybe_make_homogeneous(pts, dim=3):
+  """Return the n-by-d numpy array `pts` in homogeneous coordinates for a
+  target dimension of `dim`.
+
+  If the last axis of `pts` already has `dim + 1` entries, `pts` is returned
+  unchanged; otherwise a single column of ones is appended.
+  """
+  already_homogeneous = (pts.shape[-1] == dim + 1)
+  if already_homogeneous:
+    return pts
+  ones_column = np.ones((pts.shape[0], 1))
+  return np.hstack((pts, ones_column))
+
+def l2_normalized(v):
+  """Return `v` divided by its L2 norm.
+
+  An array with more than one axis is normalized along axis 1 (i.e. row-wise
+  for a matrix); anything else is divided by the norm of the whole vector.
+  """
+  if len(v.shape) <= 1:
+    return v / np.linalg.norm(v)
+  row_norms = np.linalg.norm(v, axis=1)
+  return v / row_norms[:, np.newaxis]
+
+def theta_signed(axis, v):
+  """Return `arctan2(cross(axis, v), dot(axis, v.T))`.
+
+  NOTE(review): presumably the signed angle from `axis` to `v` for 2D
+  vectors (where the cross product is a scalar) -- confirm against callers.
+  """
+  sin_term = np.cross(axis, v)
+  cos_term = np.dot(axis, v.T)
+  return np.arctan2(sin_term, cos_term)
+
+def to_preformatted(v):
+  """Return the pretty-printed form of `v`, HTML-escaped and wrapped in a
+  `<pre>` element."""
+  import html
+  import pprint
+  pretty = pprint.pformat(v)
+  escaped = html.escape(pretty)
+  return '<pre>%s</pre>' % escaped
+
+
+
+##############################################################################
+## Datum Diffing
+
+def datum_to_diffable_tree(datum):
+  """Given a `StampedDatum` instance, return a diff-able tree for use
+  in verification / diffing.
+
+  To efficiently diff `StampedDatum`s, we use the following approach:
+    1) Most datums are attrs-based classes without (auto-generated) equality
+        methods, so we compare datums in a dict-like form.
+    2) Many datums have numpy arrays, and those are not easily comparable.
+        However, the OarphPy `RowAdapter`-ified form of a `numpy` array (i.e.
+        `oarphpy.spark.Tensor`) is easily comparable.
+    3) Some datums have embedded `oarphpy.spark.CloudpickeledCallable`
+        instances, and these might have local filesystem paths embedded and
+        thus are not directly comparable.  For these, we can only diff the
+        function name.
+    4) For any binary data fields, we just want to compare the hashes of
+        the data.
+
+  Args:
+    datum: The datum to convert; it is passed to `RowAdapter.to_row()`.
+
+  Returns:
+    dict: The recursive dict form of the datum's row, with binary fields
+      replaced by SHA1 strings and callables replaced by their class name.
+      Unset (`None`) fields stay `None`.
+  """
+
+  import hashlib
+
+  from oarphpy.spark import RowAdapter
+
+
+  def to_sha1_str(v):
+    # An unset binary field has nothing to hash; keep it as `None`, as
+    # `cpc_get_pyclass()` does for an unset callable.
+    if v is None:
+      return None
+    return 'SHA1:' + hashlib.sha1(v).hexdigest()
+      # Give a prefix so diffs make more sense
+
+  def cpc_get_pyclass(cpc):
+    # `cpc` is the dict form of a Row-ified CloudpickeledCallable
+    if cpc is not None:
+      return 'CloudpickeledCallable:func_pyclass=' + cpc['func_pyclass']
+        # Give a prefix so diffs make more sense
+    return None
+
+  DATUM_MEMBER_TO_FIELD_FORMATTER = {
+    'camera_image': {
+      'image_jpeg': to_sha1_str,
+      'image_png': to_sha1_str,
+      'image_factory': cpc_get_pyclass,
+    },
+    'point_cloud': {
+      'cloud_factory': cpc_get_pyclass,
+    }
+  }
+
+  row = RowAdapter.to_row(datum)
+  rowdict = row.asDict(recursive=True)
+  for membername, field_to_formatter in DATUM_MEMBER_TO_FIELD_FORMATTER.items():
+    member = rowdict.get(membername)
+    if member is None:
+      continue
+    for fieldname, formatter in field_to_formatter.items():
+      # Only rewrite fields that exist, so that no keys get invented
+      if fieldname in member:
+        member[fieldname] = formatter(member[fieldname])
+  return rowdict
+
+
+def get_datum_diff_string(sd1, sd2):
+  """Return a string showing the diff between `StampedDatum`s `sd1` and
+  `sd2`, or the empty string if their diff-able trees are equal.
+  """
+  tree1 = datum_to_diffable_tree(sd1)
+  tree2 = datum_to_diffable_tree(sd2)
+  if tree1 != tree2:
+    return misc.diff_of_pprint(tree1, tree2)
+  return ''
+  
+
+
+
+
+
+
diff --git a/psegs/datum/frame.py b/psegs/datum/frame.py
new file mode 100644
index 0000000..b1b4580
--- /dev/null
+++ b/psegs/datum/frame.py
@@ -0,0 +1,169 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import typing
+
+import attr
+
+from psegs.datum.uri import DatumSelection
+from psegs.datum.uri import URI
+from psegs.datum.stamped_datum import StampedDatum
+
+# TODO NOPE!!!   USE SAMPLE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@attr.s(slots=True, eq=True, weakref_slot=False)
+class Frame(object):
+  """A `Frame` is a group of :class:`~psegs.datum.stamped_datum.StampedDatum`
+  instances that centers around a single event or purpose.  For example, a
+  `Frame` may group all datums around the labels for a specific timestamp; in
+  particular, a `Frame` may be used to synchronize camera, lidar, and label
+  data.
+
+  Notes:
+   * `Frame`s are intended to be a utility for serialized `StampedDatum`s
+     rather than serialized themselves.
+  """
+
+  # NB: a factory gives every `Frame` its own list; a literal `[]` default
+  # would be shared by all instances.
+  datums = attr.ib(
+    type=typing.List[StampedDatum], default=attr.Factory(list))
+  """List[StampedDatum]: All datums associated with this `Frame`"""
+
+  uri = attr.ib(type=URI, default=None)
+  """URI: The URI addressing this frame (and group of datums)"""
+
+  def __attrs_post_init__(self):
+    """Fill in `uri` from the datums when none was given, and populate an
+    empty `uri.sel_datums` with selections for `datums`."""
+    if not self.uri:
+      if self.datums:
+        # NOTE: flagged by the original author as "not safe": the base URI
+        # is a copy of the URI of the first datum in sorted order.
+        base_uri = sorted(self.datums)[0].uri
+        self.uri = copy.deepcopy(base_uri)
+
+    if self.uri and not self.uri.sel_datums:
+      self.uri.sel_datums = DatumSelection.selections_from_value(self.datums)
+
+
+  ## Topic selectors
+
+  def topic_datums(self, topic=None, prefix=None):
+    """Return all `StampedDatum` instances for the given topic.
+
+    Args:
+      topic (str): Select all datums from this topic, e.g. `camera|front`.
+        Takes precedence over `prefix` when both are given.
+      prefix (str): Select all datums with this topic prefix; E.g.
+        `camera` selects `camera|front` and `camera|back`.
+
+    Returns:
+      List[StampedDatum]: The selected datums
+
+    Raises:
+      ValueError: If neither `topic` nor `prefix` is given.
+    """
+
+    # Check the arguments up front so that a bad call fails even when the
+    # frame has no datums.
+    if topic is None and prefix is None:
+      raise ValueError("Must specify a topic or prefix")
+
+    if topic is not None:
+      return [sd for sd in self.datums if sd.topic == topic]
+    return [sd for sd in self.datums if sd.topic.startswith(prefix)]
+
+  @property
+  def ego_poses(self):
+    """Normalized selector for the `ego_pose`
+    :class:`~psegs.datum.transform.Transform` canonical topic.
+    Returns a list of transforms.
+    """
+    return [
+      sd.transform for sd in self.topic_datums(topic='ego_pose')
+    ]
+
+  @property
+  def camera_images(self):
+    """Normalized selector for all camera
+    :class:`~psegs.datum.camera_image.CameraImage` canonical topics.
+    Returns a list of camera images.
+    """
+    return [
+      sd.camera_image for sd in self.topic_datums(prefix='camera')
+    ]
+
+  @property
+  def lidar_clouds(self):
+    """Normalized selector for all lidar
+    :class:`~psegs.datum.point_cloud.PointCloud` canonical topics.
+    Returns a list of point clouds.
+    """
+    return [
+      sd.point_cloud for sd in self.topic_datums(prefix='lidar')
+    ]
+
+  @property
+  def cuboid_labels(self):
+    """Normalized selector for the *label* :class:`~psegs.datum.Cuboid`
+    canonical topic.  Returns a list of cuboids flattened from all available
+    datums.
+    """
+    # Imported here because this module does not import `itertools` at the
+    # top level.
+    import itertools
+    return list(itertools.chain.from_iterable(
+      sd.cuboids for sd in self.topic_datums(topic='labels|cuboids')))
+  # def to_html(self):
+  #   from datetime import datetime
+  #   import tabulate
+  #   import pprint
+  #   uri = self.uri
+    
+  #   def get_topic_offset_html(datums):
+  #     topic_time = [(d.topic, d.timestamp) for d in datums]
+  #     topic_time.sort(key=lambda t: -t[-1])
+  #     end = 0
+  #     if topic_time:
+  #       end = topic_time[0][-1]
+  #     table = [['Topic', 'Relative to Oldest Datum (msec)']]
+  #     table += [[topic, '-%5.2f' % (1e-6 * (end - t))] for topic, t in topic_time]
+  #     return tabulate.tabulate(table, tablefmt='html')
+
+  #   table = [
+  #     ['URI', to_preformatted(uri)],
+  #     ['Timestamp', 
+  #       datetime.utcfromtimestamp(uri.timestamp * 1e-9).strftime('%Y-%m-%d %H:%M:%S')],
+  #     ['Extra', to_preformatted(self.extra)],
+  #     ['Datums', to_preformatted(sorted(str(d.uri) for d in self.datums))],
+  #     ['Offsets', get_topic_offset_html(self.datums)],
+  #   ]
+  #   html = tabulate.tabulate(table, tablefmt='html')
+  #   table = [['<h2>Camera Images</h2>']]
+  #   for c in self.camera_images:
+  #     c = copy.deepcopy(c)
+  #     # TODO: find a way to get rid of clouds from camera_image ~~~~~~~~~~~~~~~~~~~~~
+  #     c.clouds += self.lidar_clouds
+  #     for cuboid in self.cuboids:
+  #       bbox = c.project_cuboid_to_bbox(cuboid)
+  #       if not bbox.is_visible:
+  #         continue
+  #       c.bboxes.append(bbox)
+  #     table += [[c.to_html()]]
+    
+  #   table += [['<h2>Point Clouds</h2>']]
+  #   for c in self.lidar_clouds:
+  #     table += [[c.to_html(cuboids=self.cuboids)]]
+
+  #   html += tabulate.tabulate(table, tablefmt='html')
+  #   return html
diff --git a/psegs/datum/matched_pair.py b/psegs/datum/matched_pair.py
new file mode 100644
index 0000000..8a5b5ac
--- /dev/null
+++ b/psegs/datum/matched_pair.py
@@ -0,0 +1,521 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+from pathlib import Path
+
+import attr
+import numpy as np
+
+from oarphpy.spark import CloudpickeledCallable
+
+from psegs.datum.camera_image import CameraImage
+from psegs.datum.point_cloud import PointCloud
+from psegs.datum.transform import Transform
+from psegs.util import plotting as pspl
+from psegs.util import misc
+
+@attr.s(slots=True, eq=False, weakref_slot=False)
+class MatchedPair(object):
+  """A pair of `CameraImages` with pixelwise matches"""
+
+  matcher_name = attr.ib(type=str, default='')
+  """str: Name of the match source, e.g. SIFT_matches; could be identical to
+  the topic name or topic suffix."""
+
+  timestamp = attr.ib(type=int, default=0)
+  """int: Timestamp associated with this matched pair; use the timestamp
+  of `img1` or `img2` or the wall time of matching."""
+
+  img1 = attr.ib(default=None, type=CameraImage)
+  """CameraImage: The first (left, source) image"""
+
+  img2 = attr.ib(default=None, type=CameraImage)
+  """CameraImage: The second (right, target) image"""
+
+  matches_array = attr.ib(type=np.ndarray, default=None)
+  """numpy.ndarray: Matches as an n-by-d matrix (where `d` is *at least*
+  4, i.e. (img1 x, img1 y, img2 x, img2 y))."""
+
+  matches_factory = attr.ib(
+    type=CloudpickeledCallable,
+    converter=CloudpickeledCallable,
+    default=None)
+  """CloudpickeledCallable: A serializable factory function that emits the
+  values for `matches_array` (if a realized array cannot be provided)"""
+
+  matches_colnames = attr.ib(default=['x1', 'y1', 'x2', 'y2'])
+  """List[str]: Semantic names for the columns (or dimensions / attributes)
+  of the `matches_array`.  Typically matches are just 2D point pairs, but
+  match data can include confidence, occlusion state, track ID, and/or 
+  other data."""
+
+  extra = attr.ib(default={}, type=typing.Dict[str, str])
+  """Dict[str, str]: A map for adhoc extra context"""
+
+  def __eq__(self, other):
+    return misc.attrs_eq(self, other)
+
+  def get_matches(self):
+    if self.matches_array is not None:
+      return self.matches_array
+    elif self.matches_factory != CloudpickeledCallable.empty():
+      return self.matches_factory()
+    else:
+      raise ValueError("No matches data!")
+
+  def get_col_idx(self, colname):
+    for i in range(len(self.matches_colnames)):
+      if self.matches_colnames[i] == colname:
+        return i
+    raise ValueError(
+      "Colname %s not found in %s" % (colname, self.matches_colnames))
+
+  def get_x1y1x2y2_axes(self):
+    return [
+      self.get_col_idx('x1'),
+      self.get_col_idx('y1'),
+      self.get_col_idx('x2'),
+      self.get_col_idx('y2'),
+    ]
+  
+  def get_other_axes(self):
+    x1y1x2y2c = set(['x1', 'y1', 'x2', 'y2'])
+    all_c = set(self.matches_colnames)
+    other_names = sorted(list(all_c - x1y1x2y2c))
+    other_idx = [self.get_col_idx(n) for n in other_names]
+    return other_names, other_idx
+
+  def get_x1y1x2y2(self):
+    matches = self.get_matches()
+    x1y1x2y2 = matches[:, self.get_x1y1x2y2_axes()]
+    return x1y1x2y2
+
+  def get_x1y1x2y2_extra(self):
+    matches = self.get_matches()
+    other_names, other_idx = self.get_other_axes()
+    cols = self.get_x1y1x2y2_axes() + other_idx
+    x1y1x2y2_extra = matches[:, cols]
+    return x1y1x2y2_extra
+
+  def get_debug_line_image(self):
+    return pspl.create_matches_debug_line_image(
+              self.img1.image,
+              self.img2.image,
+              self.get_x1y1x2y2())
+
+  def get_point_cloud_in_world_frame(self):
+
+    import cv2
+
+    P_1 = self.img1.get_P()
+    P_2 = self.img2.get_P()
+    matches = self.get_matches()
+
+    x1c, y1c, x2c, y2c = self.get_x1y1x2y2_axes()
+    other_names, other_idx = self.get_other_axes()
+    uv_1 = matches[:, [x1c, y1c]]
+    uv_2 = matches[:, [x2c, y2c]]
+
+    if uv_1.shape[0] > 0:
+      xyzh = cv2.triangulatePoints(P_1, P_2, uv_1.T, uv_2.T)
+      xyz = xyzh.T.copy()
+      xyz = xyz[:, :3] / xyz[:, (-1,)]
+    else:
+      xyz = np.zeros((0, 3), dtype=np.float64)
+
+    other_vals = matches[:, other_idx]
+    cloud = np.hstack([xyz, other_vals])
+    return PointCloud(
+              sensor_name=self.matcher_name,
+              ego_to_sensor=Transform(
+                src_frame='ego', dest_frame=self.matcher_name),
+              ego_pose=Transform(
+                src_frame='world', dest_frame='ego'),
+              timestamp=self.timestamp,
+              cloud=cloud,
+              cloud_colnames = ['x', 'y', 'z'] + other_names)
+
+
+def create_stereo_rect_pair_debug_view_html(
+      ci_left,
+      ci_rights=[],
+      lr_matches=[],
+      mp_uris=[],
+      rect_image_wh=None,
+      image_viz_max_size=-1,
+      max_matches_per_pair=10_000,
+      embed_images_root_path='stereo_rect_pair_viz_images',
+      embed_opencv_js=True):
+  
+  import json
+  
+  import attr
+  import cv2
+
+  from oarphpy.plotting import hash_to_rbg
+
+  if lr_matches:
+    assert len(ci_rights) == len(lr_matches), (
+      f"{len(ci_rights)} != len(lr_matches)")
+
+    if mp_uris:
+      assert len(mp_uris) == len(lr_matches), (
+        f"{len(mp_uris)} != len(lr_matches)")
+
+  embed_images_root_path = Path(embed_images_root_path)
+  embed_images_root_path.mkdir(parents=True, exist_ok=True)
+
+  rightImageIdToInfo_entries = []
+  default_right_image_uri = ""
+  for right_id in range(len(ci_rights)):
+    ci_right = ci_rights[right_id]
+
+    rect_image_wh = None or (ci_left.width, ci_right.height)
+    mp_uri = '(unknown)' if not mp_uris else mp_uris[right_id]
+
+    match_left_xy = []
+    match_right_xy = []
+    match_color = []
+    matches = None if not lr_matches else lr_matches[right_id]
+    if matches is not None:
+      rng = np.random.RandomState(1337)
+      n = min(max_matches_per_pair, matches.shape[0])
+      idx = rng.choice(np.arange(matches.shape[0]), n)
+      for mid in idx:
+        x1, y1, x2, y2 = matches[mid, :4]
+        r, g, b = hash_to_rbg(mid)
+        match_left_xy.append((x1, y1))
+        match_right_xy.append((x2, y2))
+        match_color.append((int(b), int(g), int(r)))
+
+    K1 = ci_left.K
+    #RT1 = ci_left.get_world_to_sensor() # FIXME!! this is giving ego to ego :(
+    # https://forum.opencv.org/t/is-it-possible-to-stereorectify-with-externally-calibrated-cameras/8275/7
+    RT1 = ci_left.ego_pose
+
+    K2 = ci_right.K
+    RT2 = ci_right.ego_pose #get_world_to_sensor()
+
+    invRT1 = RT1.get_inverse()
+    invRT1h = invRT1.get_transformation_matrix(homogeneous=True)
+    RT = RT2.get_transformation_matrix(homogeneous=True) @ invRT1h
+    R = RT[:3, :3]
+    T = RT[:3, 3]
+
+    distCoeffs1 = ci_left.get_opencv_distcoeffs()
+    if distCoeffs1 is None:
+      distCoeffs1 = np.array([0., 0., 0., 0.,])
+    distCoeffs2 = ci_right.get_opencv_distcoeffs()
+    if distCoeffs2 is None:
+      distCoeffs2 = np.array([0., 0., 0., 0.,])
+    rect_output = cv2.stereoRectify(
+                    K1, distCoeffs1, K2, distCoeffs2,
+                    (ci_left.width, ci_right.height),
+                    R, T,
+                    newImageSize=rect_image_wh)
+    sR1, sR2, sP1, sP2, sQ, sroi1, sroi2 = rect_output
+
+    right_image_dest = embed_images_root_path / f"right_{right_id}.jpg"
+    right_img = ci_right.image
+    cv2.imwrite(
+      str(right_image_dest),
+      cv2.cvtColor(right_img, cv2.COLOR_RGB2BGR),
+      [int(cv2.IMWRITE_JPEG_QUALITY), 90])
+    right_image_uri = str(right_image_dest)
+    if not default_right_image_uri:
+      default_right_image_uri = right_image_uri
+
+    def _mat2jsstr(mat):
+      nrows = mat.shape[0]
+      ncols = 1
+      if len(mat.shape) > 1:
+        ncols = mat.shape[1]
+      js = f"""(
+        new cv.matFromArray(
+          {nrows},
+          {ncols},
+          cv.CV_32F,
+          {mat.flatten().tolist()}
+        )
+      )
+      """
+      return js
+    
+    def _roundFloats(o, precision=2):
+      """Save a bunch of JSON bytes where precision doesn't matter"""
+      if isinstance(o, float): return round(o, precision)
+      if isinstance(o, (list, tuple)): return [_roundFloats(x) for x in o]
+      return o
+
+    rightImageIdToInfo_entries.append(f"""
+      "{right_id}": // rightImageId
+        {{
+          "rightImageId": {right_id},
+          "rightImageUri": "{right_image_uri}",
+          "K1": {_mat2jsstr(K1)},
+          "K2": {_mat2jsstr(K2)},
+          "sR1": {_mat2jsstr(sR1)},
+          "sR2": {_mat2jsstr(sR2)},
+          "sP1": {_mat2jsstr(sP1)},
+          "sP2": {_mat2jsstr(sP2)},
+          "sroi1": {list(sroi1)},
+          "sroi2": {list(sroi2)},
+          "distCoeffs1": {_mat2jsstr(distCoeffs1)},
+          "distCoeffs2": {_mat2jsstr(distCoeffs2)},
+          "newImageSize": new cv.Size({rect_image_wh[0]}, {rect_image_wh[1]}),
+          "mpURI": "{str(mp_uri)}",
+          "mpURIPretty": ( {json.dumps(attr.asdict(mp_uri, recurse=True))} ),
+          "matchLeftXY": ( {json.dumps(_roundFloats(match_left_xy))} ),
+          "matchRightXY": ( {json.dumps(_roundFloats(match_right_xy))} ),
+          "matchColor": ( {json.dumps(match_color)} )
+        }}
+    """)
+
+  from oarphpy import plotting as opplot
+  left_img = ci_left.image
+  left_img_data_uri = opplot.img_to_data_uri(left_img, format='jpg', jpeg_quality=90)
+
+  stereoRectVizSelectRight_body = "".join(
+    f""" <option value="{i}">Image {i}</option> """
+    for i in range(len(ci_rights))
+  )
+
+  final_html = f"""
+
+  <div id="stereoRectVizRoot">
+
+  <script 
+    async
+    src="https://docs.opencv.org/4.5.5/opencv.js"
+    type="text/javascript">
+  </script>
+  <script type="text/javascript">
+
+    // BEGIN opencv rectifier and load hook
+
+    // Show first right image by default
+    stereoRectVizCurrentRight = "0";
+
+    var Module = {{
+      // https://emscripten.org/docs/api_reference/module.html#Module.onRuntimeInitialized
+      onRuntimeInitialized() {{
+        console.log("StereoRectViz Setup");
+
+        
+        // BEGIN embedded rect variables
+    
+        rightImageIdToInfo = {{
+          { ",".join(rightImageIdToInfo_entries) }
+        }};
+
+        // END embedded rect variables
+
+
+        // BEGIN utils
+
+        showRightImageId = function(i) {{
+          console.log("Showing right image " + i);
+
+          info = rightImageIdToInfo[i];
+          let K1 = info["K1"];
+          let K2 = info["K2"];
+          let sR1 = info["sR1"];
+          let sR2 = info["sR2"];
+          let sP1 = info["sP1"];
+          let sP2 = info["sP2"];
+          let sroi1 = info["sroi1"];
+          let sroi2 = info["sroi2"];
+          let distCoeffs1 = info["distCoeffs1"];
+          let distCoeffs2 = info["distCoeffs2"];
+          let newImageSize = info["newImageSize"];
+          let mpURI = info["mpURI"];
+          let mpURIPretty = info["mpURIPretty"];
+          let matchLeftXY = info["matchLeftXY"];
+          let matchRightXY = info["matchRightXY"];
+          let matchColor = info["matchColor"];
+
+          let leftMap1 = new cv.Mat();
+          let leftMap2 = new cv.Mat();
+          let rightMap1 = new cv.Mat();
+          let rightMap2 = new cv.Mat();
+          try {{
+            cv.initUndistortRectifyMap(
+              K1, distCoeffs1, sR1, sP1, newImageSize, cv.CV_32FC1,
+              leftMap1, leftMap2);
+            cv.initUndistortRectifyMap(
+              K2, distCoeffs2, sR2, sP2, newImageSize, cv.CV_32FC1,
+              rightMap1, rightMap2);
+          }} catch(err) {{
+            document.getElementById("stereoRectMPURI").innerHTML = (
+              "Error rectifying, cameras are too far apart? " + err);
+          }}
+
+          let leftOrigImg = cv.imread(document.getElementById("inputLeft"));
+          let rightOrigImg = cv.imread(document.getElementById("inputRight"));
+
+          for (var i = 0; i < matchLeftXY.length; i++) {{
+            let lxy = new cv.Point(matchLeftXY[i][0], matchLeftXY[i][1]);
+            let rxy = new cv.Point(matchRightXY[i][0], matchRightXY[i][1]);
+            let bgr = new cv.Scalar(
+              matchColor[i][0], matchColor[i][1], matchColor[i][2], 128);
+            cv.circle(leftOrigImg, lxy, 3, bgr, cv.FILLED);
+            cv.circle(rightOrigImg, rxy, 3, bgr, cv.FILLED);
+          }}
+
+          let leftRectImg = new cv.Mat();
+          cv.remap(
+            leftOrigImg,
+            leftRectImg, leftMap1, leftMap2, cv.INTER_LANCZOS4);
+          let rightRectImg = new cv.Mat();
+          cv.remap(
+            rightOrigImg,
+            rightRectImg, rightMap1, rightMap2, cv.INTER_LANCZOS4);
+          
+          cv.rectangle(
+            leftRectImg,
+            new cv.Point(sroi1[0], sroi1[1]), new cv.Point(sroi1[2], sroi1[3]),
+            new cv.Scalar(0, 255, 0), 1);
+          cv.rectangle(
+            rightRectImg,
+            new cv.Point(sroi2[0], sroi2[1]), new cv.Point(sroi2[2], sroi2[3]),
+            new cv.Scalar(0, 255, 0), 1);
+
+          cv.imshow('stereoRectVizLeft', leftRectImg);
+          cv.imshow('stereoRectVizRight', rightRectImg);
+
+          document.getElementById("stereoRectMPURI").innerHTML = mpURI;
+          document.getElementById("stereoRectMPURIPretty").innerHTML = 
+            JSON.stringify(mpURIPretty, undefined, 2);
+
+        }};
+
+        stereoRectVizSelectRightChanged = function () {{
+          var rightId = 
+            document.getElementById("stereoRectVizSelectRight").value;
+        
+          console.log("Selecting right image " + rightId);
+
+          let info = rightImageIdToInfo[rightId];
+          let rightImageUri = info["rightImageUri"];
+          let rightImage = document.getElementById("inputRight");
+          stereoRectVizCurrentRight = rightId;
+          rightImage.src = rightImageUri;        
+        }};
+        
+        stereoRectVizRightLoaded = function () {{
+          console.log("Right image loaded " + stereoRectVizCurrentRight);
+          showRightImageId(stereoRectVizCurrentRight);
+        }};
+
+        // END utils
+
+
+        showRightImageId(stereoRectVizCurrentRight);
+
+        console.log("StereoRectViz Setup Complete");
+
+      }}
+    }};
+    // END opencv loaded hook
+  </script>
+    
+  
+  <!-- StereoRectViz HTML UI -->
+  
+  <img 
+    src="{left_img_data_uri}"
+    id="inputLeft"
+    style="display: none;" />
+  <img 
+    src="{default_right_image_uri}"
+    id="inputRight"
+    style="display: none;"
+    onload="stereoRectVizRightLoaded();" />
+
+  <div id="stereoRectVizContainer">
+    <div id="stereoRectVizContainerOverlayRoot" style="position: relative">
+    
+      <div 
+        id="stereoRectVizPairViz" 
+        style="position: absolute;">
+        <table style="background-color: rgba(128, 128, 128, 0.5);">
+          <tr>
+            <td><canvas id="stereoRectVizLeft"></canvas></td>
+            <td><canvas id="stereoRectVizRight"></canvas></td>
+          </tr>
+        </table>
+
+        <div id="stereoRectControlsNInfo">
+
+          <select
+              id="stereoRectVizSelectRight"
+              style="padding: 0.5em; font-size: large;"
+              onchange="stereoRectVizSelectRightChanged();">
+            {stereoRectVizSelectRight_body}
+          </select>
+
+          <pre>
+            <div id="stereoRectMPURI">(not loaded)</div>
+            <div id="stereoRectMPURIPretty">(not loaded)</div>
+          </pre>
+
+        </div>
+
+      </div>
+    
+      <div
+        id="stereoRectVizHorizontalLine"
+        style="position: absolute; z-index: 10; background-color: red; width: 100%; height: 2px; translate(0px, 100px)"
+        >
+      </div>
+    
+    </div>
+  </div>
+    
+  <script type="text/javascript">
+
+    // BEGIN Mouse chaser lines
+
+    mouseChaseDiv = document.getElementById("stereoRectVizPairViz");
+    var drawLines = function(event) {{
+      let rect = event.target.getBoundingClientRect();
+      let x = event.clientX - rect.left; //x position within the element.
+      let y = event.clientY - rect.top;  //y position within the element.
+      let lineDiv = document.getElementById("stereoRectVizHorizontalLine");
+      lineDiv.style.transform = 'translate(0px, ' + y + 'px)';
+    }}
+    mouseChaseDiv.addEventListener('mousemove', function(event) {{
+      drawLines(event);
+    }});
+    mouseChaseDiv.addEventListener('mousedown', function(event) {{
+      drawLines(event);   
+    }});
+    mouseChaseDiv.addEventListener('mouseup', function(event) {{
+      drawLines(event);
+    }});
+    mouseChaseDiv.addEventListener('mouseout', function(event) {{
+      drawLines(event);
+    }});
+
+    // END Mouse chaser lines
+
+  </script>
+
+  </div> <!-- END stereoRectVizRoot -->
+  
+  """
+
+  return final_html
+
diff --git a/psegs/datum/pobj.py b/psegs/datum/pobj.py
new file mode 100644
index 0000000..2fe6c3d
--- /dev/null
+++ b/psegs/datum/pobj.py
@@ -0,0 +1,133 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import attr
+import numpy as np
+
+from oarphpy.spark import CloudpickeledCallable
+
+from psegs.datum import datumutils as du
+
+
+
+@attr.s(slots=True, eq=True, weakref_slot=False)
+class PUnion(object):
+  """A union type representing the possible values for attributes of 
+  `PObj` (see below)."""
+
+  v_int     = attr.ib(type=int, default=0)
+  """int: A integer value"""
+  
+  v_float   = attr.ib(type=float, default=0.)
+  """float: A floating point value"""
+
+  v_str     = attr.ib(type=str, default='')
+  """str: A string blob value"""
+  
+  v_bytes   = attr.ib(type=bytearray, default=bytearray())
+  """bytearray: A binary blob value"""
+
+  v_arr     = attr.ib(type=np.ndarray, default=None)
+  """numpy.ndarray: An array value"""
+
+  v_factory = attr.ib(
+                type=CloudpickeledCallable,
+                converter=CloudpickeledCallable,
+                default=None)
+  """CloudpickeledCallable: A serializable factory function that returns
+    some object."""
+
+  v_method  = attr.ib(
+                type=CloudpickeledCallable,
+                converter=CloudpickeledCallable,
+                default=None)
+  """CloudpickeledCallable: A serializable unary function that accepts this
+    PUnion instance as input and returns some object."""
+
+
+  # Helpers for HTML-based visualization / report
+
+  @classmethod
+  def create_html_obj(cls, html='', html_factory=None, html_method=None):
+    if html != '':
+      return cls(v_str=html)
+    elif html_factory != None:
+      return cls(v_factory=html_factory)
+    elif html_method != None:
+      return cls(v_method=html_method)
+    else:
+      raise ValueError("Don't know how to HTML-ize")
+
+  def to_html_value(self):
+    if self.v_str != '':
+      return self.v_str
+    elif self.v_factory != None:
+      return self.v_factory()
+    elif self.v_method != None:
+      return self.v_method(self)
+
+
+
+@attr.s(slots=True, eq=True, weakref_slot=False)
+class PObj(object):
+  """A generic perception (pythonic) object container.  Use this datum
+  for storing debug material, visualizations, and other hacks before
+  promoting to a formal datum (or modifying an existing one).
+
+  Good PObj material:
+   * Evaluation metric metadata (e.g. matched bounding boxes or cuboids)--
+      PSegs does not yet include any evaluation routines or metric
+      implementations.
+   * Rendered (or lazily-rendered) visualizations / debug content-- use PObj
+      to associate this content with existing datums / segments.
+
+  Bad PObj material:
+   * Labels / Predictions-- Use Cuboid, BBox2D, etc., or add a new type.
+   * Binary blob sensor data--  Use CameraImage, PointCloud, etc., perhaps
+      using a factory function for dynamic I/O.
+  """
+
+  tag = attr.ib(type=str, default='')
+
+  attr_name_to_value = attr.ib(default={}, type=typing.Dict[str, PUnion])
+
+  extra = attr.ib(default={}, type=typing.Dict[str, str])
+  """Dict[str, str]: A map for adhoc extra context"""
+
+  @classmethod  
+  def create_html(cls, html='', html_factory=None, html_method=None):
+    o = PUnion.create_html_obj(
+          html=html, html_method=html_method, html_factory=html_factory)
+    return cls(tag='HTML', attr_name_to_value={'__html__': o})
+
+  def to_html(self):
+    if '__html__' in self.attr_name_to_value:
+      o = self.attr_name_to_value['__html__']
+      return o.to_html_value()
+    else:
+      import tabulate
+      table = [
+        ['tag', self.tag]
+      ]
+      table += [
+        [attr, du.to_preformatted(o)]
+        for attr, o in sorted(self.attr_name_to_value.items())
+      ]
+      table += [
+        ['extra.' + k, v]
+        for k, v in self.extra.items()
+      ]
+      return tabulate.tabulate(table, tablefmt='html')
diff --git a/psegs/datum/point_cloud.py b/psegs/datum/point_cloud.py
new file mode 100644
index 0000000..ab94159
--- /dev/null
+++ b/psegs/datum/point_cloud.py
@@ -0,0 +1,730 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import attr
+import numpy as np
+
+from oarphpy.spark import CloudpickeledCallable
+
+from psegs.datum.transform import Transform
+from psegs.util import misc
+from psegs.util import plotting as pspl
+
+
+@attr.s(slots=True, eq=False, weakref_slot=False)
+class PointCloud(object):
+  """A cloud of `n` points `(x, y, z)` typically in the ego frame
+  (versus the sensor frame).
+  """
+
+  sensor_name = attr.ib(type=str, default='')
+  """str: Name of the point sensor, e.g. lidar_top"""
+
+  timestamp = attr.ib(type=int, default=0)
+  """int: Timestamp associated with this cloud; typically a Unix stamp in
+  nanoseconds."""
+
+  cloud = attr.ib(type=np.ndarray, default=None)
+  """numpy.ndarray: Lidar points as an n-by-d matrix (typically of 
+  `(x, y, z)` points). Nominally, these points are in **ego** frame????
+  not point sensor frame. need to check this because looks like we put in sensor frame?"""#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # TODO rename to cloud_array once we can dump SD parquet ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+  cloud_factory = attr.ib(
+    type=CloudpickeledCallable,
+    converter=CloudpickeledCallable,
+    default=None)
+  """CloudpickeledCallable: A serializable factory function that emits the
+    values for `cloud` (if a realized array cannot be provided)"""
+
+  cloud_colnames = attr.ib(default=['x', 'y', 'z'])
+  """List[str]: Semantic names for the columns (or dimensions / attributes)
+  of the cloud.  Typically clouds have just 3-D (x, y, z) points, but some
+  clouds have reflectance, RGB, labels, and/or other data."""
+
+  # then start using get_cloud() in call sites.  could rename cloud to cloud_array and then just dump
+  # the nuscenes SDTable that we built...
+  # for SDTable and impls, lets:
+  #  * default to callable use in the code for big assets
+  #  * give SDTable a base class flag if the class to_row() should expand the data or not ! 
+
+  ego_to_sensor = attr.ib(type=Transform, default=Transform())
+  """Transform: From ego / robot frame to the sensor frame (typically a static
+  transform)."""
+
+  ego_pose = attr.ib(type=Transform, default=Transform())
+  """Transform: From world to ego / robot frame at the cloud's `timestamp`"""
+
+  extra = attr.ib(default={}, type=typing.Dict[str, str])
+  """Dict[str, str]: A map for adhoc extra context"""
+
+  def __eq__(self, other):
+    return misc.attrs_eq(self, other)
+
+  @classmethod
+  def create_world_frame_cloud(cls, sensor_name='', **kwargs):
+    sensor_name = sensor_name or 'world_frame_cloud'
+    ego_to_sensor = Transform(src_frame=sensor_name, dest_frame='ego')
+    ego_pose = Transform(src_frame='ego', dest_frame='world')
+    return cls(
+            sensor_name=sensor_name,
+            ego_to_sensor=ego_to_sensor,
+            ego_pose=ego_pose,
+            **kwargs)
+
+  def get_cloud(self):
+    if self.cloud is not None:
+      return self.cloud
+    elif self.cloud_factory != CloudpickeledCallable.empty():
+      return self.cloud_factory()
+    else:
+      raise ValueError("No cloud data!")
+
+  def get_col_idx(self, colname):
+    for i in range(len(self.cloud_colnames)):
+      if self.cloud_colnames[i] == colname:
+        return i
+    raise ValueError(
+      "Colname %s not found in %s" % (colname, self.cloud_colnames))
+
+  def get_xyz_axes(self):
+    return [
+      self.get_col_idx('x'),
+      self.get_col_idx('y'),
+      self.get_col_idx('z'),
+    ]
+
+  def get_rgb_axes(self):
+    try:
+      return [
+        self.get_col_idx('r'),
+        self.get_col_idx('g'),
+        self.get_col_idx('b'),
+      ]
+    except ValueError:
+      return []
+
+  def has_rgb(self):
+    return all(k in self.cloud_colnames for k in ('r', 'g', 'b'))
+
+  def get_xyz_cloud(self):
+    # TODO: accept frame parameter e.g. world / sensor
+    cloud = self.get_cloud()
+    xyz = cloud[:, self.get_xyz_axes()]
+    return xyz
+
+  def get_xyzrgb(self, default_color=None):
+    # TODO: accept frame parameter e.g. world / sensor
+    if self.has_rgb():
+      axes = self.get_xyz_axes() + self.get_rgb_axes()
+      cloud = self.get_cloud()
+      return cloud[:, axes]
+    else:
+      xyz = self.get_xyz_cloud()
+      if default_color is None:
+        default_color = (0, 0, 0)
+      rgb = np.zeros_like(xyz)
+      rgb[:, 0] = default_color[0]
+      rgb[:, 1] = default_color[1]
+      rgb[:, 2] = default_color[2]
+      return np.hstack([xyz, rgb])
+
+  def get_colors_cloud(self):
+    cloud = self.get_cloud()
+    axes = self.get_rgb_axes()
+    if axes:
+      return cloud[:, axes]
+    else:
+      return np.zeros((0, 3), dtype=cloud.dtype)
+    
+  # @
+  # def _get_2d_debug_image(
+      
+
+
+  # def get_bev_debug_image(
+  #       self, 
+  #       cuboids=None,
+  #       x_bounds_meters=(-50, 50),
+  #       y_bounds_meters=(-50, 50),
+  #       pixels_per_meter=200):
+  #   """Create and return a BEV (Bird's-Eye-View) perspective debug image
+  #   for this point cloud (i.e. flatten the z-axis).
+
+  #   Args:
+  #     cuboids (List[:class:`~psegs.datum.cuboid.Cuboid`]): Draw these 
+  #       cuboids in the given debug image.
+  #     x_bounds_meters (Tuple[int, int]): Filter points to to this min/max
+  #       x-value in point cloud frame.
+  #     y_bounds_meters (Tuple[int, int]): Filter points to to this min/max
+  #       y-value in point cloud frame.
+  #     pixels_per_meter (int): Rasterize debug image at this resolution.
+
+  #   Returns:
+  #     np.array: A HWC RGB debug image.
+  #   """
+
+
+
+
+
+
+  #   cuboids = cuboids or []
+
+  #   ## Draw Cloud
+  #   import matplotlib
+  #   from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
+  #   from matplotlib.figure import Figure
+
+  #   fig = Figure(dpi=150)
+  #   fig.set_facecolor((0, 0, 0))
+  #   canvas = FigureCanvas(fig)
+    
+  #   ax = fig.gca()
+
+  #   xyz = self.cloud
+  #   if colored_cloud:
+  #     from psegs.util.plotting import rgb_for_distance
+  #     # colors = [~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  #     #   rgb_for_distance(np.linalg.norm(pt)) / 255
+  #     #   for pt in self.cloud
+  #     # ]
+  #     colors = rgb_for_distance(np.linalg.norm(self.cloud, axis=1)) / 255
+  #     ax.scatter(xyz[:, 0], xyz[:, 1], s=.1, c=colors)
+  #   else:
+  #     ax.scatter(xyz[:, 0], xyz[:, 1], s=.1)
+
+  #   ## Draw Cuboids
+  #   from matplotlib.patches import Polygon
+  #   from matplotlib.collections import PatchCollection
+
+  #   for c in cuboids:
+  #     box_xyz = c.get_box3d()
+  #     box_xyz_2d = box_xyz[:, :2]
+
+  #     from scipy.spatial import ConvexHull
+  #     hull = ConvexHull(box_xyz_2d)
+  #     corners = [(box_xyz_2d[v, 0], box_xyz_2d[v, 1]) for v in hull.vertices]
+  #     polygon = Polygon(corners, closed=True)
+
+  #     from oarphpy.plotting import hash_to_rbg
+  #     color = np.array(hash_to_rbg(c.category_name)) / 255
+
+  #     ax.add_collection(
+  #       PatchCollection([polygon], facecolor=color, edgecolor=color, alpha=0.5))
+
+  #   ax.axis('off')
+  #   fig.tight_layout()
+
+  #   # Render!
+  #   canvas.draw()
+  #   img_str, (width, height) = canvas.print_to_buffer()
+
+  #   img = np.frombuffer(img_str, np.uint8).reshape((height, width, 4))
+  #   return img[:, :, :3] # Return RGB for easy interop
+
+  @staticmethod
+  def paint_ego_cloud(cloud, camera_images=None):
+    """ TODO comment """
+    xyzrgb = np.ones((cloud.shape[0], 3 + 3)) * 128.
+    xyzrgb[:, :3] = cloud[:, :3]
+
+    camera_images = camera_images or []
+    #alpha = 1. / len(camera_images) if camera_images else 1.
+    for i, ci in enumerate(camera_images):
+      uvd = ci.project_ego_to_image(xyzrgb[:, :3], omit_offscreen=False)
+      
+      img = ci.image
+      h, w = ci.image.shape[:2]
+      uvd[:, :2] = np.rint(uvd[:, :2])
+      to_paint = np.where(
+                (uvd[:, 0] >= 0) & 
+                (uvd[:, 0] < w) &
+                (uvd[:, 1] >= 0) & 
+                (uvd[:, 1] < h) &
+                (uvd[:, 2] >= 0.01))
+      px_xy = uvd[to_paint].astype(np.int)
+      painted = img[px_xy[:, 1], px_xy[:, 0], :]
+      # if i == 0:
+      xyzrgb[to_paint[0], 3:] = painted
+      # else:
+      #   xyzrgb[to_paint[0], 3:] = (
+      #     alpha * xyzrgb[to_paint[0], 3:] + alpha * painted)
+
+    return xyzrgb
+
+  @staticmethod
+  def get_ortho_debug_image(
+        cloud,
+        user_colors=None,
+        cuboids=None,
+        camera_images=None,
+        ego_to_sensor=None,
+        flatten_axis='+x',
+        u_axis='+y',
+        v_axis='+z',
+        u_bounds=(-10, 10),
+        v_bounds=(-10, 10),
+        depth_values=None,
+        filter_behind=True,
+        pixels_per_meter=200):
+    """Create and return a half-space-flattened debug image for the given
+    `cloud` of (x, y, z) points.  For example, an RV (Range-Value-perspective)
+    image flattens the cloud's +x axis (forwards), and a BEV (Bird's-Eye-View
+    perspective) image flattens the cloud's +z axis (up).
+
+    Args:
+      cloud (np.array): An nx3 array of points (in units of meters) to draw.
+        Rows of `user_colors` and `depth_values` correspond to these rows.
+      user_colors (np.array): Optionally color each point using this nx3 array of
+        RGB colors (with color values in [0, 255]).  By default, color
+        points based on distance from the origin.
+      cuboids (List[:class:`~psegs.datum.cuboid.Cuboid`]): Optionally draw
+        these cuboids in the given debug image; cuboids must either (a) be in
+        the ego frame (PSegs standard) and `ego_to_sensor` given, or (b) the
+        caller of this method must first transform `cuboids` to the point
+        sensor frame.
+      camera_images (List[:class:`~psegs.datum.camera_image.CameraImage`]):
+        Optionally paint the cloud points using pixels from these camera
+        images.  By default, color points based upon distance from the sensor
+        origin.
+      ego_to_sensor (:class:`~psegs.datum.transform.Transform`): Optional
+        transform for projecting ego points (`cuboids` corners and
+        `camera_image` rays) to the sensor frame.
+      flatten_axis (str): Flatten this `cloud` axis and use it as the image
+        plane. Use a positive sign and `filter_behind=True` to plot points in
+        the positive half-space.
+      u_axis (str): Use this `cloud` axis as the +u (left-to-right) axis
+        of the debug image.  Negative sign flips the `cloud` axis.
+      v_axis (str): Use this `cloud` axis as the +v (bottom-to-top) axis
+        of the debug image.  Negative sign flips the `cloud` axis.
+      u_bounds (Tuple[int, int]): Restrict view to this min/max
+        u_axis-value (in meters).  Use None to auto-fit.
+      v_bounds (Tuple[int, int]): Restrict view to this min/max
+        v_axis-value (in meters).  Use None to auto-fit.
+      depth_values (np.array): Optional length-n array of depth-in-meters
+        values for plot colors (in place of the raw `flatten_axis` values).
+      filter_behind (bool): Restrict view to only positive points
+        along the flattened dimension.
+      pixels_per_meter (int): Rasterize points at this resolution.
+
+    Returns:
+      np.array: A HWC RGB debug image.
+    """
+
+    def pts_to_uvd(pts):
+      # Return a copy of `pts` changing axis ordering to reflect the desired
+      # `u`, `v`, and `d` axes (new x y and z).
+      AXIS_NAME_TO_IDX = {'x': 0, 'y': 1, 'z': 2}
+      AXES = (u_axis, v_axis, flatten_axis)
+      uid, vid, did = tuple(AXIS_NAME_TO_IDX[a[-1]] for a in AXES)
+      us, vs, ds = tuple(-1. if a[0] == '-' else 1. for a in AXES)
+      uvd = np.zeros((pts.shape[0], 3))
+      uvd[:, 0] = pts[:, uid] * us
+      uvd[:, 1] = pts[:, vid] * vs
+      uvd[:, 2] = pts[:, did] * ds
+      return uvd
+
+    def for_kept_points(values):
+      # Per-point inputs are given for the whole `cloud`; drop the rows that
+      # `filter_behind` removed.  Inputs that the caller already sized to the
+      # kept points (or that are scalars) are passed through untouched.
+      values = np.asarray(values)
+      is_per_cloud_point = (
+        unfiltered is not None and values.ndim > 0 and
+        values.shape[0] == unfiltered.shape[0])
+      return values[unfiltered] if is_per_cloud_point else values
+
+    # Map cloud to (u, v, d) space
+    uvd = pts_to_uvd(cloud)
+
+    unfiltered = None
+    if filter_behind:
+      unfiltered = uvd[:, 2] >= 0
+      uvd = uvd[unfiltered]
+
+    # Decide bounds
+    if u_bounds is None:
+      u_bounds = (uvd[:, 0].min(), uvd[:, 0].max())
+    if v_bounds is None:
+      v_bounds = (uvd[:, 1].min(), uvd[:, 1].max())
+    if depth_values is not None:
+      uvd[:, 2] = for_kept_points(depth_values).reshape(-1)
+    if user_colors is not None:
+      user_colors = for_kept_points(user_colors)
+
+    # Maybe paint the cloud
+    if camera_images and (user_colors is None):
+      cloud = cloud[:, :3] # Ignore any non-position columns
+      to_paint = (
+        ego_to_sensor.get_inverse().apply(cloud).T
+        if ego_to_sensor
+        else cloud)
+      if unfiltered is not None:
+        to_paint = to_paint[unfiltered]
+      xyzrgb = PointCloud.paint_ego_cloud(to_paint, camera_images=camera_images)
+      user_colors = xyzrgb[:, 3:]
+
+    # Draw!
+    img = pspl.get_ortho_debug_image(
+            uvd,
+            min_u=u_bounds[0],
+            max_u=u_bounds[1],
+            min_v=v_bounds[0],
+            max_v=v_bounds[1],
+            pixels_per_meter=pixels_per_meter,
+            period_meters=10.,
+            user_colors=user_colors)
+
+    for c in cuboids or []:
+      box_xyz = c.get_box3d()
+      if ego_to_sensor is not None:
+        box_xyz = ego_to_sensor.apply(box_xyz).T
+      box_uvd = pts_to_uvd(box_xyz)
+      if filter_behind and not np.any(box_uvd[:, 2] >= 0):
+        continue # Every corner of this box is behind the image plane
+
+      box_uv = box_uvd[:, (0, 1)] - np.array([u_bounds[0], v_bounds[1]])
+      box_uv *= pixels_per_meter
+      box_uv[:, 1] *= -1 # Debug image y-axis is flipped
+      # `np.int` was removed in NumPy 1.24; the builtin `int` is equivalent
+      box_uv = np.rint(box_uv).astype(int)
+
+      from oarphpy.plotting import hash_to_rbg
+      pspl.draw_cuboid_xy_in_image(
+        img, box_uv, np.array(hash_to_rbg(c.category_name)), alpha=0.3)
+    return img
+
+
+  def get_front_rv_debug_image(
+          self,
+          cuboids=None,
+          camera_images=None,
+          z_bounds_meters=(-3, 3),
+          y_bounds_meters=(-20, 20),
+          pixels_per_meter=200):
+    """Render this point cloud as an RV (Range-Value) perspective debug
+    image that looks along the cloud's +x axis.
+
+    Args:
+      cuboids (List[:class:`~psegs.datum.cuboid.Cuboid`]): (Optional) draw
+        these cuboids in the debug image.
+      camera_images (List[:class:`~psegs.datum.camera_image.CameraImage`]):
+        (Optional) paint the cloud points using pixels from these camera
+        images.
+      z_bounds_meters (Tuple[int, int]): Restrict the view to this min/max
+        z-value in point cloud frame.  Use `None` to auto-size.
+      y_bounds_meters (Tuple[int, int]): Restrict the view to this min/max
+        y-value in point cloud frame.  Use `None` to auto-size.
+      pixels_per_meter (int): Rasterize debug image at this resolution.
+
+    Returns:
+      np.array: A HWC RGB debug image.
+    """
+    # Image +u runs along the cloud's -y axis; image +v runs along +z.
+    view = dict(
+      flatten_axis='+x',
+      u_axis='-y',
+      v_axis='+z',
+      u_bounds=y_bounds_meters,
+      v_bounds=z_bounds_meters,
+      filter_behind=True,
+      pixels_per_meter=pixels_per_meter)
+    xyz = self.get_xyz_cloud()
+    return PointCloud.get_ortho_debug_image(
+      xyz, cuboids=cuboids, camera_images=camera_images,
+      ego_to_sensor=self.ego_to_sensor, **view)
+  
+
+  def get_bev_debug_image(
+          self,
+          cuboids=None,
+          camera_images=None,
+          x_bounds_meters=(-80, 80),
+          y_bounds_meters=(-80, 80),
+          pixels_per_meter=20):
+    """Render this point cloud as a BEV (Birds Eye View) perspective debug
+    image.
+
+    Args:
+      cuboids (List[:class:`~psegs.datum.cuboid.Cuboid`]): (Optional) draw
+        these cuboids in the debug image.
+      camera_images (List[:class:`~psegs.datum.camera_image.CameraImage`]):
+        (Optional) paint the cloud points using pixels from these camera
+        images.
+      x_bounds_meters (Tuple[int, int]): Restrict the view to this min/max
+        x-value in point cloud frame.  Use `None` to auto-size.
+      y_bounds_meters (Tuple[int, int]): Restrict the view to this min/max
+        y-value in point cloud frame.  Use `None` to auto-size.
+      pixels_per_meter (int): Rasterize debug image at this resolution.
+
+    Returns:
+      np.array: A HWC RGB debug image.
+    """
+    xyz = self.get_xyz_cloud()
+    # Color by range within the x-y plane rather than by the flattened axis.
+    planar_range = np.linalg.norm(xyz[:, (0, 1)], axis=-1)
+    view = dict(
+      flatten_axis='-z',
+      u_axis='+x',
+      v_axis='+y',
+      u_bounds=x_bounds_meters,
+      v_bounds=y_bounds_meters,
+      depth_values=planar_range,
+      filter_behind=False,
+      pixels_per_meter=pixels_per_meter)
+    return PointCloud.get_ortho_debug_image(
+      xyz, cuboids=cuboids, camera_images=camera_images,
+      ego_to_sensor=self.ego_to_sensor, **view)
+
+
+    # import cv2
+
+    # # Build the image to return
+    # w = sum(abs(v) for v in y_bounds_meters) * pixels_per_meter
+    # h = sum(abs(v) for v in z_bounds_meters) * pixels_per_meter
+    # img = np.zeros((h, w, 3)).astype(np.uint8)
+
+    # def yz_to_uv(yz):
+    #   # cloud +y = img -x axis
+    #   u = -yz[:, 0] * pixels_per_meter + w / 2.
+    #   # cloud +z = img -y axis (down)
+    #   v = -yz[:, 1] * pixels_per_meter + h / 2.
+    #   return np.column_stack([u, v])
+
+    # ## Draw Cloud
+    # # Filter behind ego; keep only +x points
+    # cloud = self.cloud[:, :3]
+    # cloud = cloud[np.where(cloud[:, 0] >= 0)]
+
+    # # Convert to pixel (u, v, d)
+    # pts_d = cloud[:, 0]
+    # pts_uv = yz_to_uv(cloud[:, (1, 2)])
+    # pts = np.column_stack([pts_uv, pts_d])
+    
+    # pspl.draw_xy_depth_in_image(img, pts, alpha=1.0)
+
+    # ## Draw Cuboids
+    # for c in cuboids or []:
+    #   # TODO frame check ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    #   box_xyz = c.get_box3d()
+    #   box_xyz_2d = box_xyz[:, (1, 2)]
+
+    #   # TODO Filter behind +x !!!!!!!!!!!!!!!!!!!!
+
+    #   from oarphpy.plotting import hash_to_rbg
+    #   color = pspl.color_to_opencv(
+    #     np.array(hash_to_rbg(c.category_name)))
+
+    #   pspl.draw_cuboid_xy_in_image(
+    #     img,
+    #     yz_to_uv(box_xyz_2d),
+    #     np.array(hash_to_rbg(c.category_name)),
+    #     alpha=0.8)
+
+
+    #   # from scipy.spatial import ConvexHull
+    #   # hull = ConvexHull(box_xyz[:, 1:])
+    #   # corners_yz = np.array([
+    #   #   (box_xyz[v, 1], box_xyz[v, 2]) for v in hull.vertices])
+      
+    #   # from oarphpy.plotting import hash_to_rbg
+    #   # color = pspl.color_to_opencv(
+    #   #   np.array(hash_to_rbg(c.category_name)))
+
+    #   # pts_uv = yz_to_uv(corners_yz)
+    #   # pts_uv = np.rint(pts_uv).astype(np.int)
+
+    #   # # Draw transparent fill
+    #   # CUBOID_FILL_ALPHA = 0.6
+    #   # coverlay = img.copy()
+    #   # cv2.fillPoly(img, [pts_uv], color)
+    #   # img[:] = cv2.addWeighted(
+    #   #   coverlay, CUBOID_FILL_ALPHA, img, 1 - CUBOID_FILL_ALPHA, 0)
+      
+    #   # # Draw outline
+    #   # cv2.polylines(
+    #   #   img,
+    #   #   [pts_uv],
+    #   #   True, # is_closed
+    #   #   color,
+    #   #   1) #thickness
+
+    # return img
+
+  def to_trimeshes_world_frame(
+          self,
+          period_meters=1.,
+          use_pc_colors=False,
+          max_num_points=1_000_000,
+          color=None,
+          colors=None):
+    """Return a one-element list holding a `trimesh.points.PointCloud` of
+    this cloud after applying the `ego_pose` / `ego_to_sensor` transforms.
+    At most `max_num_points` randomly sampled points are kept (no limit if
+    negative).  Color precedence: the cloud's own colors (`use_pc_colors`),
+    per-point `colors`, a single `color`, else distance-based coloring.
+    """
+    import trimesh
+    from psegs.util.plotting import rgb_for_distance
+
+    T_ego_from_sensor = self.ego_to_sensor[self.sensor_name, 'ego']
+    T_world_from_ego = self.ego_pose['ego', 'world']
+    p2w = T_world_from_ego @ T_ego_from_sensor
+    xyz = p2w.get_inverse().apply(self.get_xyz_cloud()).T
+
+    idx = None
+    n_all = xyz.shape[0]
+    if max_num_points >= 0 and n_all > max_num_points:
+      idx = np.random.choice(np.arange(n_all), max_num_points)
+      xyz = xyz[idx, :]
+
+    if use_pc_colors and self.has_rgb():
+      colors = self.get_colors_cloud()
+    elif colors is None and color is not None:
+      colors = np.tile(np.array(color), [xyz.shape[0], 1])
+    elif colors is None:
+      colors = rgb_for_distance(
+        np.linalg.norm(xyz, axis=1), period_meters=period_meters)
+    if colors is not None:
+      colors = np.asarray(colors)
+      if idx is not None and colors.shape[0] == n_all:
+        colors = colors[idx] # Keep colors aligned with the sampled points
+      colors = np.clip(colors, 0, 255).astype('uint8')
+    pc_tmesh = trimesh.points.PointCloud(
+                vertices=xyz if len(xyz) else np.array([[0., 0., 0.]]),  # TODO fixme trimesh wont GLTF empty array?????
+                colors=colors)
+    return [pc_tmesh]
+      
+
+
+  def to_html(self, cuboids=None, bev_debug=False, rv_debug=False):
+    """Return an HTML summary of this cloud: a table of key attributes
+    followed by an interactive Plotly 3D scatter plot of the points, with
+    any `cuboids` drawn as wireframe boxes.
+
+    Args:
+      cuboids (List[Cuboid]): Optional cuboids to draw with the cloud.
+      bev_debug (bool): Currently unused.
+      rv_debug (bool): Currently unused.
+
+    Returns:
+      str: An HTML fragment (table markup plus a Plotly `div`).
+    """
+    import itertools
+
+    import pandas as pd
+    import plotly
+    import plotly.graph_objects as go
+    import tabulate
+
+    from oarphpy.plotting import hash_to_rbg
+    from psegs.datum.datumutils import to_preformatted
+    from psegs.util.plotting import rgb_for_distance
+
+    cuboids = cuboids or []
+    cloud = self.get_cloud()
+
+    ## Summary table
+
+    table = [
+      [name, to_preformatted(getattr(self, name))]
+      for name in (
+        'sensor_name',
+        'timestamp',
+        'ego_to_sensor')
+    ]
+    table.extend([
+      ['Cloud', ''],
+      ['Num Points', cloud.shape[0]]
+    ])
+    html = tabulate.tabulate(table, tablefmt='html')
+
+    ## Plotly 3d plot of cloud and cubes  TODO extract to au plotting
+
+    # Plot positions only: the cloud may carry extra (non-xyz) columns, which
+    # a three-column DataFrame would reject.
+    cloud_df = pd.DataFrame(cloud[:, :3], columns=['x', 'y', 'z'])
+    cloud_df['color'] = [
+      rgb_for_distance(np.linalg.norm(pt))
+      for pt in cloud_df[['x', 'y', 'z']].values
+    ]
+    scatter = go.Scatter3d(
+                name=self.sensor_name,
+                x=cloud_df['x'], y=cloud_df['y'], z=cloud_df['z'],
+                mode='markers',
+                marker=dict(size=1, color=cloud_df['color'], opacity=0.8),)
+
+    ## Cuboid wireframes
+
+    # All cuboid edges share one Scatter3d trace: `lines` holds one polyline
+    # per entry and `colors` holds one CSS color per polyline point.
+    lines = []
+    colors = []
+
+    def to_css_color(rgb):
+      r, g, b = np.clip(rgb, 0, 255).astype(int).tolist()
+      return 'rgb(%s,%s,%s)' % (r, g, b)
+
+    def add_line(pts, rgb):
+      # Close the polyline (return to its first point) and bracket it with
+      # `None` entries, presumably so that Plotly leaves a gap between
+      # consecutive polylines.  The bracket entries get a placeholder color
+      # so that `colors` stays aligned with the plotted points.
+      line = [None] + [list(p) for p in (pts + [pts[0]])] + [None]
+      lines.append(line)
+      colors.extend(
+        ['rgb(0,0,0)'] +
+        (len(line) - 2) * [to_css_color(rgb)] +
+        ['rgb(0,0,0)'])
+
+    for cuboid in cuboids:
+      cbox = cuboid.get_box3d()
+      front = [cbox[i, :] for i in (0, 1, 2, 3)]
+      back = [cbox[i, :] for i in (4, 5, 6, 7)]
+
+      # Lighter front face, darker back face, base color for joining edges
+      base_color = np.array(hash_to_rbg(cuboid.category_name))
+      add_line(front, base_color + 0.3 * 255)
+      for start, end in zip(front, back):
+        add_line([start, end], base_color)
+      add_line(back, base_color - 0.3 * 255)
+
+    def to_line_vals(idx):
+      ipts = itertools.chain.from_iterable(lines)
+      return [(pt[idx] if pt is not None else pt) for pt in ipts]
+
+    lines_plot = go.Scatter3d(
+                    name='labels|cuboids',
+                    x=to_line_vals(0),
+                    y=to_line_vals(1),
+                    z=to_line_vals(2),
+                    mode='lines',
+                    line=dict(width=3, color=colors))
+
+    fig = go.Figure(data=[scatter, lines_plot])
+    fig.update_layout(
+      title=self.sensor_name,
+      width=1000, height=700,
+      scene_aspectmode='data')
+      # scene_camera=dict(
+      #   up=dict(x=0, y=0, z=1),
+      #   eye=dict(x=0, y=0, z=0),
+      #   center=dict(x=1, y=0, z=0),
+      # ))
+    plot_str = plotly.offline.plot(fig, output_type='div')
+    html += '<br/><br/>' + plot_str
+    return html
diff --git a/psegs/datum/points2d.py b/psegs/datum/points2d.py
new file mode 100644
index 0000000..aa81094
--- /dev/null
+++ b/psegs/datum/points2d.py
@@ -0,0 +1,148 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+from oarphpy.spark import CloudpickeledCallable
+
+import attr
+import numpy as np
+
+from psegs.util import misc
+from psegs.datum.camera_image import CameraImage
+
+
+@attr.s(slots=True, eq=False, weakref_slot=False)
+class Points2D(object):
+  """A set of 2D points (plus optional extra per-point columns and string
+  attributes), usually located in the pixel space of `img`."""
+
+  annotator_name = attr.ib(type=str, default="")
+  """str: Name of the source of the points, e.g. "point_detector"; could be
+  identical to the topic name or suffix."""
+
+  timestamp = attr.ib(type=int, default=0)
+  """int: Timestamp associated with this set of points; often the timestamp of
+  `img`."""
+
+  img = attr.ib(default=None, type=CameraImage)
+  """CameraImage: The image domain for these points."""
+
+  points_array = attr.ib(type=np.ndarray, default=None)
+  """numpy.ndarray: Points as an n-by-d matrix (where `d` is *at least*
+  2, i.e. (img x, img y))."""
+
+  points_factory = attr.ib(
+    type=CloudpickeledCallable,
+    converter=CloudpickeledCallable,
+    default=None)
+  """CloudpickeledCallable: A serializable factory function that emits the
+  values for `points_array` (if a realized array cannot be provided)"""
+
+  # NB: `attr.Factory` defaults give each instance its own list / dict.
+  points_colnames = attr.ib(default=attr.Factory(lambda: ['x', 'y']))
+  """List[str]: Semantic names for the columns (or dimensions / attributes)
+  of the `points_array`.  Typically points are just 2D points, but point data
+  can include numeric class_id, score, distance, etc."""
+
+  point_attributes = attr.ib(default=attr.Factory(list), type=typing.List[str])
+  """List[str]: For each row / point in `points_array`, this member
+  provides string attributes (e.g. classnames) for the point.
+  """
+
+  extra = attr.ib(default=attr.Factory(dict), type=typing.Dict[str, str])
+  """Dict[str, str]: A map for adhoc extra context"""
+
+  def __eq__(self, other):
+    return misc.attrs_eq(self, other)
+
+  def get_col_idx(self, colname):
+    """Return the index of column `colname`; raise ValueError if absent."""
+    for i, name in enumerate(self.points_colnames):
+      if name == colname:
+        return i
+    raise ValueError(
+      "Colname %s not found in %s" % (colname, self.points_colnames))
+
+  def get_xy_axes(self):
+    """Return the column indices of the `x` and `y` columns."""
+    return [self.get_col_idx('x'), self.get_col_idx('y')]
+
+  def get_other_axes(self):
+    """Return `(names, indices)` of the non-x/y columns, sorted by name."""
+    other_names = sorted(set(self.points_colnames) - set(['x', 'y']))
+    other_idx = [self.get_col_idx(n) for n in other_names]
+    return other_names, other_idx
+
+  def get_points(self):
+    """Return `points_array`, else the output of `points_factory`."""
+    if self.points_array is not None:
+      return self.points_array
+    elif self.points_factory != CloudpickeledCallable.empty():
+      return self.points_factory()
+    else:
+      raise ValueError("No points data!")
+
+  def get_xy(self):
+    """Return just the (x, y) columns of the points, in that order."""
+    return self.get_points()[:, self.get_xy_axes()]
+
+  def get_xy_extra(self):
+    """Return the (x, y) columns followed by the other columns (by name)."""
+    points = self.get_points()
+    _, other_idx = self.get_other_axes()
+    return points[:, self.get_xy_axes() + other_idx]
+
+  def get_points_colnames(self, colnames):
+    """Return the columns named in `colnames`, in the given order."""
+    axes = [self.get_col_idx(c) for c in colnames]
+    return self.get_points()[:, axes]
+
+  def get_debug_points_image(
+          self,
+          should_color_with_gid_col=True,
+          color_cols=None,
+          colors=None):
+    """Return a copy of `img` (or, without an `img`, a black canvas sized to
+    fit the points) with the points drawn on it.  Unless explicit `colors`
+    are given, points with extra columns are colored by hashing, in order of
+    preference: the last column named `*gid` (if `should_color_with_gid_col`),
+    the `color_cols` columns, or else all of the non-x/y columns.
+    """
+    from psegs.util import plotting as pspl
+    from oarphpy.plotting import hash_to_rbg
+
+    pts = self.get_xy()
+    if colors is None and len(self.points_colnames) > 2:
+      colordata = None
+      if should_color_with_gid_col:
+        for i, colname in enumerate(self.points_colnames):
+          if colname.endswith('gid'):
+            colordata = self.get_points()[:, i]
+      # Honor `color_cols` whenever no gid column supplied the color data
+      if colordata is None and color_cols is not None:
+        colordata = self.get_points_colnames(color_cols)
+      if colordata is None:
+        colordata = self.get_xy_extra()[:, 2:]
+      colors = np.array([hash_to_rbg(r) for r in colordata])
+
+    if self.img is not None:
+      debug_image = self.img.image.copy()
+    else:
+      # An empty point set has no max; fall back to a 1x1 canvas
+      w = (int(pts[:, 0].max()) + 1) if len(pts) else 1
+      h = (int(pts[:, 1].max()) + 1) if len(pts) else 1
+      debug_image = np.zeros((h, w, 3), dtype='uint8')
+    pspl.draw_colored_2dpts_in_image(debug_image, pts, user_colors=colors)
+    return debug_image
diff --git a/psegs/datum/stamped_datum.py b/psegs/datum/stamped_datum.py
new file mode 100644
index 0000000..204f0a4
--- /dev/null
+++ b/psegs/datum/stamped_datum.py
@@ -0,0 +1,275 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import itertools
+import typing
+
+import attr
+import numpy as np
+
+from psegs.datum.camera_image import CameraImage
+from psegs.datum.cuboid import Cuboid
+from psegs.datum.point_cloud import PointCloud
+from psegs.datum.points2d import Points2D
+from psegs.datum.matched_pair import MatchedPair
+from psegs.datum.transform import Transform
+from psegs.datum.uri import DatumSelection
+from psegs.datum.uri import URI
+
+
+@attr.s(slots=True, eq=True, order=False, weakref_slot=False)
+class StampedDatum(object):
+  """A union-like class containing a single piece of data associated with a
+  specific time ("stamp" or "timestamp") from a specific segment of data
+  (i.e. a single `segment_id`).  Represents a single row in a
+  `StampedDatumTable`.  While the object has multiple attributes, only one
+  data attribute typically has non-vacuous value.
+  """
+
+  ## Every datum can be addressed
+  uri = attr.ib(type=URI, default=None, converter=URI.from_str)
+  """URI: The URI addressing this datum; also defines sort order"""
+
+  # Order by URI only (never by data content).  NB: `order=False` above keeps
+  # attrs from replacing these with generated methods that compare content
+  # on URI ties; `>` and `>=` reach these via Python's reflected comparisons.
+  def __lt__(self, other):
+    return self.uri < other.uri if type(other) is type(self) else NotImplemented
+  def __le__(self, other):
+    return not (other.uri < self.uri) if type(other) is type(self) else NotImplemented
+
+  ## A datum should contain exactly one of the following:
+  camera_image = attr.ib(type=CameraImage, default=None)
+  """CameraImage: A single camera image"""
+
+  point_cloud = attr.ib(type=PointCloud, default=None)
+  """PointCloud: A single point cloud"""
+
+  cuboids = attr.ib(type=typing.List[Cuboid], default=attr.Factory(list))
+  """List[Cuboid]: Zero or more cuboids; topic name may indicate label or
+  prediction."""
+
+  transform = attr.ib(type=Transform, default=None)
+  """Transform: A transform such as ego pose; topic indicates semantics"""
+
+  matched_pair = attr.ib(type=MatchedPair, default=None)
+  """MatchedPair: A pair of images with pixel-to-pixel matches"""
+
+  points_2d = attr.ib(type=Points2D, default=None)
+  """Points2D: An array of 2D points, usually associated with a camera image"""
+
+
+@attr.s(slots=True, eq=True, weakref_slot=False)
+class Sample(object):
+  """A `Sample` is a group of :class:`~psegs.datum.stamped_datum.StampedDatum`
+  instances that centers around a specific event or purpose.  For example, a
+  `Sample` may group all datums around a specific timestamp; in particular,
+  a `Sample` may be used to synchronize camera, lidar, and label
+  data.  A `Sample` is a container for a set of data specified in a
+  :class:`~psegs.datum.uri.DatumSelection`.
+  """
+
+  datums = attr.ib(type=typing.List[StampedDatum], default=attr.Factory(list))
+  """List[StampedDatum]: All datums associated with this `Sample`"""
+
+  uri = attr.ib(type=URI, default=None)
+  """URI: The URI addressing this `Sample` (and group of datums)"""
+
+  def __attrs_post_init__(self):
+    if not self.uri:
+      if self.datums:
+        # Note this might effectively select a segment_id and/or uri.extra
+        # data that is not consistent with the rest of the `datums`.  You
+        # probably want to specify your own `uri`.
+        base_uri = min(d.uri for d in self.datums)
+        self.uri = copy.deepcopy(base_uri)
+
+    if self.uri:
+      self.uri = copy.deepcopy(URI.from_str(self.uri))
+
+    if self.uri and not self.uri.sel_datums:
+      self.uri.sel_datums = DatumSelection.selections_from_value(self.datums)
+
+  ## Topic selectors
+
+  def topic_datums(self, topic=None, prefix=None):
+    """Return all `StampedDatum` instances for the given topic.
+
+    Args:
+      topic (str): Select all datums from this topic, e.g. `camera|front`.
+      prefix (str): Select all datums with this topic prefix; E.g.
+        `camera` selects `camera|front` and `camera|back`.
+
+    Returns:
+      List[StampedDatum]: The selected datums
+    """
+
+    def is_from_topic(datum):
+      if topic is not None:
+        return datum.uri.topic == topic
+      elif prefix is not None:
+        return datum.uri.topic.startswith(prefix)
+      else:
+        raise ValueError("Must specify a topic or prefix")
+
+    return [
+      sd for sd in self.datums
+      if is_from_topic(sd)
+    ]
+
+  @property
+  def ego_poses(self):
+    """Normalized selector for the
+    :class:`~psegs.datum.transform.Transform` datums in this `Sample`
+    (presumably ego poses).  Returns the transform of every datum that has
+    one, regardless of topic.
+    """
+    return [
+      sd.transform for sd in self.datums if sd.transform is not None
+    ]
+
+  @property
+  def camera_images(self):
+    """Normalized selector for all camera
+    :class:`~psegs.datum.camera_image.CameraImage` datums in this `Sample`.
+    """
+    return [
+      sd.camera_image for sd in self.datums if sd.camera_image is not None
+    ]
+
+  @property
+  def lidar_clouds(self):
+    """Normalized selector for all lidar
+    :class:`~psegs.datum.point_cloud.PointCloud` datums in this `Sample`.
+    """
+    return [
+      sd.point_cloud for sd in self.datums if sd.point_cloud is not None
+    ]
+
+  @property
+  def cuboid_labels(self, topic='labels|cuboids'):
+    """Normalized selector for the *label* :class:`~psegs.datum.Cuboid`
+    canonical topic.  Returns a list of cuboids flattened from all available
+    datums.  NB: property access cannot pass `topic`, so the default applies.
+    """
+    return list(itertools.chain.from_iterable(
+      sd.cuboids for sd in self.topic_datums(topic=topic)))
+
+  ## Utils
+
+  def get_topics(self):
+    return sorted(set(sd.uri.topic for sd in self.datums))
+
+  def get_uri_str_to_datum(self):
+    return dict((str(datum.uri), datum) for datum in self.datums)
+
+
+###
+### Prototypes
+###
+
+# Spark (and `RowAdapter`) can automatically deduce schemas from object
+# hierarchies, but these tools need non-null, non-empty members to deduce
+# proper types.  Creating a DataFrame with an explicit schema can also
+# improve efficiency dramatically, because then Spark can skip row sampling
+# and parallelized auto-deduction.  The Prototypes below serve to provide
+# enough type information for `RowAdapter` to deduce the full `StampedDatum`
+# schema.  In the future, Spark may perhaps add support for reading Python 3
+# type annotations, in which case the Prototypes will be obviated.
+
+# URI_PROTO_KWARGS = dict(
+#   # Core spec; most URIs will have these set
+#   dataset='proto',
+#   split='train',
+#   segment_id='proto_segment',
+#   topic='topic',
+#   timestamp=int(100 * 1e9), # In nanoseconds
+  
+#   # Uris can identify more specific things in a Frame
+#   camera='camera_1',
+#   camera_timestamp=int(100 * 1e9), # In nanoseconds
+  
+#   crop_x=0, crop_y=0,
+#   crop_w=10, crop_h=10,
+  
+#   track_id='track-001',
+
+#   extra={'key': 'value'},
+# )
+URI_PROTO_KWARGS = dict(
+  extra={'key': 'value'},  # non-empty so the key / value types can be deduced
+  sel_datums=[DatumSelection(topic='t', timestamp=1)],  # ditto: element type
+)
+URI_PROTO = URI(**URI_PROTO_KWARGS)
+
+TRANSFORM_PROTO = Transform()  # built entirely from the attribute defaults
+
+CUBOID_PROTO = Cuboid(
+  extra={'key': 'value'},  # non-empty so the key / value types can be deduced
+)
+
+# BBOX_PROTO = BBox(
+#   x=0, y=0,
+#   width=10, height=10,
+#   im_width=100, im_height=100,
+#   category_name='vehicle',
+#   au_category='car',
+
+#   cuboid=CUBOID_PROTO,
+#   cuboid_pts=np.ones((8, 3), dtype=np.float32),
+#   cuboid_center=np.array([1., 2., 3.]),
+#   cuboid_in_cam=np.ones((8, 3), dtype=np.float32),
+
+#   has_offscreen=False,
+#   is_visible=True,
+
+#   cuboid_from_cam=np.array([1., 0., 1.]),
+#   ypr_camera_local=np.ones((1, 3)),
+# )
+
+POINTCLOUD_PROTO = PointCloud(
+  cloud=np.ones((10, 3), dtype=np.float32),  # 10 rows x 3 columns of float32
+  extra={'key': 'value'},
+)
+
+CAMERAIMAGE_PROTO = CameraImage(
+  distortion_kv={'': 0.0},  # placeholder entry with an empty-string key
+  extra={'key': 'value'},
+)
+
+MATCHED_PAIR_PROTO = MatchedPair(
+  matches_array=np.ones((10, 4), dtype=np.float32),  # 10 rows x 4 columns
+  img1=CAMERAIMAGE_PROTO,
+  img2=CAMERAIMAGE_PROTO,
+  extra={'key': 'value'},
+)
+
+POINTS2D_PROTO = Points2D(
+  img=CAMERAIMAGE_PROTO,
+  points_array=np.ones((3, 2), dtype=np.float32),  # 3 rows x 2 columns
+  point_attributes=['a', 'a', 'c'],  # one string per row of `points_array`
+  extra={'key': 'value'},
+)
+
+STAMPED_DATUM_PROTO = StampedDatum(  # fills every field, unlike a typical datum
+  uri=URI_PROTO,
+  camera_image=CAMERAIMAGE_PROTO,
+  point_cloud=POINTCLOUD_PROTO,
+  cuboids=[CUBOID_PROTO],
+  transform=TRANSFORM_PROTO,
+  matched_pair=MATCHED_PAIR_PROTO,
+  points_2d=POINTS2D_PROTO,
+)
+
diff --git a/psegs/datum/transform.py b/psegs/datum/transform.py
new file mode 100644
index 0000000..376ccd2
--- /dev/null
+++ b/psegs/datum/transform.py
@@ -0,0 +1,146 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+
+import attr
+import numpy as np
+
+from psegs.datum import datumutils as du
+from psegs.util import misc
+
+def _force_shape(shape):
+  """Build a converter that copies its input into an ndarray of `shape`."""
+  # np.array() copies by default, so the result never aliases the input.
+  return lambda arr: np.reshape(np.array(arr), shape)
+
+
+@attr.s(slots=True, eq=False, weakref_slot=False)
+class Transform(object):
+  """An SE(3) / ROS Transform-like object.  Defaults to the identity
+  transform.  Represents a transformation to `dest_frame` from
+  `src_frame`.
+  """
+
+  rotation = attr.ib(
+              default=np.eye(3, 3),
+              converter=_force_shape((3, 3)))
+  """np.ndarray: A 3x3 rotation matrix"""
+  
+  translation = attr.ib(
+              default=np.zeros((3, 1)),
+              converter=_force_shape((3, 1)))
+  """np.ndarray: A 3x1 translation matrix"""
+  
+  src_frame = attr.ib(type=str, default="")
+  """str: Name of the source frame"""
+  
+  dest_frame = attr.ib(type=str, default="")
+  """str: Name of the destination frame"""
+  
+  def __eq__(self, other):
+    return misc.attrs_eq(self, other)
+
+  def apply(self, pts):
+    """Apply this transform to `pts` (after `du.maybe_make_homogeneous`) and
+    return the product of the 3x4 [R|T] matrix with `pts.T`."""
+    transform = self.get_transformation_matrix()
+    pts = du.maybe_make_homogeneous(pts)
+    return transform.dot(pts.T)
+
+  @classmethod
+  def from_transformation_matrix(cls, RT, **kwargs):
+    """Create and return a `Transform` from the 3x4 [R|T] (or homogeneous 4x4)
+    matrix `RT`; `kwargs` are forwarded to the `cls` ctor."""
+    R = RT[:3, :3]
+    T = RT[:3, 3]
+    return cls(rotation=R, translation=T, **kwargs)
+
+  def get_transformation_matrix(self, homogeneous=False):
+    """Return a 3x4 [R|T] transform matrix (or a homogeneous
+    4x4 only if `homogeneous`)"""
+    if homogeneous:
+      RT = np.eye(4, 4)
+    else:
+      RT = np.eye(3, 4)
+    RT[:3, :3] = self.rotation
+    RT[:3, 3] = self.translation.reshape(3)
+    return RT
+
+  def get_inverse(self):
+    """Create and return a new transform that is the inverse of this one."""
+    return Transform(
+      rotation=self.rotation.T,
+      translation=self.rotation.T.dot(-self.translation),
+      src_frame=self.dest_frame,
+      dest_frame=self.src_frame)
+
+  def get_xform(self, src, dest):
+    """Return a transform from `src` frame to `dest` frame ; inverts this
+    transform if necessary.  Raises `AssertionError` for unrelated frames."""
+    if sorted((src, dest)) != sorted((self.src_frame, self.dest_frame)):
+      raise AssertionError("Wanted frames (%s, %s) have frames (%s, %s)" % (
+        src, dest, self.src_frame, self.dest_frame))
+    if src == self.src_frame:
+      return copy.deepcopy(self)
+    else:
+      return self.get_inverse()
+
+  def __getitem__(self, index):
+    """Syntactic sugar for `get_xform(src, dest)`.  Example:
+
+    >>> t = Transform(src_frame='f1', dest_frame='f2')
+    >>> t['f1', 'f2'] == t
+    True
+
+    >>> t['f2', 'f1'] == t.get_inverse()
+    True
+
+    An `index` that does not unpack into a `(src, dest)` pair (e.g.
+    `t['moof']`) raises `ValueError`.
+
+    Frames that this transform does not relate (e.g. `t['a', 'b']`) raise
+    `KeyError`.
+    
+    Creates and returns a new Transform instance.
+    """
+    try:
+      src, dest = index
+    except Exception as e:
+      raise ValueError("Invalid input %s, error %s" % (index, e))
+      
+    try:
+      return self.get_xform(src, dest)
+    except Exception as e:
+      raise KeyError("Can't get transform for %s, error %s" % (index, e))
+
+  def compose_with(self, other):
+    """Right-multiply (chain) this `Transform` with `other` and create a new
+    gestalt `Transform` that sends points from the source of `other` to
+    the destination of this `Transform`.
+    """
+    return Transform.from_transformation_matrix(
+              self.get_transformation_matrix(homogeneous=True).dot(
+                other.get_transformation_matrix(homogeneous=True)),
+              src_frame=other.src_frame,
+              dest_frame=self.dest_frame)
+
+  def __matmul__(self, other):  # `a @ b` is `a.compose_with(b)`
+    return self.compose_with(other)
+
+  def is_identity(self):
+    """Is this the identity transform?"""
+    return (
+      np.array_equal(self.rotation, np.eye(3, 3)) and
+      np.array_equal(self.translation, np.zeros((3, 1))))
diff --git a/psegs/datum/uri.py b/psegs/datum/uri.py
new file mode 100644
index 0000000..917e154
--- /dev/null
+++ b/psegs/datum/uri.py
@@ -0,0 +1,333 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import itertools
+import os
+import typing
+
+import attr
+import six
+import six.moves.urllib.parse
+
+
+@attr.s(slots=True, eq=True, weakref_slot=False)
+class DatumSelection(object):
+  """A single topic-time pair to indicate one of several datums within a
+  :class:`~psegs.datum.frame.Frame`"""
+  
+  topic = attr.ib(default='', type='str')
+  """str: Name for a series of messages, e.g. '/ego_pose'"""
+
+  timestamp = attr.ib(default=0, converter=int, type='int')
+  """int: Some integer timestamp; most of `psegs` assumes a Unix time in
+  nanoseconds."""
+
+  @classmethod
+  def selections_to_string(cls, fdatums):
+    """Encode `fdatums` as one sorted 'topic,timestamp,topic,...' string."""
+    if any(',' in fd.topic for fd in fdatums):
+      raise AssertionError("Need a different topic delimiter ...")
+    datum_topic_ts = sorted((fd.topic, str(fd.timestamp)) for fd in fdatums)
+      # NB: Sorting is not required but helps with comparing equal URIs
+    return ','.join(itertools.chain.from_iterable(datum_topic_ts))
+  
+  @classmethod
+  def selections_from_value(cls, v):
+    """Coerce `v` into a sorted list of `DatumSelection`s.  `v` is either a
+    `selections_to_string()` string or an iterable of selection-like items."""
+    if isinstance(v, six.string_types):
+      from oarphpy import util as oputil
+      toks = v.split(',')
+      if len(toks) % 2 != 0:
+        raise AssertionError(toks)  # Explicit so it survives `python -O`
+      return sorted(cls(*dtoks) for dtoks in oputil.ichunked(toks, 2))
+    elif hasattr(v, '__iter__'):
+      def to_ds(vv):
+        if isinstance(vv, DatumSelection):
+          return vv
+        elif all(hasattr(vv, name) for name in cls.__slots__):
+          # It's `cls`-able! E.g. a URI
+          attrvals = ((name, getattr(vv, name)) for name in cls.__slots__)
+          return DatumSelection(**dict(attrvals))
+        elif hasattr(vv, 'uri') and isinstance(vv.uri, URI):
+          attrvals = ((name, getattr(vv.uri, name)) for name in cls.__slots__)
+          return DatumSelection(**dict(attrvals))
+        elif len(vv) == len(cls.__slots__):
+          if isinstance(vv, dict):
+            return DatumSelection(**vv)
+          else:
+            return DatumSelection(*vv)
+        else:
+          raise ValueError("Don't know what to do with %s" % (vv,))
+      return sorted(to_ds(vv) for vv in v)
+    else:
+      raise ValueError("Don't know what to do with %s" % (v,))
+
+
+@attr.s(slots=True, eq=True,  weakref_slot=False, order=False)
+class URI(object):
+  """A URI for one specific datum, or a group of datums (e.g. a 
+  :class:`~psegs.datum.frame.Frame`). All parameters are optional; more
+  parameters address a more specific piece of all StampedDatum data available.
+  """
+  
+  PREFIX = 'psegs://'
+  """The URL prefix or scheme denoting the URL refers to `psegs` data"""
+
+  ## Core Selection
+
+  dataset = attr.ib(default='', type='str')
+  """str: E.g. 'kitti'"""
+  
+  split = attr.ib(default='', type='str')
+  """str: E.g. 'train'"""
+
+  segment_id = attr.ib(default='', type='str')
+  """str: String identifier for a drive segment, e.g. a UUID"""
+  
+  timestamp = attr.ib(default=0, converter=int, type='int')
+  """int: Some integer timestamp; most of `psegs` assumes a Unix time in
+  nanoseconds."""
+  
+  topic = attr.ib(default='', type='str')
+  """str: Name for a series of messages, e.g. '/ego_pose'"""
+
+  extra = attr.ib(factory=dict, type=typing.Dict[str, str])
+  """Dict[str, str]: A map for adhoc extra context"""
+
+  ## Extended Selection
+
+  # # TODO dump this? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # track_id = attr.ib(default='', type='str')
+  # """str: A string identifier of a specific track, e.g. a UUID"""
+
+  sel_datums = attr.ib(
+                default=[], type=typing.List[DatumSelection],
+                converter=DatumSelection.selections_from_value)
+  """List[DatumSelection]: A sequence of (topic, time) pairs that help
+  encode a :class:`~psegs.datum.frame.Frame` -based `URI`."""
+  
+  def to_segment_uri(self):
+    """Return a new URI keeping only `dataset`, `split`, and `segment_id`."""
+    cls = self.__class__
+    return cls(
+            dataset=self.dataset,
+            split=self.split,
+            segment_id=self.segment_id)
+  
+  def soft_matches_segment_of(self, other):
+    """Return true only if this URI soft-matches the segment_id, dataset,
+    and/or split of URI `other`.  A soft match treats each of those three
+    components that is empty on *this* URI as a wildcard matching anything.
+    Note that while most datasets have globally distinct segment_id
+    names, this constraint isn't guaranteed; e.g. a segment_id name might
+    appear in more than one split (by error or by intention).
+    
+    For example:
+      psegs://segment_id=s soft matches with psegs://segment_id=s&dataset=d
+    BUT
+      psegs://dataset=d&segment_id=s 
+        DOES NOT
+      soft match with psegs://segment_id=s 
+
+    """
+    if isinstance(other, six.string_types):
+      other = self.__class__.from_str(other)
+
+    return (
+      ((not self.segment_id) or (self.segment_id == other.segment_id)) and
+      ((not self.dataset) or (self.dataset == other.dataset)) and
+      ((not self.split) or (self.split == other.split)))
+
+  def as_tuple(self):
+    """Return `(key, value)` pairs of the truthy fields; `extra` is flattened."""
+    def to_tokens(k, v):
+      if bool(v):
+        if k == 'extra':
+          for ek, ev in sorted(v.items()):
+            yield ('extra.%s' % ek, ev)
+        else:
+          yield (k, v)
+
+    toks = itertools.chain.from_iterable(
+      to_tokens(f.name, getattr(self, f.name))
+      for f in attr.fields(self.__class__))
+    return tuple(toks)
+
+  def to_str(self):
+    """Encode as `psegs://k1=v1&k2=v2...` from the pairs of `as_tuple()`."""
+    def encode(k, v):
+      if k == 'sel_datums':
+        return DatumSelection.selections_to_string(v)
+      else:
+        return v
+    tup = self.as_tuple()
+    toks = ('%s=%s' % (k, encode(k, v)) for k, v in tup)
+    return '%s%s' % (self.PREFIX, '&'.join(toks))
+  
+  def to_urlsafe_str(self):
+    """Return `to_str()` escaped with `urllib.parse.quote_plus`."""
+    return six.moves.urllib.parse.quote_plus(self.to_str())
+
+  def to_segment_partition_relpath(self):
+    """Return the `dataset=.../split=.../segment_id=...` relative path."""
+    return os.path.join(
+      "dataset=%s" % (self.dataset or 'EMPTY_DATASET'),
+      "split=%s" % (self.split or 'EMPTY_SPLIT'),
+      "segment_id=%s" % (self.segment_id or 'EMPTY_SEGMENT_ID'))
+
+  def __str__(self):
+    return self.to_str()
+  
+  # def __hash__(self):
+  #   # NB: read attrs warnings: https://www.attrs.org/en/stable/hashing.html#fn1
+  #   # Consequences here:
+  #   # * We get URIs to hash like their tuple/string encoding, which is what
+  #   #    we want.
+  #   # * We do this instead of frozen=True so that URIs can be updated in-place
+  #   #    (e.g. via oarphpy.spark.RowAdapter.from_row(), or updating `.extra`).
+  #   #    Furthermore, frozen=True doesn't prevent updates inside mutable 
+  #   #    members anyways.
+  #   # * URIs will probably never be mutated *after* being inserted into a 
+  #   #     container, thus the update-causes-silent-hash-bugs issue is likely
+  #   #     a rare edge case.
+  #   return hash(self.as_tuple())
+
+  # def __repr__(self):
+  #   kvs = ((attr, getattr(self, attr)) for attr in self.__slots__)
+  #   kwargs_str = ', '.join('%s=%s' % (k, repr(v)) for k, v in kvs)
+  #   return 'URI(%s)' % kwargs_str
+
+  # def __eq__(self, other):
+  #   if type(other) is type(self):
+  #     return all(
+  #       getattr(self, attr) == getattr(other, attr)
+  #       for attr in self.__slots__)
+  #   return False
+
+  def __lt__(self, other):
+    """Order URIs by their `as_tuple()` encoding."""
+    assert type(other) is type(self)
+    return self.as_tuple() < other.as_tuple()
+    
+    # import pdb; pdb.set_trace()
+    # def a_extra_less_than_b_extra(a, b):
+    #   # extra (dicts) are not comparable, so we need to handle them specially
+    #   assert self.__slots__[-1] == 'extra', "Schema changed?"
+    #   a_extra = a[-1]
+    #   b_extra = b[-1]
+    #   return sorted(a_extra.items()) < sorted(b_extra.items())
+    
+    # return (
+    #   (self_t[:-1] < other_t[:-1]) or 
+    #     a_extra_less_than_b_extra(self_t, other_t))
+
+  # def __hash__(self): # breaks equality of containers ... ~~~~~~~~~~~~~~~~~~~~~~``
+  #   return hash(self.as_tuple())
+
+  def update(self, **kwargs):
+    """Override this instance in-place with all values specified in `kwargs`.
+    (Ignores keys that are not attributes of this class)."""
+    for k in self.__slots__:
+      if k in kwargs:
+        v = kwargs[k]
+        if k == 'sel_datums':
+          v = DatumSelection.selections_from_value(v)
+        setattr(self, k, v)
+  
+  def replaced(self, **kwargs):
+    """Create and return a copy with all values updated to those specified in
+    `kwargs`.  Similar to `namedtuple._replace()`.  Ignores invalid keys in
+    `kwargs`.  Useful for constructing a derivative URI given a base URI."""
+    uri = copy.deepcopy(self)
+    uri.update(**kwargs)
+    return uri
+
+  # def set_crop(self, bbox):#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  #   self.update(
+  #     crop_x=bbox.x,
+  #     crop_y=bbox.y,
+  #     crop_w=bbox.width,
+  #     crop_h=bbox.height)
+
+  # def has_crop(self):
+  #   return all(
+  #     getattr(self, 'crop_%s' % a)
+  #     for a in ('x', 'y', 'w', 'h'))
+
+  # def get_crop_bbox(self):
+  #   return BBox(
+  #           x=self.crop_x, y=self.crop_y,
+  #           width=self.crop_w, height=self.crop_h)
+
+  # def get_viewport(self):
+  #   if self.has_crop():
+  #     return self.get_crop_bbox()
+
+  @classmethod
+  def from_str(cls, s, **overrides):
+    """Create and return a `URI` from string `s`.
+
+    Args:
+      s (string): String form of the `URI`, e.g. `psegs://dataset=test`
+      overrides (dict, optional): Override any parameters specified in `s`;
+        you can also set values for otherwise unset parameters this way.
+    
+    Returns:
+      URI: The constructed instance
+    """
+
+    if isinstance(s, cls) or not bool(s):
+      return s
+
+    if s.startswith(six.moves.urllib.parse.quote_plus(URI.PREFIX)):
+      s = six.moves.urllib.parse.unquote_plus(s)
+    if not s.startswith(URI.PREFIX):  # Explicit so it survives `python -O`
+      raise AssertionError("Missing %s in %s" % (URI.PREFIX, s))
+    toks_s = s[len(URI.PREFIX):]
+    if not toks_s:
+      return cls()
+    toks = toks_s.split('&')
+    assert all('=' in tok for tok in toks), "Bad token in %s" % (toks,)
+    
+    kwargs = {}
+    for tok in toks:
+      k, v = tok.split('=', 1)  # Split once: the value may itself contain '='
+      if k.startswith('extra.'):
+        k = k[len('extra.'):]
+        kwargs.setdefault('extra', {})
+        kwargs['extra'][k] = v
+      else:
+        kwargs[k] = v
+    kwargs.update(**overrides)
+    
+    return cls(**kwargs)
+  
+  def get_datum_uris(self):
+    """If this `URI` has `DatumSelection`'s, create and return
+    `URI` instances referencing each `StampedDatum` selected."""
+    return [
+      self.replaced(sel_datums=[], **attr.asdict(ds))
+      for ds in self.sel_datums
+    ]
+  
+  @classmethod
+  def segment_uri_from_datum_uris(cls, uris):
+    """Given a list of `uris`, construct and return a single (segment) `URI`
+    instance that references the given `uris` as `DatumSelection`s"""
+    assert uris, "Empty selection"
+    uris = sorted(cls.from_str(uri) for uri in uris)
+    out = uris[0].to_segment_uri()
+    return out.replaced(sel_datums=DatumSelection.selections_from_value(uris))
diff --git a/psegs/dsutil.py b/psegs/dsutil.py
new file mode 100644
index 0000000..47cb72f
--- /dev/null
+++ b/psegs/dsutil.py
@@ -0,0 +1,41 @@
+# Copyright 2022 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from psegs.datasets import nuscenes
+from psegs.datasets import kitti
+from psegs.datasets import kitti_sf
+from psegs.datasets import ios_lidar
+from psegs.datasets import tanks_and_temples
+
+DS_TO_UTIL_IMPL = {  # Dataset name -> util implementation looked up by `run()`
+  'kitti': kitti.DSUtil,
+  'kitti_sceneflow': kitti_sf.DSUtil,
+  'nuscenes': nuscenes.NuscDSUtil,
+  'ios_lidar': ios_lidar.DSUtil,
+  'tanks_and_temples': tanks_and_temples.DSUtil,
+  
+  # NB: colmap does not require a dataset; test fixtures are built-in
+}
+
+def run(dataset):
+  """Call `emplace()` and then `test()` on the util registered for `dataset`."""
+  if dataset not in DS_TO_UTIL_IMPL:
+    raise AssertionError("Unknown dataset %s, choices: %s" % (
+      dataset, sorted(DS_TO_UTIL_IMPL.keys())))
+  dsutil_impl = DS_TO_UTIL_IMPL[dataset]
+  # NB: explicit raises (not `assert`) so the steps still run under `python -O`
+  if not dsutil_impl.emplace():
+    raise AssertionError("emplace() failed for dataset %s" % dataset)
+  if not dsutil_impl.test():
+    raise AssertionError("test() failed for dataset %s" % dataset)
diff --git a/psegs/dummyrun.py b/psegs/dummyrun.py
new file mode 100644
index 0000000..698bef2
--- /dev/null
+++ b/psegs/dummyrun.py
@@ -0,0 +1,1015 @@
+import sys
+sys.path.append('/opt/psegs')
+
+import numpy as np
+
+from psegs.exp.fused_lidar_flow import CloudFuser
+from psegs.exp.fused_lidar_flow import SampleDFFactory
+from psegs.exp.fused_lidar_flow import FusedFlowDFFactory
+
+import IPython.display
+import PIL.Image
+
+
+from psegs.exp.fused_lidar_flow import SemanticKITTIFusedFlowDFFactory
+
+
+
+# class SemanticKITTIOFlowRenderer(OpticalFlowRenderBase):
+#     FUSED_LIDAR_SD_TABLE = SemanticKITTIFusedWorldCloudTable
+
+# from psegs.exp.fused_lidar_flow import KITTI360_OurFused_FusedFlowDFFactory
+
+from psegs.exp.fused_lidar_flow import KITTI360_OurFused_FusedFlowDFFactory
+from psegs.exp.fused_lidar_flow import KITTI360_KITTIFused_SampleDFFactory
+
+        
+# from psegs.exp.fused_lidar_flow import WorldCloudCleaner
+
+
+
+# class KITTI360OFlowRenderer(OpticalFlowRenderBase):
+#     FUSED_LIDAR_SD_TABLE = KITTI360WorldCloudTableBase
+
+
+
+
+
+
+
+
+
+
+# class NuscWorldCloudTableBase(CloudFuser):
+#     SPLITS = ['train_detect', 'train_track']
+    
+#     @classmethod
+#     def _filter_ego_vehicle(cls, cloud_ego):
+#         # Note: NuScenes authors have already corrected clouds for ego motion:
+#         # https://github.com/nutonomy/nuscenes-devkit/issues/481#issuecomment-716250423
+#         # But have not filtered out ego self-returns
+#         cloud_ego = cloud_ego[np.where(  ~(
+#                         (cloud_ego[:, 0] <= 1.5) & (cloud_ego[:, 0] >= -1.5) &  # Nusc lidar +x is +right
+#                         (cloud_ego[:, 1] <= 2.5) & (cloud_ego[:, 0] >= -2.5) &  # Nusc lidar +y is +forward
+#                         (cloud_ego[:, 1] <= 1.5) & (cloud_ego[:, 0] >= -1.5)    # Nusc lidar +z is +up
+#         ))]
+#         return cloud_ego
+
+
+
+# class NuscKFOnlyLCCDFFactory(TaskLidarCuboidCameraDFFactory):
+    
+#     SRC_SD_TABLE = NuscStampedDatumTableFactory
+    
+#     @classmethod
+#     def build_df_for_segment(cls, spark, segment_uri):
+#         datum_df = cls.SRC_SD_TABLE.get_segment_datum_df(spark, segment_uri)
+#         datum_df.registerTempTable('datums')
+#         print('Building tasks table for %s ...' % segment_uri.segment_id)
+        
+#         # Nusc doesn't have numerical task_ids so we'll have to induce
+#         # one via lidar timestamp.
+#         # NB: for Nusc: can group by nuscenes-sample-token FOR KEYFRAMES-ONLY DATA
+#         task_data_df = spark.sql("""
+#             SELECT 
+#               COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) 
+#                   FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,
+#               COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) 
+#                   FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,
+#               COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) 
+#                   FILTER (WHERE uri.topic LIKE '%camera%') AS ci_sds,
+#               MIN(uri.timestamp) FILTER (WHERE uri.topic LIKE '%lidar%') AS lidar_time,
+#               FIRST(uri.extra.`nuscenes-sample-token`) AS sample_token
+#             FROM datums
+#             WHERE 
+#             uri.extra.`nuscenes-is-keyframe` = 'True' AND (
+#               uri.extra['nuscenes-label-channel'] is NULL OR 
+#               uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'
+#             ) AND (
+#               uri.topic LIKE '%cuboid%' OR
+#               uri.topic LIKE '%lidar%' OR
+#               uri.topic LIKE '%camera%'
+#             )
+#             GROUP BY uri.extra.`nuscenes-sample-token`
+#             ORDER BY lidar_time
+#         """)
+#         sample_tokens_ordered = [r.sample_token for r in task_data_df.select('sample_token').collect()]
+#         task_to_stoken = [
+#             {'task_id': task_id, 'sample_token': sample_token}
+#             for task_id, sample_token in enumerate(sample_tokens_ordered)
+#         ]
+#         task_id_rdd = spark.sparkContext.parallelize(task_to_stoken)
+#         task_id_df = spark.createDataFrame(task_id_rdd)
+#         tasks_df = task_data_df.join(task_id_df, on=['sample_token'], how='inner')
+#         tasks_df = tasks_df.persist()
+#         print('... done.')
+#         return tasks_df
+
+
+# class NuscAllFramesLCCDFFactory(TaskLidarCuboidCameraDFFactory):
+    
+#     SRC_SD_TABLE = NuscStampedDatumTableLabelsAllFrames
+    
+#     @classmethod
+#     def build_df_for_segment(cls, spark, segment_uri):
+#         datum_df = cls.SRC_SD_TABLE.get_segment_datum_df(spark, segment_uri)
+#         datum_df.registerTempTable('datums')
+#         print('Building tasks table for %s ...' % segment_uri.segment_id)
+        
+#         task_data_df = spark.sql("""
+#             SELECT 
+#               COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) 
+#                   FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,
+#               COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) 
+#                   FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,
+#               COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) 
+#                   FILTER (WHERE uri.topic LIKE '%camera%') AS ci_sds,
+#               MIN(uri.timestamp) FILTER (WHERE uri.topic LIKE '%lidar%') AS lidar_time,
+#               FIRST(uri.extra.`nuscenes-sample-token`) AS sample_token
+#             FROM datums
+#             WHERE 
+#             (
+#               uri.extra['nuscenes-label-channel'] is NULL OR 
+#               uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'
+#             ) AND (
+#               uri.topic LIKE '%cuboid%' OR
+#               uri.topic LIKE '%lidar%' OR
+#               uri.topic LIKE '%camera%'
+#             )
+#             GROUP BY uri.extra.`nuscenes-sample-token`
+#             ORDER BY lidar_time
+#         """)
+#         sample_tokens_ordered = [r.sample_token for r in task_data_df.select('sample_token').collect()]
+#         task_to_stoken = [
+#             {'task_id': task_id, 'sample_token': sample_token}
+#             for task_id, sample_token in enumerate(sample_tokens_ordered)
+#         ]
+#         task_id_rdd = spark.sparkContext.parallelize(task_to_stoken)
+#         task_id_df = spark.createDataFrame(task_id_rdd)
+#         tasks_df = task_data_df.join(task_id_df, on=['sample_token'], how='inner')
+#         tasks_df = tasks_df.persist()
+#         print('... done.')
+#         return tasks_df
+        
+# class NuscWorldCloudTableBase(CloudFuser):
+#     SPLITS = ['train_detect', 'train_track']
+    
+#     @classmethod
+#     def _filter_ego_vehicle(cls, cloud_ego):
+#         # Note: NuScenes authors have already corrected clouds for ego motion:
+#         # https://github.com/nutonomy/nuscenes-devkit/issues/481#issuecomment-716250423
+#         # But have not filtered out ego self-returns
+#         cloud_ego = cloud_ego[np.where(  ~(
+#                         (cloud_ego[:, 0] <= 1.5) & (cloud_ego[:, 0] >= -1.5) &  # Nusc lidar +x is +right
+#                         (cloud_ego[:, 1] <= 2.5) & (cloud_ego[:, 0] >= -2.5) &  # Nusc lidar +y is +forward
+#                         (cloud_ego[:, 1] <= 1.5) & (cloud_ego[:, 0] >= -1.5)    # Nusc lidar +z is +up
+#         ))]
+#         return cloud_ego
+    
+# class NuscKFOnlyFusedWorldCloudTable(NuscWorldCloudTableBase):
+#     FUSED_LIDAR_SD_TABLE = NuscKFOnlyLCCDFFactory
+
+# class NuscAllFramesFusedWorldCloudTable(NuscWorldCloudTableBase):
+#     FUSED_LIDAR_SD_TABLE = NuscAllFramesLCCDFFactory
+    
+
+# class NuscKeyframesOFlowRenderer(OpticalFlowRenderBase):
+#     FUSED_LIDAR_SD_TABLE = NuscKFOnlyFusedWorldCloudTable
+
+# class NuscAllFramesOFlowRenderer(OpticalFlowRenderBase):
+#     FUSED_LIDAR_SD_TABLE = NuscAllFramesFusedWorldCloudTable
+
+def build_sample_id_map(spark, outpath, only_segments=[]):
+  """Write to `outpath` a parquet index with one row per datum of each sample:
+  its `uri`, its `sample_id`, and the image height / width (0 for non-camera
+  datums).  Segments whose partition directory already exists are skipped; a
+  non-empty `only_segments` restricts which segments get indexed."""
+  from pathlib import Path
+  from psegs import util
+  from psegs import datum
+  only_segments = [datum.URI.from_str(u) for u in only_segments]
+  n_total = len(only_segments) if only_segments else None
+  from oarphpy import util as oputil
+  t = oputil.ThruputObserver(name='build_sample_idx', n_total=n_total)
+  from psegs.exp.fused_lidar_flow import NuscFusedFlowDFFactory
+  from psegs.exp.fused_lidar_flow import KITTI360_KITTIFused_FusedFlowDFFactory
+  from psegs.exp.fused_lidar_flow import KITTI360_OurFused_FusedFlowDFFactory
+
+  Rs = (
+    # SemanticKITTIFusedFlowDFFactory -- none of this as of writing
+    NuscFusedFlowDFFactory,
+    KITTI360_KITTIFused_FusedFlowDFFactory,
+    KITTI360_OurFused_FusedFlowDFFactory,
+  )
+  for R in Rs:
+    seg_uris = R.SAMPLE_DF_FACTORY.SRC_SD_TABLE.get_all_segment_uris()
+    if only_segments:
+      seg_uris = [  # Keep only requested segments; compared via string form
+        datum.URI.from_str(u)
+        for u in (
+          set(str(s) for s in only_segments) & set(str(s) for s in seg_uris)
+        )
+      ]
+    for suri in seg_uris:
+      t.start_block()
+      # Mirrors the `partitionBy` directory layout used by the write below
+      sdest = (
+        Path(outpath) / 
+          ('dataset=' + suri.dataset) / 
+          ('split=' + suri.split) / 
+          ('segment_id=' + suri.segment_id))
+      if sdest.exists():
+        util.log.info("Have %s" % sdest)
+        t.stop_block(n=1)
+        continue
+
+      util.log.info("Indexing %s" % suri)
+      
+      sample_df = R.SAMPLE_DF_FACTORY.build_df_for_segment(spark, suri)
+      spark.catalog.dropTempView('sample_df')
+      sample_df.registerTempTable('sample_df')
+      # Pairs of (sample_df column to explode, SQL for the image-size columns)
+      uri_exps = (
+        ('pc_sds', '0 AS ci_height, 0 AS ci_width'),
+        ('cuboids_sds', '0 AS ci_height, 0 AS ci_width'),
+        ('ci_sds',
+          """
+            sd.camera_image.height AS ci_height,
+            sd.camera_image.width AS ci_width
+          """),
+      )
+      index_df = None
+      for expr in uri_exps:
+        attrname, ci_expr = expr
+        df = spark.sql("""
+                SELECT
+                  "{dataset}"     AS dataset,
+                  "{split}"       AS split,
+                  "{segment_id}"  AS segment_id,
+                  BIGINT(sample_id) AS sample_id,
+                  sd.uri AS uri,
+                  {ci_expr}
+                
+                FROM (
+                  SELECT sample_id, EXPLODE({attrname}) AS sd
+                  FROM sample_df
+                )
+        """.format(
+                  dataset=suri.dataset,
+                  split=suri.split,
+                  segment_id=suri.segment_id,
+                  attrname=attrname,
+                  ci_expr=ci_expr))
+        if index_df is None:
+          index_df = df
+        else:
+          index_df = index_df.union(df)
+
+      index_df = index_df.persist()  # NOTE(review): never unpersisted -- confirm
+      index_df = index_df.coalesce(5)
+      index_df.write.save(
+        mode='append',
+        path=outpath,
+        partitionBy=['dataset', 'split', 'segment_id'],
+        format='parquet',
+        compression='lz4')
+      
+      util.log.info("Done with %s" % suri)
+      t.stop_block(n=1)
+      t.maybe_log_progress(every_n=1)
+
+def task_row_to_flow_record(task_row):
+  """Build a `FlowRecord` from the flow pickle at `task_row['pkl_path']` and
+  the index rows in `task_row['collect_list(sample_datas)']`."""
+  pkl_path = task_row['pkl_path']
+  import pickle
+  # NB: pickle.load() can run arbitrary code; only feed this trusted pickles.
+  with open(pkl_path, 'rb') as f:
+    pkldata = pickle.load(f)
+  ci1_uri = pkldata['ci1_uri']
+  # pkldata['ci2_uri'] is not read: it is broken in pickles from before 4/7,
+  # so the second image's URI is recovered from the index rows below.
+  uvdv1_uvdv2 = pkldata['uvdij1_visible_uvdij2_visible']
+
+  # hacks we screwed up: the sample ids have to be parsed out of the file
+  # name, which is expected to look like `..._<sid1>-><sid2>_...`
+  name_parts = pkl_path.split('->')
+  assert len(name_parts) == 2, pkl_path
+  before_arrow, after_arrow = name_parts
+  ci1_sid = int(before_arrow.split('_')[-1])
+  ci2_sid = int(after_arrow.split('_')[0])
+
+  import itertools
+  sampledata_rows = list(
+    itertools.chain.from_iterable(task_row['collect_list(sample_datas)']))
+  assert sampledata_rows
+
+  # Group the adapted index rows by sample id
+  from oarphpy.spark import RowAdapter
+  sid_to_rows = {}
+  for raw_row in sampledata_rows:
+    row = RowAdapter.from_row(raw_row)
+    sid_to_rows.setdefault(row.sample_id, []).append(row)
+  assert ci1_sid in sid_to_rows, (ci1_sid, sid_to_rows.keys())
+  assert ci2_sid in sid_to_rows, (ci2_sid, sid_to_rows.keys())
+  ci1_rows = sid_to_rows[ci1_sid]
+  ci2_rows = sid_to_rows[ci2_sid]
+
+  # Exactly one row of the first sample must carry the first image's URI
+  ci1_recs = [r for r in ci1_rows if r.uri == ci1_uri]
+  assert len(ci1_recs) == 1, ci1_recs
+  ci1_rec = ci1_recs[0]
+
+  # The second image is matched by topic instead (see ci2_uri note above)
+  ci2_recs = [r for r in ci2_rows if r.uri.topic == ci1_uri.topic]
+  assert len(ci2_recs) == 1, ci2_recs
+  ci2_rec = ci2_recs[0]
+  ci2_uri = ci2_rec.uri
+
+  ci1_h, ci1_w = ci1_rec.ci_height, ci1_rec.ci_width
+  assert (ci1_h, ci1_w) != (0, 0), (ci1_h, ci1_w)
+  ci2_h, ci2_w = ci2_rec.ci_height, ci2_rec.ci_width
+  assert (ci2_h, ci2_w) != (0, 0), (ci2_h, ci2_w)
+
+  from psegs.exp.fused_lidar_flow import RenderedCloud
+
+  def uris_with(rows, needle):
+    # URIs of `rows` whose topic contains the substring `needle`
+    return [r.uri for r in rows if needle in r.uri.topic]
+
+  def to_cloud(sample_id, ego_pose_uri, uvdvis, rows):
+    return RenderedCloud(
+      sample_id=sample_id,
+      ego_pose_uri=ego_pose_uri,
+      uvdvis=uvdvis,
+      ci_uris=uris_with(rows, 'camera'),
+      cuboids_uris=uris_with(rows, 'cuboids'),
+      pc_uris=uris_with(rows, 'lidar'),
+    )
+
+  # Columns 0-3 belong to the first image; the remaining ones to the second
+  uvdvis1, uvdvis2 = uvdv1_uvdv2[:, :4], uvdv1_uvdv2[:, 4:]
+  clouds = [
+    to_cloud(ci1_sid, ci1_uri, uvdvis1, ci1_rows),
+    to_cloud(ci2_sid, ci2_uri, uvdvis2, ci2_rows),
+  ]
+
+  from psegs.exp.fused_lidar_flow import FlowRecord
+  assert (ci1_h, ci1_w) == (ci2_h, ci2_w)
+  segment_uri = ci1_uri.to_segment_uri()
+  sids_str = ','.join(str(c.sample_id) for c in clouds)
+  uri = segment_uri.replaced(extra={'psegs_flow_sids': sids_str})
+  return FlowRecord(
+           uri=uri,
+           uri_key=str(uri),
+           clouds=clouds,
+           u_min=0.0, u_max=float(ci1_w),
+           v_min=0.0, v_max=float(ci1_h))
+
+def pickles_to_flow_records(
+      pickles_path,
+      dest_path,
+      index_cache_path='/opt/psegs/dataroot/sample_idx/sidx.parquet',
+      max_n=-1):
+  # Turn every `*.pkl` found under `pickles_path` into a flow record row (via
+  # task_row_to_flow_record) and append the rows to `dest_path` as lz4 parquet.
+  # `index_cache_path` is handed to build_sample_id_map() and then read back as
+  # the sample index; `max_n` > 0 keeps only that many of the shuffled pickles.
+  from psegs import util
+  from psegs.spark import Spark
+  spark = Spark.getOrCreate()
+  from oarphpy import util as oputil
+  import os
+  PSEGS_OFLOW_PKL_PATHS = [
+      os.path.abspath(p)
+      for p in oputil.all_files_recursive(
+                  pickles_path,
+                  pattern='*.pkl')
+  ]
+  util.log.info("Have %s pickles" % len(PSEGS_OFLOW_PKL_PATHS))
+  # Seeded shuffle: the order, and so the `max_n` subset, is the same every run.
+  import random
+  r = random.Random(1337)
+  r.shuffle(PSEGS_OFLOW_PKL_PATHS)
+  if max_n > 0:
+    PSEGS_OFLOW_PKL_PATHS = PSEGS_OFLOW_PKL_PATHS[:max_n]
+  path_rdd = spark.sparkContext.parallelize(
+                PSEGS_OFLOW_PKL_PATHS,
+                numSlices=len(PSEGS_OFLOW_PKL_PATHS))
+
+  ### Read pkl data in indexed format...
+  ### ... in the end we'll have to read the pickles twice.
+  def to_join_key(uri, sample_id):
+    return ''.join((
+                uri.dataset, uri.split, uri.segment_id,
+                uri.topic, str(sample_id)))
+  def to_pkl_idx(path):
+    import pickle
+    with open(path, 'rb') as f:
+      row = pickle.load(f)  # NOTE(review): unpickling can run arbitrary code; only use trusted files
+    ci1_uri = row['ci1_uri']
+    ci2_uri = row['ci2_uri']
+    # The sample ids come from the file path, not the pickle: the path must contain
+    # exactly one '->' and the ids are the '_'-delimited ints on either side of it.
+    toks = path.split('->')
+    assert len(toks) == 2, path
+    ci1_sid_fname = int(toks[0].split('_')[-1])
+    ci2_sid_fname = int(toks[1].split('_')[0])
+
+    return {
+      'ci1_uri_seg': str(row['ci1_uri'].to_segment_uri()),
+      'ci1_uri_key': to_join_key(ci1_uri, ci1_sid_fname),
+      # 'ci2_uri_seg': str(row['ci2_uri'].to_segment_uri()),
+      'ci2_uri_key': to_join_key(ci2_uri, ci2_sid_fname),
+      'pkl_path': path,
+    }
+
+  pkl_idx_rdd = path_rdd.map(to_pkl_idx)
+  pkl_idx_rdd = pkl_idx_rdd.cache()
+  pkl_idx_df = spark.createDataFrame(pkl_idx_rdd, samplingRatio=1.0)
+  pkl_idx_df = pkl_idx_df.persist()
+  util.log.info("Have pickle index of size %s" % pkl_idx_df.count())
+
+  ### Build Sample Index as necessary
+  seg_uris = [
+    r.ci1_uri_seg
+    for r in pkl_idx_df.select('ci1_uri_seg').distinct().collect()
+  ]
+  util.log.info("Have %s segments to do %s" % (len(seg_uris), sorted(seg_uris)))
+
+  build_sample_id_map(spark, index_cache_path, only_segments=seg_uris)
+  sample_idx_df = spark.read.parquet(index_cache_path)
+  
+  from psegs import datum
+  segs = set(datum.URI.from_str(s).segment_id for s in seg_uris)
+  sample_idx_df = sample_idx_df.filter(sample_idx_df.segment_id.isin(segs))
+  sample_idx_df = sample_idx_df.persist()
+
+  util.log.info(
+    "Read index for %s segments" % (
+      sample_idx_df.select('segment_id').distinct().count()))
+  # util.log.info(sample_idx_df.count())
+
+  # create "map" of ci_uri (str) -> all sample data associated with that ci_uri
+  from oarphpy.spark import RowAdapter
+  jt_rdd = sample_idx_df.rdd.map(
+                              lambda r: (str(r.dataset + r.split + r.segment_id + str(r.sample_id)), r)
+                              ).groupByKey().flatMap(
+                                  lambda kvs: [
+                                    {
+                                      'ci_uri_key': to_join_key(v.uri, v.sample_id),
+                                      'sample_datas': kvs[1].data,
+                                    }
+                                    for v in kvs[1]
+                                    if 'camera' in v.uri.topic
+                                  ])
+  # jt_rdd = jt_rdd.cache()
+  jt_df = spark.createDataFrame(jt_rdd, samplingRatio=0.5)
+  jt_df = jt_df.repartition('ci_uri_key').persist()
+  util.log.info("Have %s sample data indexed by camera image" % jt_df.count())
+  
+
+  ### Join indices together and convert!
+  # A pickle matches an index row when either of its two image keys equals that row's key.
+  joined = pkl_idx_df.join(
+                jt_df,
+                (pkl_idx_df.ci1_uri_key == jt_df.ci_uri_key) |
+                  (pkl_idx_df.ci2_uri_key == jt_df.ci_uri_key) )
+  task_df = joined.groupBy('pkl_path').agg({'sample_datas': 'collect_list'})
+  task_df = task_df.persist()
+  util.log.info("Have %s tasks to do" % task_df.count())
+
+  # Do this in chunks because it keeps failing due to network otherwise
+  task_paths = [r.pkl_path for r in task_df.select('pkl_path').collect()]
+
+  from oarphpy import util as oputil
+  t = oputil.ThruputObserver(name='save_chunks', n_total=len(task_paths))
+  for task_chunk in oputil.ichunked(task_paths, 100):
+    t.start_block()
+    chunk_df = task_df.filter(task_df.pkl_path.isin(list(task_chunk)))
+
+    frec_rdd = chunk_df.rdd.map(task_row_to_flow_record)
+    frec_rdd = frec_rdd.map(RowAdapter.to_row)
+    frec_rdd = frec_rdd.repartition(len(task_chunk))
+
+    # import pyspark
+    # frec_rdd = frec_rdd.persist(pyspark.StorageLevel.DISK_ONLY)
+    
+    from psegs.exp.fused_lidar_flow import FLOW_RECORD_PROTO  
+    schema = RowAdapter.to_schema(FLOW_RECORD_PROTO)
+    frec_df = spark.createDataFrame(frec_rdd, schema=schema)
+    # Copy the uri fields into top-level columns so `partitionBy` below can use them.
+    # frec_df = frec_df.persist()
+    frec_df = frec_df.withColumn('dataset', frec_df['uri.dataset'])
+    frec_df = frec_df.withColumn('split', frec_df['uri.split'])
+    frec_df = frec_df.withColumn('segment_id', frec_df['uri.segment_id'])
+
+    frec_df.write.save(
+          path=dest_path,
+          mode='append',
+          format='parquet',
+          partitionBy=['dataset', 'split', 'segment_id'],
+          compression='lz4')
+    util.log.info("Saved some to %s" % dest_path)
+    t.stop_block(n=len(task_chunk))
+    t.maybe_log_progress(every_n=1)
+    # frec_df.unpersist()
+    # frec_rdd.unpersist()
+
+
+  # import pickle
+  # pickle.dump(task_df.take(1)[0], open('/tmp/task_row.pkl', 'wb'))
+
+  # import ipdb; ipdb.set_trace()
+  # print()
+
+  # def to_pkl_jrow(path):
+  #   from oarphpy.spark import RowAdapter
+  #   from pyspark.sql import Row
+  #   import pickle
+  #   pkldata_str = open(path, 'rb').read()
+  #   pkldata = pickle.loads(pkldata_str)
+    
+  #   pkldata.pop('v2v_flow')
+
+  #   jrow = Row(
+  #           ci1_uri_str=str(pkldata['ci1_uri']),
+  #           ci2_uri_str=str(pkldata['ci2_uri']),
+  #           pkldata_str=pkldata_str)
+  #   return jrow
+
+  #   # # asdf = row.pop('uvdij1_visible_uvdij2_visible')
+  #   # # row['uvd_viz1_uvd_viz2'] = asdf
+  #   # # from psegs.datum import URI
+  #   # # row['segment_uri'] = URI.from_str(row['ci1_uri']).to_segment_uri()
+  #   # return RowAdapter.to_row(Row(**row))
+
+  # pkl_rdd = path_rdd.map(to_pkl_jrow)
+  # import pyspark
+  # pkl_rdd = pkl_rdd.persist(pyspark.StorageLevel.DISK_ONLY)
+  
+  # # from psegs.datum.stamped_datum import URI_PROTO
+  # # import numpy as np
+  # # schema = RowAdapter.to_schema(Row(
+  # #   ci1_uri=URI_PROTO,
+  # #   ci2_uri=URI_PROTO,
+  # #   uvd_viz1_uvd_viz2=np.zeros((1, 4 + 4)),
+  # # ))
+  # pkl_df = spark.createDataFrame(pkl_rdd, samplingRatio=0.5)
+  
+  # joined = pkl_df.join(
+  #               jt_df,
+  #               (pkl_df.ci1_uri_str == jt_df.ci_uri_str) |
+  #                 (pkl_df.ci2_uri_str == jt_df.ci_uri_str) )
+  # task_df = joined.groupBy('pkl_path').agg({'sample_datas': 'collect_list'})
+
+
+  # import ipdb; ipdb.set_trace()
+  # print()
+
+  # # df = df.withColumn('dataset', df['ci1_uri.dataset'])
+  # # df = df.withColumn('split', df['ci1_uri.split'])
+  # # df = df.withColumn('segment_id', df['ci1_uri.segment_id'])
+  
+  # import ipdb; ipdb.set_trace()
+
+  # df.write.save(
+  #       path=dest_path,
+  #       format='parquet',
+  #       partitionBy=['dataset', 'split', 'segment_id'],
+  #       compression='lz4')
+  
+
+
+"""
+
+analysis:
+  * cuboid hit rates
+  * sample-to-sample could nearest neighbor "error" 
+
+general (beyond-just-pairs) design:
+sample_ids [s1, s2, ... ]
+ci_uris [ [ ci1 ], [ ci2 ], ... ] <-- len-1 for oflow, could be len N for sflow
+cu_uris [ [ cu11, cu12, .. ], [ cu21, cu22, ... ], ... ]
+pc_uris [ [ pc1 ], [ pc2 ], ... ] 
+ego_pose_uris [ ego1, ego2, ... ] <-- always len 1, for sflow these are the uvd origins ?
+
+uvdvis [ uvdvis1, uvdvis2, ... ]
+
+(these arrays could also include not just uvd but world frame xyz perhaps.  perhaps
+even rgb-normal?  .. surfel...)
+
+
+
+"""
+
+
+# from psegs.exp.fused_lidar_flow import KITTI360_OurFused
+# from psegs.exp.fused_lidar_flow import KITTI360_KITTIFused
+# from psegs.exp.fused_lidar_flow import NuscFlowSDTable
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+### BEGIN HACK
+
+
+import attr
+import cv2
+import imageio
+import math
+import os
+import PIL.Image
+import six
+
+import numpy as np
+
+from oarphpy import plotting as op_plt
+from oarphpy.spark import CloudpickeledCallable
+img_to_data_uri = lambda x: op_plt.img_to_data_uri(x, format='png')
+
+@attr.s(slots=True, eq=False, weakref_slot=False)
+class OpticalFlowPair(object):
+    """A flyweight for a pair of images with an optical flow field.
+    Supports lazy-loading of large data attributes."""
+    dataset = attr.ib(type=str, default='')
+    """To which dataset does this pair belong?"""
+    
+    id1 = attr.ib(type=str, default='')
+    """Identifier or URI for the first image"""
+    
+    id2 = attr.ib(type=str, default='')
+    """Identifier or URI for the second image"""
+    
+    img1 = attr.ib(default=None)
+    """URI or numpy array or CloudpickeledCallable for the first image (source image)"""
+
+    img2 = attr.ib(default=None)
+    """URI or numpy array or CloudpickeledCallable for the second image (target image)"""
+    
+    flow = attr.ib(default=None)
+    """A numpy array or callable or CloudpickeledCallable representing optical flow from img1 -> img2"""
+    
+    ## Optional Attributes (For Select Datasets)
+    diff_time_sec = attr.ib(type=float, default=0.0)
+    """Difference in time (in seconds) between the views / poses depicted in `img1` and `img2`."""
+    
+    translation_meters = attr.ib(type=float, default=0.0)
+    """Difference in ego translation (in meters) between the views / poses depicted in `img1` and `img2`."""
+    # to add:
+    # diff time seconds
+    # semantic image for frame 1, frame 2 [could be painted by cuboids]
+    # instance images for frame 1, frame 2 [could be painted by cuboids]
+    #   -- for colored images, at first just pivot all oflow metrics by colors
+    # get uvdviz1 uvdviz2 (scene flow)
+    #   * for deepeform, their load_flow will work
+    #   * for kitti, we have to read their disparity images
+    # get uvd1 uvd2 (lidar for nearest neighbor stuff)
+    # depth image for frame 1, frame 2 [could be interpolated by cuboids]
+    #   -- at first bucket the depth coarsely and pivot al oflow by colors
+    
+    def get_img1(self):
+        if isinstance(self.img1, CloudpickeledCallable):
+            self.img1 = self.img1()
+        if isinstance(self.img1, six.string_types):
+            self.img1 = imageio.imread(self.img1)
+        return self.img1
+    
+    def get_img2(self):
+        if isinstance(self.img2, CloudpickeledCallable):
+            self.img2 = self.img2()
+        if isinstance(self.img2, six.string_types):
+            self.img2 = imageio.imread(self.img2)
+        return self.img2
+    
+    def get_flow(self):
+        """Return the flow array; anything that is not a numpy value is called once and the result cached."""
+        if not isinstance(self.flow, (np.ndarray, np.generic)):
+            self.flow = self.flow()
+        return self.flow
+    
+    def to_html(self):
+        """Return an HTML table with both images and the drawn flow; the text fields are HTML-escaped."""
+        from html import escape
+        im1 = self.get_img1()
+        im2 = self.get_img2()
+        flow = self.get_flow()
+        fviz = draw_flow(im1, flow)
+        html = """
+            <table>
+            
+            <tr><td style="text-align:left"><b>Dataset:</b> {dataset}</td></tr>
+            
+            <tr><td style="text-align:left"><b>Source Image:</b> {id1}</td></tr>
+            <tr><td><img src="{im1}" /></td></tr>
+
+            <tr><td style="text-align:left"><b>Target Image:</b> {id2}</td></tr>
+            <tr><td><img src="{im2}" /></td></tr>
+
+            <tr><td style="text-align:left"><b>Flow</b></td></tr>
+            <tr><td><img src="{fviz}" /></td></tr>
+            </table>
+        """.format(
+                dataset=escape(str(self.dataset)),  # ids may be URIs containing '&'; escape so they show verbatim
+                id1=escape(str(self.id1)), id2=escape(str(self.id2)),
+                im1=img_to_data_uri(im1), im2=img_to_data_uri(im2),
+                fviz=img_to_data_uri(fviz))
+        return html
+
+def draw_flow(img, flow, step=8):
+    """Return a copy of `img` with the flow sampled on a grid of spacing `step` drawn in green.
+    Based upon OpenCV sample: https://github.com/opencv/opencv/blob/master/samples/python/opt_flow.py"""
+    rows, cols = img.shape[:2]
+    ys, xs = np.mgrid[step/2:rows:step, step/2:cols:step].reshape(2, -1).astype(int)
+    dx, dy = flow[ys, xs].T
+    # One (start, end) point pair per sampled pixel, rounded to the nearest integer pixel.
+    segments = np.int32(np.vstack([xs, ys, xs + dx, ys + dy]).T.reshape(-1, 2, 2) + 0.5)
+    canvas = img.copy()
+    cv2.polylines(canvas, segments, 0, (0, 255, 0))
+    for (x0, y0), _tip in segments: cv2.circle(canvas, (x0, y0), 1, (0, 255, 0), -1)
+    return canvas
+
+
+
+#### END HACK
+
+
+
+
+### BEGIN PROPOSE MODULE
+
+# from cheap_optical_flow_eval_analysis.ofp import OpticalFlowPair
+
+from oarphpy.spark import CloudpickeledCallable
+
+from psegs.exp.fused_lidar_flow import FlowRecTable
+# Demo flow-record URIs (two kitti-360, two nuscenes); `extra.psegs_flow_sids` holds the comma-joined sample ids of the record's clouds.
+PSEGS_SYNTHFLOW_DEMO_RECORD_URIS = (
+  'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&extra.psegs_flow_sids=4340,4339',
+  'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&extra.psegs_flow_sids=11219,11269',
+
+  'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0501&extra.psegs_flow_sids=40009,40010',
+  'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0501&extra.psegs_flow_sids=50013,50014',
+
+  # 'psegs://dataset=kitti-360-fused&split=train&segment_id=2013_05_28_drive_0000_sync&extra.psegs_flow_sids=11103,11104',
+  # 'psegs://dataset=kitti-360-fused&split=train&segment_id=2013_05_28_drive_0000_sync&extra.psegs_flow_sids=1181,1182',
+
+  # 'psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0002&extra.psegs_flow_sids=10016,10017',
+  # 'psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0582&extra.psegs_flow_sids=60035,60036',
+
+  # 'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0393&extra.psegs_flow_sids=50017,50018',
+  # 'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0501&extra.psegs_flow_sids=40019,40020',
+)
+
+def flow_rec_to_fp(flow_rec, sample):
+  """Build an OpticalFlowPair from a flow record and the sample that holds
+  its datums.  The first camera image of cloud 0 is the source, that of
+  cloud 1 the target; images and flow are wrapped in lazy callables."""
+  datum_by_uri_str = sample.get_uri_str_to_datum()
+
+  def camera_for(idx):
+    # The first camera image URI listed on the cloud selects its datum.
+    cloud = flow_rec.clouds[idx]
+    key = str(cloud.ci_uris[0])
+    sd = datum_by_uri_str[key]
+    return cloud, key, sd, sd.camera_image
+
+  src_cloud, src_key, src_sd, src_ci = camera_for(0)
+  dst_cloud, dst_key, dst_sd, dst_ci = camera_for(1)
+
+  import numpy as np
+  src_xyz = src_ci.ego_pose.translation
+  dst_xyz = dst_ci.ego_pose.translation
+
+  sid_tag = '&extra.psegs_flow_sids='
+  # NOTE(review): `diff_time_sec` is the raw gap between the two URI
+  # timestamps, with no unit conversion here -- confirm they are in seconds.
+  return OpticalFlowPair(
+          dataset=flow_rec.uri.dataset + '/' + flow_rec.uri.split,
+          id1=src_key + sid_tag + str(src_cloud.sample_id),
+          id2=dst_key + sid_tag + str(dst_cloud.sample_id),
+          img1=CloudpickeledCallable(lambda: src_ci.image),
+          img2=CloudpickeledCallable(lambda: dst_ci.image),
+          flow=CloudpickeledCallable(lambda: flow_rec.to_optical_flow()),
+
+          diff_time_sec=abs(dst_sd.uri.timestamp - src_sd.uri.timestamp),
+          translation_meters=np.linalg.norm(dst_xyz - src_xyz))
+
+def psegs_synthflow_create_fps(
+        spark,
+        flow_record_pq_table_path,
+        record_uris,
+        include_cuboids=False,
+        include_point_clouds=False):
+  """Collect the flow records named by `record_uris` (with their samples) to
+  the driver and return them as a list of OpticalFlowPair."""
+  table = FlowRecTable(spark, flow_record_pq_table_path)
+  pairs_rdd = table.get_records_with_samples_rdd(
+                          record_uris=record_uris,
+                          include_cameras=True,
+                          include_cuboids=include_cuboids,
+                          include_point_clouds=include_point_clouds)
+
+  # Camera images are always requested: flow_rec_to_fp reads them.
+  flow_pairs = []
+  for rec, rec_sample in pairs_rdd.collect():
+    flow_pairs.append(flow_rec_to_fp(rec, rec_sample))
+  return flow_pairs
+
+def psegs_synthflow_iter_fp_rdds(
+        spark,
+        flow_record_pq_table_path,
+        fps_per_rdd=100,
+        include_cuboids=False,
+        include_point_clouds=False):
+  """Yield RDDs of OpticalFlowPair, each built from at most `fps_per_rdd` flow records of the table."""
+  T = FlowRecTable(spark, flow_record_pq_table_path)
+  # Ensure a sort so that pairs from similar segments will load in the same
+  # RDD -- that makes joins smaller and faster
+  ruris = sorted(T.get_record_uris())
+
+  from oarphpy import util as oputil
+  for ruri_chunk in oputil.ichunked(ruris, fps_per_rdd):
+    frec_sample_rdd = T.get_records_with_samples_rdd(
+                          record_uris=list(ruri_chunk),  # was the undefined name `rids`
+                          include_cameras=True,  # flow_rec_to_fp reads the camera images
+                          include_cuboids=include_cuboids,
+                          include_point_clouds=include_point_clouds)
+    # Each element is a (flow_rec, sample) pair; unpack it for the two-argument converter.
+    fp_rdd = frec_sample_rdd.map(lambda rec_sample: flow_rec_to_fp(*rec_sample))
+    yield fp_rdd
+
+
+### END PROPOSE MODULE
+
+
+
+def convert_again(in_pq, out_pq):
+  """Rewrite the flow record parquet table at `in_pq` into `out_pq`.
+
+  Every record gets a URI whose `sel_datums` are the ego pose URIs of its
+  clouds, and `uri_key` is set to the string form of that URI.  The result is
+  saved as lz4 parquet partitioned by dataset / split / segment_id.
+
+  Args:
+    in_pq: path of the parquet table to read.
+    out_pq: path the converted table is written to.
+  """
+
+  from psegs.spark import Spark
+  spark = Spark.getOrCreate()
+
+  src_df = spark.read.parquet(in_pq)
+
+  def convert(row):
+    # Mapped over the source rows below: decode, re-key, encode again.
+    from pyspark import Row
+    from oarphpy.spark import RowAdapter
+    rec = RowAdapter.from_row(row)
+
+    pose_uris = [cloud.ego_pose_uri for cloud in rec.clouds]
+    new_uri = rec.uri.replaced(sel_datums=pose_uris)
+
+    rec.uri = new_uri
+    rec.uri_key = str(new_uri)
+    return RowAdapter.to_row(rec)
+
+
+  from oarphpy.spark import RowAdapter
+  from psegs.exp.fused_lidar_flow import FLOW_RECORD_PROTO  
+  schema = RowAdapter.to_schema(FLOW_RECORD_PROTO)
+  converted_rdd = src_df.rdd.map(convert)
+  frec_df = spark.createDataFrame(converted_rdd, schema=schema)
+
+  # Copy the URI fields used for partitioning into top-level columns.
+  partition_cols = ['dataset', 'split', 'segment_id']
+  for col_name in partition_cols:
+    frec_df = frec_df.withColumn(col_name, frec_df['uri.' + col_name])
+
+
+  # No `mode` is passed, so Spark's default save mode applies.
+
+  frec_df.write.save(
+          path=out_pq,
+          format='parquet',
+          partitionBy=partition_cols,
+          compression='lz4')
+
+
+
+
+if __name__ == '__main__':
+  # from psegs.spark import Spark
+  # spark = Spark.getOrCreate()
+  # Ad-hoc driver: hard-coded local paths and ipdb breakpoints; earlier experiments are kept commented out.
+  # # R = KITTI360_OurFused_FusedFlowDFFactory
+  # # R = KITTI360_KITTIFused_FusedFlowDFFactory
+
+  # R = NuscFusedFlowDFFactory
+
+  # seg_uris = R.SRC_SD_T().get_all_segment_uris()
+  # # R.build(spark=spark, only_segments=['psegs://segment_id=scene-0594'])#seg_uris[0]])
+  # R.build(spark=spark, only_segments=seg_uris[150:200])
+
+  # import pickle
+  # task_row = pickle.load(open('/tmp/task_row.pkl', 'rb'))
+  # rec = task_row_to_flow_record(task_row)
+  # import ipdb; ipdb.set_trace()
+
+
+
+
+  # pickles_to_flow_records(
+  #   '/opt/psegs/dataroot/oflow_pickles',
+  #   '/outer_root/media/Costco8000/psegs_synthflow.parquet/',
+  #   max_n=-1)
+  # print('yay!')
+  # assert False
+
+  from psegs.spark import Spark
+  spark = Spark.getOrCreate()
+
+
+  # fps = psegs_synthflow_create_fps(
+  #           spark,
+  #           '/outer_root/media/rocket4q/psegs_flow_records_short',
+  #           PSEGS_SYNTHFLOW_DEMO_RECORD_URIS)
+  # import ipdb; ipdb.set_trace()
+
+
+
+
+
+  # Rewrite the flow record table through convert_again(), then stop in the debugger.
+  convert_again(
+     '/outer_root/media/rocket4q/psegs_synthflow.parquet',
+     '/outer_root/media/Costco8000/psegs_flow_records_FULL_fixed'
+  )
+  import ipdb; ipdb.set_trace()
+
+
+  # Load one (flow_rec, sample) pair from the table for interactive inspection.
+  T = FlowRecTable(spark, '/outer_root/media/rocket4q/psegs_flow_records_short_fixed')
+  rids = T.get_record_uris()
+  print('rids', len(rids))
+  
+  # import pprint
+  # pprint.pprint([str(r) for r in rids])
+  # assert False
+  # rids = [r for r in rids if 'kitti' in r.dataset]
+  # rids = rids[:100]
+
+  
+  rdd = T.get_records_with_samples_rdd(record_uris=rids)
+
+  #print('second now')
+  #big_rdd = T.get_records_with_samples_rdd(record_uris=rids[10:12])
+  #print(big_rdd.count())
+  #print('done')
+
+  flow_rec, sample = rdd.take(1)[0]
+
+  # print(flow_rec.to_html(camera_images=sample.camera_images))
+
+  import ipdb; ipdb.set_trace()
+  print()
+  # Read one raw row, decode it with RowAdapter and render it; the HTML result is discarded.
+  flow_df = spark.read.parquet('/outer_root/media/rocket4q/psegs_flow_records_short')
+
+  rec = flow_df.take(1)[0]
+  from oarphpy.spark import RowAdapter
+  rec = RowAdapter.from_row(rec)
+
+  rec.to_html()
+
+  import ipdb; ipdb.set_trace()
+  print()
+
+
+
+  # R = NuscKeyframesOFlowRenderer
+
+  # R = SemanticKITTIOFlowRenderer
+
+  # R = KITTI360OFlowRenderer
+
+  # # R.MAX_TASKS_PER_SEGMENT = 2
+
+  # seg_uris = R.FUSED_LIDAR_SD_TABLE.get_all_segment_uris()
+  # R.build(spark=spark, only_segments=[seg_uris[0]])
+
+  
+
+
+
diff --git a/psegs/exp/README.md b/psegs/exp/README.md
new file mode 100644
index 0000000..cc3ed1d
--- /dev/null
+++ b/psegs/exp/README.md
@@ -0,0 +1,6 @@
+The `exp` module in PSegs includes support code for experiments.  The support
+code is staged here and may be later moved to an independent PSegs extension
+project.
+
+Modules in `exp` should import from core `psegs`, but core `psegs` should
+typically not import from `exp`.
\ No newline at end of file
diff --git a/psegs/exp/__init__.py b/psegs/exp/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/psegs/exp/fused_lidar_flow.py b/psegs/exp/fused_lidar_flow.py
new file mode 100644
index 0000000..a04d74d
--- /dev/null
+++ b/psegs/exp/fused_lidar_flow.py
@@ -0,0 +1,3895 @@
+# Copyright 2021 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import os
+
+import attr
+import numpy as np
+import pandas as pd
+
+from oarphpy import util as oputil
+
+from psegs import datum
+from psegs import util
+from psegs.conf import C
+from psegs.spark import Spark
+from psegs.table.sd_table import StampedDatumTableFactory
+
+import numpy as np
+
+###############################################################################
+### Lidar Fusion
+
+
+### Utils & Core Fusion Algo Pieces
+
+def get_point_idx_in_cuboid(cuboid, pc=None, cloud_ego=None):
+  """Return `(in_box, cloud_obj)` for a cloud relative to `cuboid`.
+
+  `cloud_obj` has the shape of the ego-frame cloud: its first three columns are
+  the points passed through `cuboid.obj_from_ego.get_inverse()`, any further
+  columns are copied over.  `in_box` is a boolean row mask (not indices) of the
+  points within half the cuboid's length / width / height on x / y / z.
+  """
+  import numpy as np
+
+  if cloud_ego is None:
+    assert pc is not None
+    sensor_xyz = pc.get_cloud()[:, :3]
+    cloud_ego = pc.ego_to_sensor.get_inverse().apply(sensor_xyz).T
+
+  obj_xyz = cuboid.obj_from_ego.get_inverse().apply(cloud_ego[:, :3]).T # TODO check with bev plots
+  cloud_obj = np.zeros(cloud_ego.shape)
+  cloud_obj[:, :3] = obj_xyz[:, :3]
+  cloud_obj[:, 3:] = cloud_ego[:, 3:]
+
+  # Filter to just object: an axis-aligned box centred on the object origin.
+  half_l = .5 * cuboid.length_meters
+  half_w = .5 * cuboid.width_meters
+  half_h = .5 * cuboid.height_meters
+  x, y, z = cloud_obj[:, 0], cloud_obj[:, 1], cloud_obj[:, 2]
+  in_box = (
+      (x >= -half_l) & (x <= half_l) &
+      (y >= -half_w) & (y <= half_w) &
+      (z >= -half_h) & (z <= half_h))
+  return in_box, cloud_obj
+
+def _move_clouds_to_ego_and_concat(point_clouds, camera_images=None):
+  """Stack the xyz columns of every cloud, moved through the inverse of its
+  `ego_to_sensor` transform, into one array.  With `camera_images` each cloud
+  is first passed through `datum.PointCloud.paint_ego_cloud`."""
+  paint = bool(camera_images)
+  clouds_ego = []
+  for pc in point_clouds:
+    xyz = pc.get_cloud()[:, :3] # TODO: can we keep colors?
+    xyz_ego = pc.ego_to_sensor.get_inverse().apply(xyz).T
+    if paint:
+      from psegs import datum
+      xyz_ego = datum.PointCloud.paint_ego_cloud(xyz_ego, camera_images=camera_images)
+    clouds_ego.append(xyz_ego)
+
+  if clouds_ego:
+    return np.vstack(clouds_ego)
+
+  # No clouds at all: an empty array, 6 columns wide when painting was requested, else 3.
+  return np.zeros((0, 6 if paint else 3))
+
+
+
+###############################################################################
+### Output Data Structures and Utils
+
+
+def draw_flow(img, flow, step=8):
+  """Return a copy of `img` with the flow sampled on a grid of spacing `step` drawn in green.
+  Based upon OpenCV sample: https://github.com/opencv/opencv/blob/master/samples/python/opt_flow.py"""
+  import cv2
+  rows, cols = img.shape[:2]
+  ys, xs = np.mgrid[step/2:rows:step, step/2:cols:step].reshape(2, -1).astype(int)
+  dx, dy = flow[ys, xs].T
+  # One (start, end) point pair per sampled pixel, rounded to the nearest integer pixel.
+  segments = np.int32(np.vstack([xs, ys, xs + dx, ys + dy]).T.reshape(-1, 2, 2) + 0.5)
+  canvas = img.copy()
+  cv2.polylines(canvas, segments, 0, (0, 255, 0))
+  for (x0, y0), _tip in segments: cv2.circle(canvas, (x0, y0), 1, (0, 255, 0), -1)
+  return canvas
+
+
+def warp_flow_forwards(img, flow):
+    """Forward-warp `img` by the optical flow `flow` and return the warped image.
+
+    Each pixel with non-zero flow is moved to `(x + flow_x, y + flow_y)`; the moved
+    samples are then linearly re-gridded per channel with scipy `griddata`, so
+    pixels outside their convex hull get griddata's NaN fill before the cast to
+    `img.dtype`.  With no flow anywhere a plain copy of `img` is returned.
+    With some help from https://stackoverflow.com/questions/41703210/inverting-a-real-valued-index-grid/46009462#46009462
+
+    Args:
+      img: `(h, w, channels)` array.
+      flow: `(h, w, 2)` array; channel 0 is the x and channel 1 the y displacement.
+    """
+    h, w = img.shape[:2]
+    pts = flow.copy()
+    pts[:, :, 0] += np.arange(w)
+    pts[:, :, 1] += np.arange(h)[:, np.newaxis]
+    exclude = (flow[:, :, :2] == np.array([0, 0])).all(axis=-1)
+    if exclude.all():
+        # No flow anywhere!  Return just the image, as the main path does; this
+        # branch used to return an `(image, mask)` tuple built with `np.bool`.
+        return img.copy()
+    inpts = pts[~exclude]
+
+    from scipy.interpolate import griddata
+    inpts = np.reshape(inpts, [-1, 2])
+    grid_y, grid_x = np.mgrid[:h, :w]
+    chan_out = []
+    for ch in range(img.shape[-1]):
+        spts = img[:, :, ch][~exclude].reshape([-1, 1])
+        mapped = griddata(inpts, spts, (grid_x, grid_y), method='linear')
+        chan_out.append(mapped.astype(img.dtype))
+    out = np.stack(chan_out, axis=-1)
+    out = out.reshape([h, w, len(chan_out)])
+    return out
+
+
+def viz_oflow_pair(ci1, ci2, uvdvis1, uvdvis2, v2v_flow=None):
+  """Return a debug mosaic image for an optical flow pair.
+
+  Row 1 is image 1 with cloud 1 and with cloud 2 drawn on it, row 2 is the
+  same for image 2, and when `v2v_flow` is given row 3 shows the flow drawn
+  on image 1 next to image 1 warped by the flow.  Only points whose last
+  `uvdvis` column equals 1 are drawn.
+  """
+  import cv2
+  from psegs.util.plotting import draw_xy_depth_in_image
+
+  FONT = cv2.FONT_HERSHEY_PLAIN
+  FONT_SCALE = 0.8
+
+  def overlay(image, uvd, label):
+    # Draw on a copy so the camera image itself stays untouched.
+    canvas = image.copy()
+    draw_xy_depth_in_image(canvas, uvd)
+    cv2.putText(
+        canvas,
+        label,
+        (10, 10),
+        FONT,
+        FONT_SCALE,
+        (128, 128, 128), # text_color
+        1) # thickness
+    return canvas
+
+  im1 = ci1.image
+  im2 = ci2.image
+
+  # Keep u, v, d of the points flagged visible.
+  uvd1 = uvdvis1[uvdvis1[:, -1] == 1, :3]
+  uvd2 = uvdvis2[uvdvis2[:, -1] == 1, :3]
+
+  debug_rows = [
+    np.concatenate(
+      [overlay(im1, uvd1, 'img1+cloud1'), overlay(im1, uvd2, 'img1+cloud2')],
+      axis=1),
+    np.concatenate(
+      # The first label used to read 'img2+cloud2' although cloud 1 is drawn.
+      [overlay(im2, uvd1, 'img2+cloud1'), overlay(im2, uvd2, 'img2+cloud2')],
+      axis=1),
+  ]
+
+  if v2v_flow is not None:
+    debug_im1_flow = draw_flow(im1.copy(), v2v_flow)
+    debug_im1_warped = warp_flow_forwards(im1.copy(), v2v_flow)
+    if isinstance(debug_im1_warped, tuple):
+      # Tolerate an `(image, mask)` tuple result; only the image is shown.
+      debug_im1_warped = debug_im1_warped[0]
+    debug_rows += [
+      np.concatenate([debug_im1_flow, debug_im1_warped], axis=1),
+    ]
+
+  debug_full = np.concatenate(debug_rows, axis=0)
+  return debug_full
+
+
+@attr.s(slots=True, eq=True, weakref_slot=False)
+class RenderedCloud(object):
+  """One cloud of a flow record: a `uvdvis` array plus the URIs of related datums."""
+  sample_id = attr.ib(default=0)
+  ego_pose_uri = attr.ib(
+    type=datum.URI, default=None, converter=datum.URI.from_str)
+  # Columns as read by to_html(): u, v, d and a flag that equals 1 for visible points.
+  uvdvis = attr.ib(type=np.ndarray, default=None)
+  # Factory defaults: a literal `[]` default would be shared by every instance.
+  ci_uris = attr.ib(default=attr.Factory(list))
+  cuboids_uris = attr.ib(default=attr.Factory(list))
+  pc_uris = attr.ib(default=attr.Factory(list))
+
+  def to_html(self):
+    """Return an HTML table summarising `uvdvis` and listing the URIs."""
+    def mmm(col):
+      return (col.min(), col.max(), col.mean())
+
+    rows = [
+      ['sample_id',     '', self.sample_id],
+      ['ego_pose_uri',  '', str(self.ego_pose_uri)],
+      ['uvdvis',        'shape', self.uvdvis.shape],
+      ['uvdvis', 'u min | max | mean', mmm(self.uvdvis[:, 0])],
+      ['uvdvis', 'v min | max | mean', mmm(self.uvdvis[:, 1])],
+      ['uvdvis', 'd min | max | mean', mmm(self.uvdvis[:, 2])],
+      ['uvdvis', 'num visible', np.sum(self.uvdvis[:, 3] == 1)]
+    ]
+    uri_groups = (
+      ('Camera Image URIs', self.ci_uris),
+      ('Cuboids URIs', self.cuboids_uris),
+      ('Point Cloud URIs', self.pc_uris),
+    )
+    for title, uris in uri_groups:
+      rows += [[title, '', '']]
+      rows += [['', '', str(uri)] for uri in uris]
+
+    from tabulate import tabulate
+    return tabulate(rows, tablefmt="html")
+
+
+@attr.s(slots=True, eq=True, weakref_slot=False)
+class FlowRecord(object):
+  """A group of RenderedClouds (a pair, for optical flow) sharing one image
+  plane whose extent is given by `u_min`..`u_max` and `v_min`..`v_max`."""
+
+  uri = attr.ib(
+    type=datum.URI, default=None, converter=datum.URI.from_str)
+
+  uri_key = attr.ib(default=str(datum.URI()))
+
+  # Factory default: a literal `[]` would be shared by every instance.
+  clouds = attr.ib(default=attr.Factory(list))
+
+  u_min = attr.ib(default=0.0, type=float)
+  u_max = attr.ib(default=0.0, type=float)
+  v_min = attr.ib(default=0.0, type=float)
+  v_max = attr.ib(default=0.0, type=float)
+
+  def num_clouds(self):
+    return len(self.clouds)
+
+  def to_optical_flow(self):
+    """Return an `(h, w, 2)` array that is zero except at the rounded pixel of each point
+    visible in both clouds, where it holds that point's `(du, dv)` from cloud 0 to cloud 1."""
+    # Only support pairs of images for now
+    assert self.num_clouds() == 2, self.num_clouds()
+    assert self.u_min == 0, self.u_min
+    assert self.v_min == 0, self.v_min
+
+    h, w = int(self.v_max), int(self.u_max)
+    uvdvis1 = self.clouds[0].uvdvis
+    uvdvis2 = self.clouds[1].uvdvis
+    visible_both = ((uvdvis1[:, -1] == 1) & (uvdvis2[:, -1] == 1))
+    visboth_uv1 = uvdvis1[visible_both, :2]
+    visboth_uv2 = uvdvis2[visible_both, :2]
+    # Plain `int`: the `np.int` alias was removed in NumPy 1.24.
+    xx = np.rint(visboth_uv1[:, 0]).astype(int)
+    yy = np.rint(visboth_uv1[:, 1]).astype(int)
+    # Rounding can land on column `w` / row `h`, and negative indices would
+    # wrap around, so keep only pixels that are inside the image.
+    inside = (xx >= 0) & (xx < w) & (yy >= 0) & (yy < h)
+    v2v_flow = np.zeros((h, w, 2))
+    v2v_flow[yy[inside], xx[inside]] = (visboth_uv2 - visboth_uv1)[inside]
+    return v2v_flow
+
+  def get_debug_image(self, camera_images=None):
+    """Return viz_oflow_pair()'s image for the two `camera_images`, or None when none are given."""
+    if camera_images:
+      # Only support optical flow pairs for now
+      assert len(camera_images) == 2, len(camera_images)
+      ci1, ci2 = camera_images
+      uvdvis1 = self.clouds[0].uvdvis
+      uvdvis2 = self.clouds[1].uvdvis
+      v2v_flow = self.to_optical_flow()
+      return viz_oflow_pair(ci1, ci2, uvdvis1, uvdvis2, v2v_flow=v2v_flow)
+    else:
+      # TODO pure cloud viz
+      return None
+
+  def to_html(self, camera_images=None):
+    """Return an HTML summary: uri and bounds, the optional debug image, then each cloud's table."""
+    from tabulate import tabulate
+    rows = [
+      ['uri', str(self.uri)],
+      ['u min/max', (self.u_min, self.u_max)],
+      ['v min/max', (self.v_min, self.v_max)],
+    ]
+    core_html = tabulate(rows, tablefmt="html")
+
+    debug_img = self.get_debug_image(camera_images=camera_images)
+    if debug_img is not None:
+      from oarphpy.plotting import img_to_img_tag
+      debug_img_html = img_to_img_tag(debug_img, format='png')
+    else:
+      debug_img_html = ''
+
+    HTML = """
+      FlowRecord<br/>
+      {core}<br/>
+      {debug_img_html}<br/><br/>
+
+      Clouds<br/>
+      {clouds}
+      """.format(
+            core=core_html,
+            debug_img_html=debug_img_html,
+            clouds="<br/>".join(c.to_html() for c in self.clouds))
+    return HTML
+
+    # rows += [['Clouds', '']]
+    # for cloud in self.clouds:
+    #   rows += [['', cloud.to_html()]]
+    
+
+
+    #   ['ego_pose_uri',  '', str(self.ego_pose_uri)],
+    #   ['uvdvis',        'shape', self.uvdvis.shape],
+    #   ['uvdvis', 'u min | max | mean', mmm(self.uvdvis[:, 0])],
+    #   ['uvdvis', 'v min | max | mean', mmm(self.uvdvis[:, 1])],
+    #   ['uvdvis', 'd min | max | mean', mmm(self.uvdvis[:, 2])],
+    #   ['uvdvis', 'num visible', np.sum(self.uvdvis[:, 3] == 1)]
+    # ]
+
+# Prototype instances with every field populated by a representative value.
+# NOTE(review): presumably used for schema inference (compare
+# `SampleDFFactory.table_schema()`) -- confirm against the users of these.
+RENDERED_CLOUD_PROTO = RenderedCloud(
+  sample_id=(1 << 65), # Force bigint
+  ego_pose_uri=datum.URI_PROTO,
+  uvdvis=np.ones((10, 4), dtype=np.float32),
+  ci_uris=[datum.URI_PROTO],
+  cuboids_uris=[datum.URI_PROTO],
+  pc_uris=[datum.URI_PROTO],
+)
+
+FLOW_RECORD_PROTO = FlowRecord(
+  uri=datum.URI_PROTO,
+  clouds=[RENDERED_CLOUD_PROTO],
+)
+
+
+
+
+# def iter_cleaned_world_clouds(SD_Table, task):
+#     pcs = [T.from_row(rr) for rr in task.pcs]
+#     cuboids = [T.from_row(c) for c in task.cuboids]
+#     for pc in pcs:
+        
+#         # for nusc we gotta filter the returns off ego vehicle !!!!!!!!!!!! -- note we may get these in lidarseg
+#         cloud = pc.cloud[:, :3]
+# #         cloud = cloud[np.where(  ~(
+# #                         (cloud[:, 0] <= 1.5) & (cloud[:, 0] >= -1.5) &  # Nusc lidar +x is +right
+# #                         (cloud[:, 1] <= 2.5) & (cloud[:, 0] >= -2.5) &  # Nusc lidar +y is +forward
+# #                         (cloud[:, 1] <= 1.5) & (cloud[:, 0] >= -1.5)   # Nusc lidar +z is +up
+# #         ))]
+#         # KITTI EDIT
+        
+        
+#         cloud_ego = pc.ego_to_sensor.get_inverse().apply(cloud[:, :3]).T
+    
+#         # Filter out all cuboids
+#         n_before = cloud_ego.shape[0]
+#         for cuboid in cuboids:
+#             xform = cuboid.obj_from_ego.get_inverse() # TODO check with bev plots ...
+#             cloud_obj = xform.apply(cloud_ego).T 
+    
+#             # Filter to just object
+#             hl, hw, hh = .5 * cuboid.length_meters, .5 * cuboid.width_meters, .5 * cuboid.height_meters
+#             outside_box = np.where(
+#                     np.logical_not(
+#                         (cloud_obj[:, 0] >= -hl) & (cloud_obj[:, 0] <= hl) &
+#                         (cloud_obj[:, 1] >= -hw) & (cloud_obj[:, 1] <= hw) &
+#                         (cloud_obj[:, 2] >= -hh) & (cloud_obj[:, 2] <= hh)))
+#             cloud_obj = cloud_obj[outside_box]
+            
+#             cloud_ego = xform.get_inverse().apply(cloud_obj).T
+        
+#         T_world_to_ego = pc.ego_pose
+#         cloud_world = T_world_to_ego.apply(cloud_ego).T # why is this name backwards?? -- hmm works for nusc too
+
+#         print('filtered', cloud_world.shape[0] - n_before)
+#         yield cloud_world
+    
+# # iclouds = culi_tasks_df.repartition(5000).rdd.flatMap(iter_cleaned_world_clouds).toLocalIterator(prefetchPartitions=True) # KITTI EDIT iterator
+
+class SampleDFFactory(object):
+  """Adapt a `StampedDatumTable` to a table of "samples", where each sample
+  has all point clouds, cuboids, and camera images associated with a specific
+  time point or event.  (Some datasets, like KITTI and Waymo OD, refer to
+  these as "frames"; we use the word "sample" to distinguish these groupings
+  from the unrelated frames-of-reference e.g. lidar frame, world frame, etc).
+
+  Each row in a Sample DataFrame contains sensor data and labels for a single
+  point in time.  Since different sensors record asynchronously (and at
+  different rates), each sample is essentially a synchronization (a grouping)
+  of the sensor data.  Each dataset needs to have its data synchronized
+  differently.
+
+  The (integer) Sample IDs express the temporal order of consecutive
+  samples in a segment.  Sample IDs are in chronological order: sample S+1
+  should contain data for an event one time-step after sample S. The IDs need
+  not be dense (there can be gaps e.g. 1, 2, 3, 7, 8, 9) but any gaps may
+  impact downstream users.
+
+  We create a DataFrame here (vs an RDD) so that it's cheap to omit columns /
+  sensors when needed.  Moreover, pairings of samples (e.g. for Flow) can be
+  done more efficiently using a DataFrame.
+  """
+
+  SRC_SD_TABLE = None
+
+  @classmethod
+  def table_schema(cls):
+    """Return the expected table schema.  The schema is derived once from a
+    prototype row and then cached on the class as `_schema`.  Subclasses only
+    need to override this in rare cases, e.g. if one of the columns will
+    always be empty / null."""
+    if hasattr(cls, '_schema'):
+      return cls._schema
+
+    from psegs.datum.stamped_datum import STAMPED_DATUM_PROTO
+    from oarphpy.spark import RowAdapter
+    from pyspark.sql import Row
+
+    proto_row = Row(
+      sample_id=0,
+      pc_sds=[STAMPED_DATUM_PROTO],
+      cuboids_sds=[STAMPED_DATUM_PROTO],
+      ci_sds=[STAMPED_DATUM_PROTO])
+    cls._schema = RowAdapter.to_schema(proto_row)
+    return cls._schema
+
+  @classmethod
+  def build_df_for_segment(cls, spark, segment_uri):
+    """Build the Sample DataFrame for `segment_uri`; subclasses must
+    implement this.  The DF should have rows like:
+    Row(sample_id | list[Point_cloud] | list[cuboids] | list[camera_image])"""
+    raise NotImplementedError()
+
+
+
+
+
+class CloudFuser(object):
+  """
+  Fuse lidar returns into per-object point clouds: read a sample table,
+  write the fused clouds as PLY files to disk, and emit them as
+  `StampedDatum`s on a topic `lidar|objects_fused|<FUSER_ALGO_NAME>`.
+
+  NOTE(review): several methods below reference members that are not
+  defined in this class body (e.g. `HAS_OBJ_CLOUDS`, `SPLITS`, `SRC_SD_T`,
+  `get_all_segment_uris`); presumably subclasses provide them -- confirm.
+  """
+
+  # Name of the fusion algorithm; used in output directories and topic names
+  FUSER_ALGO_NAME = 'naive_fuser'
+
+  # Checked by `_build_fused_for_tracks()` to decide whether camera image
+  # columns are kept when fusing object clouds
+  FUSE_OBJ_INCLUDE_RGB = True
+
+  @classmethod
+  def get_fused_obj_sds(cls, spark, segment_uri, sample_df):
+    """Return `StampedDatum`s for the fused object clouds of a segment,
+    first building the clouds of any tracks that are not yet indexed.
+
+    Args:
+      spark: Spark session, forwarded to `_build_fused_for_tracks()`.
+      segment_uri: URI of the segment; copied to create each datum's URI.
+      sample_df: Sample DataFrame; its cuboids determine the requested
+        track IDs.
+
+    Returns:
+      list: One `StampedDatum` per row of the segment's fused cloud index
+        (i.e. for every indexed track, not only the requested ones).  Each
+        has a lazily-loaded `PointCloud`.  Empty if the segment has no index.
+    """
+
+    # Maybe build fused objects if we have not already
+    requested_track_ids = cls._get_track_ids(sample_df)
+    seg_index = cls._get_seg_index(segment_uri)
+    if seg_index is not None:
+      # The index is read back from CSV, so pandas may have parsed ID-like
+      # values as numbers; compare IDs by their string form.
+      have_track_ids = set(str(tid) for tid in seg_index['track_id'])
+    else:
+      have_track_ids = set()
+
+    track_ids_to_build = set(
+      tid for tid in requested_track_ids if str(tid) not in have_track_ids)
+    if track_ids_to_build:
+      util.log.info(
+        "Sample has %s objects, have %s objects, building %s objects ..." % (
+        len(requested_track_ids), len(have_track_ids), len(track_ids_to_build)))
+      cls._build_fused_for_tracks(
+        spark, segment_uri, track_ids_to_build, sample_df)
+      util.log.info("... done building fused object clouds.")
+
+    # Now build and return StampedDatum flyweights
+    seg_index = cls._get_seg_index(segment_uri)
+    util.log.info("Using fused object clouds: %s" % str(seg_index))
+    if seg_index is None:
+      # No index on disk and nothing to build: there are no fused clouds
+      return []
+
+    datums = []
+    for _, row in seg_index.iterrows():
+      track_id = str(row['track_id'])
+      cloud_path = row['path']
+      n_points = row['n_points']
+
+      uri = copy.deepcopy(segment_uri)
+      uri.topic = 'lidar|objects_fused|' + cls.FUSER_ALGO_NAME
+      uri.track_id = track_id
+
+      def _load_cloud(path=cloud_path): # force capture by copy
+        import open3d as o3d
+        import numpy as np
+        pcd = o3d.io.read_point_cloud(str(path))
+        cloud = np.asarray(pcd.points)
+        if pcd.has_colors():
+          cloud = np.hstack([cloud, np.asarray(pcd.colors).astype(np.float32)])
+        return cloud
+
+      if n_points > 0:
+        cloud_factory = _load_cloud
+      else:
+        # No PLY is written for empty clouds
+        cloud_factory = lambda: np.zeros((0, 3))
+
+      pc = datum.PointCloud(
+        sensor_name=uri.topic + '|' + track_id,
+        timestamp=uri.timestamp,
+        cloud_factory=cloud_factory,
+        ego_to_sensor=datum.Transform(), # Hack! cloud is in world frame
+        ego_pose=datum.Transform(),
+        extra={'track_id': track_id})
+      datums.append(datum.StampedDatum(uri=uri, point_cloud=pc))
+    return datums
+
+  ## Utils
+
+  @classmethod
+  def obj_cloud_base_path(cls):
+    """Return the root directory for fused object clouds of this fuser:
+    `C.DATA_ROOT / 'fused_obj_clouds' / FUSER_ALGO_NAME`."""
+    fused_root = C.DATA_ROOT / 'fused_obj_clouds'
+    return fused_root / cls.FUSER_ALGO_NAME
+  
+  @classmethod
+  def obj_cloud_seg_basepath(cls, segment_uri):
+    """Return the directory holding the fused object clouds of one segment:
+    `<base path> / dataset / split / segment_id`."""
+    dataset_dir = cls.obj_cloud_base_path() / segment_uri.dataset
+    split_dir = dataset_dir / segment_uri.split
+    return split_dir / segment_uri.segment_id
+
+  @classmethod
+  def obj_cloud_path(cls, segment_uri, tag):
+    """Return the PLY path for the fused object cloud identified by `tag`
+    (slugified to make it filename-safe) in the given segment."""
+    from slugify import slugify
+    ply_name = 'fused_obj.%s.ply' % slugify(tag)
+    return cls.obj_cloud_seg_basepath(segment_uri) / ply_name
+
+  @classmethod
+  def obj_cloud_idx_path(cls, segment_uri):
+    """Return the path of the CSV index of a segment's fused object clouds."""
+    return cls.obj_cloud_seg_basepath(segment_uri) / "cloud_idx.csv"
+
+  ## Support
+
+  @classmethod
+  def _get_seg_index(cls, segment_uri):
+    """Read the segment's fused cloud index CSV into a pandas DataFrame;
+    return `None` if the index file does not exist."""
+    idx_path = cls.obj_cloud_idx_path(segment_uri)
+    if not idx_path.exists():
+      return None
+    return pd.read_csv(idx_path)
+
+  @classmethod
+  def _get_track_ids(cls, sample_df):
+    """Return the set of distinct cuboid `track_id`s in `sample_df`, found
+    by exploding the `cuboids_sds` column and then each datum's `cuboids`."""
+    from pyspark.sql import functions as F
+    cuboid_df = (
+      sample_df
+        .select(F.explode(F.col('cuboids_sds')))
+        .select(F.explode(F.col('col.cuboids'))))
+    rows = cuboid_df.select('col.track_id').collect()
+    return {row.track_id for row in rows}
+  
+  @classmethod
+  def _build_fused_for_tracks(
+          cls, spark, segment_uri, track_ids_to_build, sample_df):
+    """Fuse one point cloud per track and save each as a PLY file.
+
+    For every sample row, the sample's point clouds are merged (via
+    `_move_clouds_to_ego_and_concat()`) and, for each cuboid whose `track_id`
+    is in `track_ids_to_build`, the points selected by
+    `get_point_idx_in_cuboid()` are emitted keyed by `track_id`.  Spark
+    concatenates the per-sample clouds of each track; the driver then writes
+    one PLY per non-empty track cloud and rewrites the segment's CSV index
+    (new rows plus any rows of an existing index).
+
+    While the job runs, a background thread logs accumulated counters every
+    few seconds.
+
+    Args:
+      spark: Spark session; used for its `sparkContext`.
+      segment_uri: URI of the segment; determines the output paths.
+      track_ids_to_build: Collection of track IDs to fuse.
+      sample_df: Sample DataFrame with columns `sample_id`, `pc_sds`,
+        `cuboids_sds`, and (optionally) `ci_sds`.
+    """
+
+    if not cls.FUSE_OBJ_INCLUDE_RGB:
+      # Drop the camera image column.  NB: `select()` returns a new
+      # DataFrame, so the result must be re-bound to have any effect.
+      sample_df = sample_df.select('sample_id', 'pc_sds', 'cuboids_sds')
+
+    from pyspark.sql import functions as F
+    sd_df = sample_df.select(F.explode(F.col('cuboids_sds')))
+    cuboid_df = sd_df.select(F.explode(F.col('col.cuboids')))
+    track_df = cuboid_df.select('col.track_id', 'col.category_name')
+    track_id_to_category = dict(
+      (r.track_id, r.category_name) for r in track_df.collect())
+
+    from pyspark.accumulators import AccumulatorParam
+    from collections import Counter
+    class CounterAccumulator(AccumulatorParam):
+      def zero(self, value):
+        return Counter({})
+      def addInPlace(self, value1, value2):
+        return value1 + value2
+
+    sc = spark.sparkContext
+    C_acc = sc.accumulator(Counter(), CounterAccumulator())
+
+    class IterObjCloudKV(object):
+      """Maps a sample row to `(track_id, object cloud)` pairs."""
+      def __init__(self, C_acc):
+        self.C_acc = C_acc
+
+      def __call__(self, sample_row):
+        import itertools
+        from oarphpy.spark import RowAdapter
+        from collections import Counter
+        FROM_ROW = RowAdapter.from_row
+        t = MyT(name='process_sample_row')
+        t.start_block()
+        counter = Counter()
+
+        from threadpoolctl import threadpool_limits
+        with threadpool_limits(limits=1, user_api='blas'):
+
+          cis = []
+          if hasattr(sample_row, 'ci_sds'):
+            cis = [FROM_ROW(rr).camera_image for rr in sample_row.ci_sds]
+          pcs = [FROM_ROW(rr).point_cloud for rr in sample_row.pc_sds]
+          cloud_ego = _move_clouds_to_ego_and_concat(pcs, camera_images=cis)
+
+          cuboid_sds = [
+            FROM_ROW(cu) for cu in sample_row.cuboids_sds
+          ]
+          cuboids = list(itertools.chain.from_iterable(
+            (cu for cu in sd.cuboids if cu.track_id in track_ids_to_build)
+            for sd in cuboid_sds))
+
+          for cuboid in cuboids:
+            in_box, cloud_obj = get_point_idx_in_cuboid(cuboid, cloud_ego=cloud_ego)
+            cloud_obj = cloud_obj[in_box]
+
+            # TODO: add sample_id as a column?
+
+            t.update_tallies(n=1, num_bytes=cloud_obj.nbytes)
+            yield (cuboid.track_id, cloud_obj)
+
+        # This is a generator: the stats below are recorded only once the
+        # consumer has exhausted all the pairs of this row.
+        t.stop_block()
+        counter['n_point_clouds'] += len(pcs)
+        counter['n_camera_images'] += len(cis)
+        counter['n_cuboids'] += len(cuboids)
+        counter['cloud_ego_MBytes'] += 1e-6 * cloud_ego.nbytes
+        counter['t_process_sample_row'] = t
+        self.C_acc += counter
+
+    def concat_obj_clouds(c1, c2):
+      return np.vstack([c1, c2])
+
+    # Log the accumulated counters periodically from a background thread
+    import threading
+    exit_event = threading.Event()
+    def spin_log():
+      REPORT_EVERY_SEC = 10
+      import time
+      start_wait = time.time()
+      while not exit_event.is_set():
+        import pprint
+        if time.time() - start_wait >= REPORT_EVERY_SEC:
+          util.log.info(pprint.pformat(C_acc.value))
+          start_wait = time.time()
+        time.sleep(0.5)
+    bkg_th = threading.Thread(target=spin_log, args=())
+    bkg_th.daemon = True
+    bkg_th.start()
+
+    idx_rows = []
+    try:
+      from pyspark import StorageLevel
+      iter_obj_cloud_kv = IterObjCloudKV(C_acc)
+      track_obj_rdd = sample_df.rdd.flatMap(iter_obj_cloud_kv)
+      track_obj_rdd = track_obj_rdd.persist(StorageLevel.MEMORY_AND_DISK)
+      tid_to_obj_cloud_rdd = track_obj_rdd.reduceByKey(concat_obj_clouds)
+
+      t = MyT(name='save_fused_objects', n_total=len(track_ids_to_build))
+      t.start_block()
+      for tid_cloud in tid_to_obj_cloud_rdd.toLocalIterator():
+        track_id, obj_cloud = tid_cloud
+        category = track_id_to_category[track_id]
+
+        n_points = obj_cloud.shape[0]
+        dest_path = cls.obj_cloud_path(segment_uri, track_id + '.' + category)
+        oputil.mkdir(dest_path.parent)
+
+        if n_points > 0:
+          import open3d as o3d
+          pcd = o3d.geometry.PointCloud()
+          pcd.points = o3d.utility.Vector3dVector(obj_cloud[:, :3])
+          if obj_cloud.shape[-1] > 3:
+            pcd.colors = o3d.utility.Vector3dVector(obj_cloud[:, 3:] / 256.)
+          o3d.io.write_point_cloud(str(dest_path), pcd)
+          util.log.info("... saved fused %s to %s ..." % (track_id, dest_path))
+
+        idx_row = {
+          'track_id': str(track_id),
+          'category': category,
+          'n_points': n_points,
+          'cloud_shape': obj_cloud.shape,
+          'cloud_MBytes': 1e-6 * oputil.get_size_of_deep(obj_cloud),
+          'path': dest_path,
+        }
+        idx_rows.append(idx_row)
+
+        t.update_tallies(n=1, num_bytes=obj_cloud.nbytes, new_block=True)
+        t.maybe_log_progress(every_n=20)
+      util.log.info("... wrote clouds, stats: %s" % str(t))
+    finally:
+      # Always stop the progress logger, even if the Spark job failed
+      exit_event.set()
+      bkg_th.join()
+
+    import pandas as pd
+    seg_index = pd.DataFrame(idx_rows)
+    existing_seg_index = cls._get_seg_index(segment_uri)
+    if existing_seg_index is not None:
+      seg_index = pd.concat([seg_index, existing_seg_index])
+    # Omit the pandas row index: otherwise every read / re-write cycle of the
+    # CSV would add another "Unnamed" column.
+    seg_index.to_csv(cls.obj_cloud_idx_path(segment_uri), index=False)
+
+    util.log.info("Saved fused clouds. Wrote %.2f MBytes" % (
+      seg_index['cloud_MBytes'].sum()))
+    util.log.info("Stats:")
+    with pd.option_context('display.max_colwidth', None):
+      util.log.info(str(seg_index))
+    
+  
+
+
+  ### Subclass API
+
+  @classmethod
+  def _should_build_world_cloud(cls, segment_uri):
+    """Return whether the fused world cloud for `segment_uri` should be
+    built.  Currently always `False`: building world clouds to disk is
+    disabled (the previous file-existence check is kept below, commented
+    out)."""
+    return False # hacks no more build to disk
+    # return not cls.world_cloud_path(segment_uri).exists()
+
+  @classmethod
+  def _should_build_obj_clouds(cls, segment_uri):
+    """Return whether the object clouds of `segment_uri` need building:
+    `False` if `HAS_OBJ_CLOUDS` is falsy; otherwise whether the segment's
+    object cloud directory is missing or empty."""
+    if cls.HAS_OBJ_CLOUDS:
+      seg_dir = str(cls.obj_cloud_seg_basepath(segment_uri))
+      return oputil.missing_or_empty(seg_dir)
+    return False
+
+
+  ## Utils
+
+  # @classmethod
+  # def SRC_SD_T(cls):
+  #   return cls.FUSED_LIDAR_SD_TABLE.SRC_SD_TABLE
+
+  # @classmethod
+  # def _get_task_lidar_cuboid_rdd(cls, spark, segment_uri):
+  #   # "need RDD of Row(task_id | list[Point_cloud] | list[cuboids])"
+  #   df = cls.FUSED_LIDAR_SD_TABLE.build_df_for_segment(spark, segment_uri)
+  #   df = df.select('task_id', 'point_clouds', 'cuboids')
+  #   T = cls.SRC_SD_T()
+  #   unpacked_rdd = df.rdd.map(T.from_row)
+  #   return unpacked_rdd
+
+  ## World Cloud Fusion
+
+  @classmethod
+  def _filter_ego_vehicle(cls, cloud_ego):
+    """Optionally filter self-returns in cloud in the ego frame for some
+    datasets (e.g. NuScenes).
+
+    This base implementation is a no-op: it returns `cloud_ego` unchanged.
+
+    Args:
+      cloud_ego: A point cloud in the ego frame.
+
+    Returns:
+      The (filtered) cloud.
+    """
+    return cloud_ego
+
+  @classmethod
+  def _get_cleaned_world_cloud(cls, point_clouds, cuboids):
+    """Fuse `point_clouds` into a single cloud with all points inside
+    `cuboids` removed.
+
+    Each cloud (xyz only) is moved to the ego frame using the inverse of its
+    `ego_to_sensor` transform, passed through `_filter_ego_vehicle()`, pruned
+    of the points that `get_point_idx_in_cuboid()` reports inside any cuboid,
+    and finally transformed with its `ego_pose`.
+
+    Args:
+      point_clouds: Iterable of point cloud objects.
+      cuboids: Iterable of cuboids whose interior points are dropped.
+
+    Returns:
+      tuple: `(cloud, pruned_counts)` where `cloud` is the row-wise stack of
+        the cleaned clouds (shape `(0, 3)` when `point_clouds` is empty) and
+        `pruned_counts` lists, per input cloud, the number of points removed
+        by the cuboid filter.
+    """
+    cleaned_clouds = []
+    pruned_counts = []
+    for pc in point_clouds:
+      cloud = pc.get_cloud()[:, :3] # TODO: can we keep colors?
+      cloud_ego = pc.ego_to_sensor.get_inverse().apply(cloud).T
+
+      cloud_ego = cls._filter_ego_vehicle(cloud_ego)
+
+      # Filter out all cuboids
+      n_before = cloud_ego.shape[0]
+      for cuboid in cuboids:
+        in_box, _ = get_point_idx_in_cuboid(cuboid, cloud_ego=cloud_ego)
+        cloud_ego = cloud_ego[~in_box]
+      n_after = cloud_ego.shape[0]
+
+      T_world_to_ego = pc.ego_pose
+      cloud_world = T_world_to_ego.apply(cloud_ego).T # why is this name backwards?? -- hmm works for nusc too
+
+      cleaned_clouds.append(cloud_world)
+      pruned_counts.append(n_before - n_after)
+
+    if not cleaned_clouds:
+      # np.vstack() raises a ValueError for an empty list
+      return np.zeros((0, 3)), pruned_counts
+    return np.vstack(cleaned_clouds), pruned_counts
+
+  @classmethod
+  def _task_to_clean_world_cloud(cls, task_row):
+    """Return `(world_cloud, pruned_counts)` from
+    `_get_cleaned_world_cloud()` for the `point_clouds` and `cuboids` of
+    `task_row`."""
+    world_cloud, pruned_counts = cls._get_cleaned_world_cloud(
+      task_row.point_clouds, task_row.cuboids)
+    return world_cloud, pruned_counts
+
+
+  # Object Cloud Fusion
+
+  
+
+  ### Core Impl
+
+  ## World Clouds
+
+  @classmethod
+  def world_clouds_base_path(cls):
+    """Return the root directory for fused world clouds of this fuser:
+    `C.DATA_ROOT / 'fused_world_clouds' / FUSER_ALGO_NAME`."""
+    fused_root = C.DATA_ROOT / 'fused_world_clouds'
+    return fused_root / cls.FUSER_ALGO_NAME
+
+  @classmethod
+  def world_cloud_path(cls, segment_uri):
+    """Return the PLY path of a segment's fused world cloud:
+    `<base path> / dataset / split / segment_id / fused_world.ply`."""
+    dataset_dir = cls.world_clouds_base_path() / segment_uri.dataset
+    seg_dir = dataset_dir / segment_uri.split / segment_uri.segment_id
+    return seg_dir / 'fused_world.ply'
+
+  @classmethod
+  def _build_world_cloud(cls, spark, segment_uri, culi_tasks_rdd):
+    """Fuse the cleaned world clouds of all tasks of a segment and write
+    the result as a single PLY file at `world_cloud_path(segment_uri)`.
+
+    Does nothing unless `_should_build_world_cloud()` says to build.
+
+    Args:
+      spark: Spark session (unused here).
+      segment_uri: URI of the segment to build.
+      culi_tasks_rdd: RDD of task rows accepted by
+        `_task_to_clean_world_cloud()`.
+    """
+    if not cls._should_build_world_cloud(segment_uri):
+      return
+
+    dest_path = cls.world_cloud_path(segment_uri)
+    oputil.mkdir(dest_path.parent)
+    util.log.info("Building world cloud to %s ..." % dest_path)
+
+    n_tasks = culi_tasks_rdd.count()
+    util.log.info("... fusing %s tasks ..." % n_tasks)
+    # Elements are tuples of (world cloud, list of pruned point counts)
+    world_cloud_rdd = culi_tasks_rdd.map(cls._task_to_clean_world_cloud)
+
+    # Force fusion before we pull clouds to the driver (prevent an OOM)
+    from pyspark import StorageLevel
+    world_cloud_rdd = world_cloud_rdd.persist(StorageLevel.MEMORY_AND_DISK)
+    t = oputil.ThruputObserver(name='FuseWorldClouds', n_total=n_tasks)
+    t.start_block()
+    n_bytes_n_pruned_rdd = world_cloud_rdd.map(
+      lambda c_pc: (oputil.get_size_of_deep(c_pc[0]), c_pc[1]))
+    n_bytes_n_pruned = n_bytes_n_pruned_rdd.collect()
+    n_bytes = sum(nn[0] for nn in n_bytes_n_pruned)
+    t.stop_block(n=n_tasks, num_bytes=n_bytes)
+    t.maybe_log_progress(every_n=1)
+    # Each task reports one count per point cloud, and tasks may have
+    # different numbers of clouds; flatten to avoid a ragged array.
+    n_pruned = np.array(
+      [count for nn in n_bytes_n_pruned for count in nn[1]])
+    util.log.info("Total points pruned: %s" % np.sum(n_pruned))
+    util.log.info("Avg pts pruned per cloud: %s" % np.mean(n_pruned))
+
+    iclouds = world_cloud_rdd.toLocalIterator(prefetchPartitions=True)
+    iclouds = oputil.ThruputObserver.to_monitored_generator(
+                iclouds, name='CollectWorldClouds',
+                log_freq=500, n_total=n_tasks, log_on_del=True) # fixme log_on_del!~~~~~~~~~
+
+    # Pull one partition at a time to avoid a driver OOM.  Keep only the
+    # cloud of each (cloud, pruned counts) tuple.
+    clouds = [c_pc[0] for c_pc in iclouds]
+    if len(clouds) > 0:
+      world_cloud = np.vstack(clouds)
+    else:
+      world_cloud = np.zeros((0, 3))
+    util.log.info(
+      "... computed world cloud for %s of shape %s (%.2f GB) ..." % (
+        segment_uri.segment_id, world_cloud.shape,
+        1e-9 * oputil.get_size_of_deep(world_cloud)))
+
+    util.log.info("... writing ply to %s ..." % dest_path)
+    import open3d as o3d
+    pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(world_cloud)
+    o3d.io.write_point_cloud(str(dest_path), pcd)
+    util.log.info("... done writing ply.")
+
+  ## Object Clouds
+
+  
+
+  @classmethod
+  def _build_object_clouds(cls, spark, segment_uri, culi_tasks_rdd):
+    """Fuse the per-task object clouds of a segment into one cloud per
+    track, write each non-empty cloud as a PLY file, and write a CSV index
+    of the results to `obj_cloud_idx_path(segment_uri)`.
+
+    Does nothing unless `_should_build_obj_clouds()` says to build.
+
+    NOTE(review): relies on `cls._task_to_obj_cloud_rows` and
+    `cls._get_fused_cloud`, which are not defined in the visible class
+    body; presumably subclasses provide them -- confirm.
+
+    Args:
+      spark: Spark session; used to create a DataFrame of object cloud rows.
+      segment_uri: URI of the segment to build.
+      culi_tasks_rdd: RDD of task rows, flat-mapped through
+        `cls._task_to_obj_cloud_rows`.
+    """
+    if not cls._should_build_obj_clouds(segment_uri):
+      return
+
+    # Map task rows to rows of (partial) obj cloud s.  We create a dataframe
+    # from the result because it will better help Spark budget memory.
+    util.log.info("Pruning object clouds ...")
+    obj_cloud_row_rdd = culi_tasks_rdd.flatMap(cls._task_to_obj_cloud_rows)
+    obj_cloud_df = spark.createDataFrame(obj_cloud_row_rdd)
+    obj_cloud_df = obj_cloud_df.persist()
+    n_rows = obj_cloud_df.count()
+    n_tracks = obj_cloud_df.select('track_id').distinct().count()
+    util.log.info("... have %s clouds of %s objects to fuse ..." % (
+      n_rows, n_tracks))
+
+
+    # Now fuse object clouds and save to disk
+    seg_basepath = cls.obj_cloud_seg_basepath(segment_uri)
+    util.log.info("... fusing obj clouds, saving to %s ..." % seg_basepath)
+    grouped = obj_cloud_df.rdd.groupBy(lambda r: r.track_id)
+
+    def _fuse_and_save(track_id_irows):
+      # Fuse all the rows of one track, save the result (if non-empty), and
+      # return a summary row for the index.
+      track_id, irows = track_id_irows
+      obj_cloud = cls._get_fused_cloud(irows)
+      n_points = obj_cloud.shape[0]
+
+      dest_path = cls.obj_cloud_path(segment_uri, track_id)
+      oputil.mkdir(dest_path.parent)
+
+      if n_points > 0:
+        import open3d as o3d
+        pcd = o3d.geometry.PointCloud()
+        pcd.points = o3d.utility.Vector3dVector(obj_cloud)
+        o3d.io.write_point_cloud(str(dest_path), pcd)
+
+      idx_row = {
+        'track_id': str(track_id),
+        'n_points': n_points,
+        'cloud_shape': obj_cloud.shape,
+        'cloud_MBytes': 1e-6 * oputil.get_size_of_deep(obj_cloud),
+        'path': dest_path,
+      }
+      return idx_row
+
+    # The fusing and saving runs in the Spark job; only the small index rows
+    # are collected.
+    all_idx_rows = grouped.map(_fuse_and_save).collect()
+    idx_df = pd.DataFrame(all_idx_rows)
+
+    util.log.info("Saved fused clouds to %s. Wrote %2.f MBytes" % (
+      seg_basepath, idx_df['cloud_MBytes'].sum()))
+    util.log.info("Stats:")
+
+    with pd.option_context('display.max_colwidth', None):
+      util.log.info(str(idx_df))
+
+    idx_df.to_csv(cls.obj_cloud_idx_path(segment_uri))
+
+  @classmethod
+  def _build_fused_clouds(cls, spark, segment_uris=None):
+    """Build the fused world cloud and object clouds of each segment in
+    `segment_uris` (default: all segment URIs of `cls.SRC_SD_T()`), skipping
+    segments for which neither needs building."""
+    util.log.info("%s building fused clouds ..." % cls.__name__)
+
+    if not segment_uris:
+      segment_uris = cls.SRC_SD_T().get_all_segment_uris()
+    n_segs = len(segment_uris)
+    util.log.info("... have %s segments to fuse ..." % n_segs)
+
+    observer = oputil.ThruputObserver(name='FuseEachSegment', n_total=n_segs)
+    for suri in segment_uris:
+      observer.start_block()# TODO add a log to stop block or give a loop body wrapper ....
+      util.log.info("... working on %s ..." % suri.segment_id)
+
+      if (cls._should_build_world_cloud(suri) or
+            cls._should_build_obj_clouds(suri)):
+        culi_tasks_rdd = cls._get_task_lidar_cuboid_rdd(spark, suri)
+        cls._build_world_cloud(spark, suri, culi_tasks_rdd)
+        cls._build_object_clouds(spark, suri, culi_tasks_rdd)
+      else:
+        util.log.info(
+          "... skipping %s; world and obj clouds done" % suri.segment_id)
+        util.log.info("World Cloud: %s" % cls.world_cloud_path(suri))
+        util.log.info("Obj Clouds: %s" % cls.obj_cloud_seg_basepath(suri))
+
+      observer.stop_block(n=1)
+      observer.maybe_log_progress(every_n=1)
+
+    util.log.info("... %s done fusing clouds." % cls.__name__)
+
+  ### StampedDatumTable Impl
+
+  @classmethod
+  def _get_all_segment_uris(cls):
+    """Return the segment URIs of `cls.SRC_SD_T()` whose `split` is in
+    `cls.SPLITS`."""
+    all_uris = cls.SRC_SD_T().get_all_segment_uris()
+    return [uri for uri in all_uris if uri.split in cls.SPLITS]
+
+  @classmethod
+  def _create_datum_rdds(
+    cls, spark, existing_uri_df=None, only_segments=None):
+    """Build the fused clouds of the selected segments and return the
+    resulting `StampedDatum`s as a list holding a single RDD.
+
+    Args:
+      spark: Spark session.
+      existing_uri_df: Ignored (resume mode is not supported); a warning is
+        logged if given.
+      only_segments: Optional list of URIs; if given, only segments
+        soft-matching one of them are processed.
+
+    Returns:
+      list: One RDD of the object cloud datums (empty unless
+        `cls.HAS_OBJ_CLOUDS`).
+    """
+
+    if existing_uri_df is not None:
+      # NB: `warn()` is a deprecated alias of `warning()` and was removed
+      # from `logging.Logger` in Python 3.13.
+      util.log.warning("Note: resume mode not supported in %s" % cls.__name__)
+
+    seg_uris = cls.get_all_segment_uris()
+    if only_segments:
+      util.log.info("Filtering to only %s segments" % len(only_segments))
+      seg_uris = [
+          uri for uri in seg_uris
+          if any(
+            suri.soft_matches_segment(uri) for suri in only_segments)
+      ]
+
+    cls._build_fused_clouds(spark, segment_uris=seg_uris)
+
+    sds = []
+    for seg_uri in seg_uris:
+      # sds.append(cls._create_world_cloud_sd(seg_uri)) # hacks no more world clouds ~~~~~~~~~~~~
+      if cls.HAS_OBJ_CLOUDS:
+        sds.extend(cls._create_obj_cloud_sds(seg_uri))
+    datum_rdds = [spark.sparkContext.parallelize(sds)]
+    return datum_rdds
+
+  @classmethod
+  def _create_world_cloud_sd(cls, segment_uri):
+    """Return a `StampedDatum` on topic `lidar|world_fused|<algo name>`
+    whose point cloud is read lazily from the segment's fused world PLY."""
+    uri = copy.deepcopy(segment_uri)
+    uri.topic = 'lidar|world_fused|' + cls.FUSER_ALGO_NAME
+
+    wcloud_path = cls.world_cloud_path(segment_uri)
+
+    def _read_world_cloud(path=wcloud_path):
+      import open3d as o3d
+      import numpy as np
+      ply_path = str(path)
+      size_gb = 1e-9 * os.path.getsize(ply_path)
+      util.log.info("Reading world cloud %s GB at %s" % (size_gb, ply_path))
+      return np.asarray(o3d.io.read_point_cloud(ply_path).points)
+
+    pc = datum.PointCloud(
+      sensor_name=uri.topic,
+      timestamp=uri.timestamp,
+      cloud_factory=_read_world_cloud,
+      ego_to_sensor=datum.Transform(), # Hack! cloud is in world frame
+      ego_pose=datum.Transform())
+    return datum.StampedDatum(uri=uri, point_cloud=pc)
+
+  @classmethod
+  def _create_obj_cloud_sds(cls, segment_uri):
+    """Yield one `StampedDatum` per row of the segment's fused object cloud
+    index.  Each datum's point cloud is read lazily from the row's PLY path,
+    or is an empty `(0, 3)` array if the row has no points."""
+    idx_df = pd.read_csv(cls.obj_cloud_idx_path(segment_uri))
+    print('_create_obj_cloud_sds', idx_df)
+    for _, idx_row in idx_df.iterrows():
+      track_id = str(idx_row['track_id'])
+      ply_path = idx_row['path']
+      has_points = idx_row['n_points'] > 0
+
+      uri = copy.deepcopy(segment_uri)
+      uri.topic = 'lidar|objects_fused|' + cls.FUSER_ALGO_NAME
+      uri.track_id = track_id
+
+      def _read_ply(path=ply_path): # default arg binds this row's path
+        import open3d as o3d
+        import numpy as np
+        return np.asarray(o3d.io.read_point_cloud(str(path)).points)
+
+      cloud_factory = _read_ply if has_points else (lambda: np.zeros((0, 3)))
+
+      pc = datum.PointCloud(
+        sensor_name=uri.topic + '|' + track_id,
+        timestamp=uri.timestamp,
+        cloud_factory=cloud_factory,
+        ego_to_sensor=datum.Transform(), # Hack! cloud is in world frame
+        ego_pose=datum.Transform(),
+        extra={'track_id': track_id})
+      yield datum.StampedDatum(uri=uri, point_cloud=pc)
+
+
+
+###############################################################################
+### Optical Flow from Fused Lidar
+
+
+###############################################################################
+## FROM PAPER SCRATCH
+
+
+## PSEGS
+
+
+def color_to_opencv(color):
+  """Convert an `(r, g, b)` color to an OpenCV-ordered `(b, g, r)` tuple of
+  ints, clamping each channel to [0, 255] first."""
+  clipped = np.clip(color, 0, 255)
+  red, green, blue = clipped.astype(int).tolist()
+  return blue, green, red
+
+def rgb_for_distance(d_meters, period_meters=10.):
+  """Map a distance (or an array of distances) to an `[r, g, b]` color.
+
+  Every multiple of `period_meters` gets a distinct color; the color of a
+  distance in between is a blend of the colors of the two neighboring
+  multiples, weighted by how close the distance is to each.
+
+  Returns:
+    np.array: A single color if `d_meters` has one element (or is a
+      scalar); otherwise a 2D array with one color per distance.
+  """
+  from oarphpy.plotting import hash_to_rbg
+
+  if not isinstance(d_meters, np.ndarray):
+    d_meters = np.array([d_meters])
+
+  SEED = 10 # Colors for 0 and 1 look too similar otherwise
+  n_buckets = int(np.ceil(d_meters.max() / period_meters)) + 2
+  palette = np.array([hash_to_rbg(b + SEED) for b in range(n_buckets)])
+
+  # Look up the colors of the buckets below and above each distance using
+  # numpy indexing into the palette "table"
+  lower = np.floor(d_meters / period_meters)
+  upper = lower + 1
+  lower_color = palette[lower.astype(int)]
+  upper_color = palette[upper.astype(int)]
+
+  # Weight each neighboring color by its closeness (1 - L1 distance)
+  d_relative = d_meters / period_meters
+  dist_to_lower = np.abs(d_relative - lower)
+  dist_to_upper = np.abs(d_relative - upper)
+  blended = (
+    (1. - dist_to_lower) * lower_color.T +
+    (1. - dist_to_upper) * upper_color.T)
+
+  colors = blended.T
+  return colors[0] if len(d_meters) == 1 else colors
+
+def draw_xy_depth_px_in_image(img, pts, alpha=.7):
+  """
+  Draw a point cloud `pts` in `img` (modified in place).  Each point colors
+  a single pixel; the color is chosen by `rgb_for_distance()` from the
+  point's depth.  Points that fall outside the image are skipped.
+
+  Args:
+    img (np.array): Draw in this image.
+    pts (np.array): An array of N by 3 points in form
+      (pixel x, pixel y, depth meters).
+    alpha (float): Blend point color using weight [0, 1].
+  """
+
+  import cv2
+
+  if pts.shape[0] == 0:
+    # Nothing to draw (and `rgb_for_distance()` cannot handle no distances)
+    return
+
+  # OpenCV can't draw transparent colors, so we use the 'overlay image' trick:
+  # First draw dots on an overlay...
+  overlay = img.copy()
+
+  # Sort by distance descending; let colors of nearer points (drawn last)
+  # override colors of farther points.  NB: negate the *depths*, not the
+  # argsort indices.
+  pts = pts[np.argsort(-pts[:, 2])]
+
+  colors = rgb_for_distance(pts[:, 2])
+  # For a single distance `rgb_for_distance()` returns one 1D color
+  colors = np.atleast_2d(colors)
+  colors = np.clip(colors, 0, 255).astype(int)
+
+  h, w = overlay.shape[:2]
+  for (x, y), color in zip(pts[:, :2].tolist(), colors.tolist()):
+    x = int(round(x))
+    y = int(round(y))
+    if not (0 <= x < w and 0 <= y < h):
+      # NB: negative indices would silently wrap around to the far edge
+      continue
+    overlay[y, x, :] = color
+
+  # Now blend!
+  img[:] = cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0)
+
+
+
+
+
+
+
+
+
+
+
+## Common Support
+
+# collect img1, pose1
+# collect img2, pose2
+# collect all clouds (save in RAM)
+# collect all labels / cuboids
+
+def make_homo(cloud):
+  """Return `cloud` in homogeneous coordinates: an array that already has 4
+  columns is returned as-is; otherwise return a new N by 4 array holding the
+  first three columns of `cloud` and a last column of ones."""
+  already_homogeneous = (cloud.shape[-1] == 4)
+  if already_homogeneous:
+    return cloud
+
+  n_points = cloud.shape[0]
+  homo = np.ones((n_points, 4))
+  homo[:, :3] = cloud[:, :3]
+  return homo
+
+
+
+import numba
+from numba import jit
+
+@jit(nopython=True)
+def get_nearest_idx(uvd, dist_eps):
+  """Return the row indices of the nearest point per pixel.
+
+  Rows of `uvd` are `(u, v, d)`; `u` and `v` are rounded to the nearest
+  integer pixel.  For each pixel, the row with the smallest `d` wins.  Rows
+  with a negative pixel coordinate or with `d < dist_eps` are ignored.
+
+  Args:
+    uvd (np.array): An N by 3 (or wider) array of `(u, v, d)` rows.
+    dist_eps (float): Minimum `d` for a row to be considered.
+
+  Returns:
+    np.array: int64 indices into the rows of `uvd`, one per occupied pixel.
+  """
+  if uvd.shape[0] == 0:
+    return np.ones(0, dtype=np.int64).flatten()
+
+  max_u = max(0, int(np.rint(np.max(uvd[:, 0]))))
+  max_v = max(0, int(np.rint(np.max(uvd[:, 1]))))
+  # Per pixel: [index of nearest row, its distance]; inf means "no row yet".
+  # NB: the `np.Inf` alias was removed in NumPy 2.0; use `np.inf`.
+  nearest = np.full((max_u + 1, max_v + 1, 2), np.inf)
+
+  # NB: numba accelerates this for loop 10x-100x+
+  for r in range(uvd.shape[0]):
+    d = uvd[r, 2]
+    u = int(np.rint(uvd[r, 0]))
+    v = int(np.rint(uvd[r, 1]))
+    if u >= 0 and v >= 0 and d >= dist_eps and d < nearest[u, v, 1]:
+      nearest[u, v, 1] = d
+      nearest[u, v, 0] = r
+
+  rs = nearest[:, :, 0].flatten()
+  return rs[rs != np.inf].astype(np.int64)
+
+@jit(nopython=True)
+def get_masked_idx(uvd, uvd_mask):
+  """Return a boolean array flagging the rows of `uvd` that are occluded:
+  a row is masked if some row of `uvd_mask` rounds to the same pixel and has
+  a smaller depth.
+
+  Args:
+    uvd (np.array): An N by 3 (or wider) array of `(u, v, d)` rows.
+    uvd_mask (np.array): An M by 3 (or wider) array of `(u, v, d)` rows of
+      the occluding points.
+
+  Returns:
+    np.array: A boolean array of length N; `True` marks a masked row.
+
+  NOTE(review): the original referenced an undefined name (`mask_ij`), so
+  this function could never have run; confirm the intended semantics.  The
+  fancy indexing below may also not be supported by numba's nopython mode.
+  """
+
+  masked = np.zeros(uvd.shape[0], dtype=np.bool_)
+  if uvd.shape[0] == 0 or uvd_mask.shape[0] == 0:
+    # Nothing to mask (and `max()` of an empty array would fail)
+    return masked
+
+  ij_idx = np.zeros((uvd.shape[0], 3), dtype=np.int64)
+  ij_idx[:, :2] = np.rint(uvd[:, (0, 1)])
+  ij_idx[:, 2] = np.arange(uvd.shape[0])
+
+  # Rows of (pixel i, pixel j, depth) for the mask; one row per mask point
+  ijd_mask = np.zeros((uvd_mask.shape[0], 3), dtype=uvd_mask.dtype)
+  ijd_mask[:, :2] = np.rint(uvd_mask[:, (0, 1)])
+  ijd_mask[:, 2] = uvd_mask[:, 2]
+
+  max_i = int(max(ij_idx[:, 0].max(), ijd_mask[:, 0].max()))
+  max_j = int(max(ij_idx[:, 1].max(), ijd_mask[:, 1].max()))
+
+  im_idx = np.full((max_i + 1, max_j + 1), -1, dtype=np.int64)
+  ii, jj, idx = ij_idx[:, 0], ij_idx[:, 1], ij_idx[:, 2]
+  im_idx[ii, jj] = idx
+    # Now im_idx has a 'pixel' value of 0 or greater for every row in
+    # `uvd`; otherwise, im_idx has 'pixel' value -1
+
+  # NB: numba accelerates this for loop 10x-100x+
+  for r in range(ijd_mask.shape[0]):
+    i = int(ijd_mask[r, 0])
+    j = int(ijd_mask[r, 1])
+    d_mask = ijd_mask[r, 2]
+    idx = im_idx[i, j]
+    if idx >= 0 and d_mask < uvd[idx, 2]:
+      masked[idx] = True
+        # Since `uvd_mask` has a closer point than `uvd`, this point
+        # in `uvd` is to be masked
+
+  return masked
+
+
+
+# def world_to_uvd_visible(
+#         camera_pose=np.eye(4),
+#         P=np.eye(4),
+#         image_size=(100, 200),
+#         T_lidar2cam=np.eye(4),
+#         T_ego2lidar=np.eye(4),
+#         world_cloud=np.zeros((0, 3))):
+  
+#   w, h = image_size
+#   xyz_ego_t = np.matmul(camera_pose, world_cloud.T)
+  
+#   uvd = P.dot(T_lidar2cam.dot( T_ego2lidar.dot( xyz_ego_t ) ) )
+#   uvd[0:2, :] /= uvd[2, :]
+#   uvd = uvd.T
+#   uvd = uvd[:, :3]
+
+#   in_cam_frustum = np.where(
+#       (np.rint(uvd[:, 0]) >= 0) & 
+#       (np.rint(uvd[:, 0]) <= w - 1) &
+#       (np.rint(uvd[:, 1]) >= 0) & 
+#       (np.rint(uvd[:, 1]) <= h - 1) &
+#       (uvd[:, 2] >= 0.001))
+
+#   uvd_in_cam = uvd[in_cam_frustum]
+
+#   # Now prune to nearest points
+#   nearest_idx = get_nearest_idx(uvd_in_cam)
+
+#   uvd_visible = np.hstack([uvd, np.zeros((uvd.shape[0], 1))])
+#   idx = np.arange(uvd_visible.shape[0])[in_cam_frustum][nearest_idx]
+#   uvd_visible[idx, -1] += 1
+#       # Visible: in the camera frustum, AND is nearest point for the pixel.
+#       # TODO: Try to interpolate for neighboring pixels?
+
+#   return uvd_visible
+
+# Default for the `min_dist` arguments below: points whose depth is smaller
+# than this are never rendered as visible.  100 microns if cloud is in meters.
+DEFAULT_MIN_DIST = 0.0001
+
+def render_world_to_uvd_visible(
+        viewer_pose1=np.eye(4),
+        viewer_pose2=np.eye(4),
+
+        projection='pinhole', # or 'spherical'
+
+        # Use a pinhole camera viewer / projection
+        K=np.eye(4),
+        image_size=(100, 200),
+
+        # Use a spherical viewer / projection
+        yaw_bin_radians=0.,
+        yaw_limits_radians=(-float('inf'), float('inf')),
+        pitch_bin_radians=0.,
+        pitch_limits_radians=(-float('inf'), float('inf')),
+
+        # T_lidar2viewer=np.eye(4),
+        # T_ego2lidar=np.eye(4),
+        T_ego2viewer=np.eye(4),
+        world_cloud1=np.zeros((0, 3)),
+        world_cloud2=None, # Or provide a xyz point cloud for cloud in view 2
+
+        clip_invisible_both=True,
+        min_dist=DEFAULT_MIN_DIST):
+  """
+  Render the given xyz point cloud(s) `world_cloud1` (and optionally
+  `world_cloud2`) in the space of a 'viewer' (e.g. a camera) and
+  return a float numpy array (a _table_) of:
+       uvd visible 1          uvd visible 2
+    u1 | v1 | d1 | viz1 | u2 | v2 | d2 | viz2 |
+  For each row in the output:
+   * The float values `u` and `v` are the "horizontal" and "vertical" axes.
+      For a camera, `u` is the width dimension (x-axis) and `v` is the
+      height dimension (y-axis).  For a spherical projection, `u` is
+      the left-right (yaw) axis and `v` is the up-down (pitch) axis.
+   * The float value `d` is depth or distance from the viewer's origin.
+      This value has the same units as the world cloud (e.g. meters).
+   * The float value `viz` is 1 if the point is visible from the viewer pose
+      and 0 otherwise.  A point is visible when its rounded pixel lies
+      inside `image_size`, its depth is at least `min_dist`, and
+      `get_nearest_idx` selects it as the nearest point for its pixel.
+   * The first four columns represent data for the first viewer pose and
+      the last four columns represent data for the second viewer pose.
+   * Rows indicate correspondence between points in the two viewer poses--
+      each row in the output is data for the same physical point in both
+      viewer frames.
+   * Points that are invisible in *both* frames will be omitted only if
+      `clip_invisible_both`.
+
+  Args:
+    viewer_pose1, viewer_pose2: 4x4 matrices applied to the homogeneous
+      world points for view 1 and view 2 respectively.
+    projection: only 'pinhole' is implemented.  The yaw / pitch arguments
+      belong to the 'spherical' projection and are currently unused.
+    K: 4x4 projection matrix applied after `T_ego2viewer`.
+    image_size: `(width, height)` in pixels.
+    T_ego2viewer: 4x4 matrix applied after the viewer pose.
+    world_cloud1: n-by-3 xyz cloud for view 1.
+    world_cloud2: optional cloud for view 2 with the same shape as
+      `world_cloud1`; when None, `world_cloud1` is used for both views.
+    clip_invisible_both: drop rows that are invisible in both views.
+    min_dist: minimum depth for a point to be visible.
+
+  Raises:
+    ValueError: for the (not yet implemented) 'spherical' projection and
+      for any unknown `projection`.
+    AssertionError: if `world_cloud2` is given with a different shape.
+  """
+  # Fail fast, before doing any heavy matrix work
+  if projection == 'spherical':
+    raise ValueError('TODO')
+  if projection != 'pinhole':
+    raise ValueError("Unsupported %s" % projection)
+
+  w, h = image_size
+
+  # NB: we tried to JIT this helper and JIT is slower (the non-JIT version
+  # appears to run multithreaded).
+  def to_uvdvis(hcloud_world, viewer_pose):
+    """Project homogeneous world points; return an n-by-4 (u, v, d, viz)."""
+    xyz_ego = np.matmul(viewer_pose, hcloud_world.T)
+    xyz_viewer = T_ego2viewer.dot(xyz_ego)
+
+    uvd = K.dot(xyz_viewer)
+    uvd[0:2, :] /= uvd[2, :]
+    uvd = uvd.T[:, :3]
+
+    u_px = np.rint(uvd[:, 0])
+    v_px = np.rint(uvd[:, 1])
+    in_frustum_rows = np.flatnonzero(
+      (u_px >= 0) & (u_px <= w - 1) &
+      (v_px >= 0) & (v_px <= h - 1) &
+      (uvd[:, 2] >= min_dist))
+
+    uvdvis = np.hstack([uvd, np.zeros((uvd.shape[0], 1))])
+    if in_frustum_rows.size:
+      # Ignoring the out-of-frustum points helps runtime considerably.
+      # Skipping the call entirely when nothing is in the frustum avoids
+      # handing `get_nearest_idx` a zero-row table.
+      nearest_idx = get_nearest_idx(uvd[in_frustum_rows], min_dist)
+      uvdvis[in_frustum_rows[nearest_idx], -1] = 1
+    return uvdvis
+
+  hworld_cloud1 = make_homo(world_cloud1)
+  if world_cloud2 is None:
+    hworld_cloud2 = hworld_cloud1
+  else:
+    assert world_cloud2.shape == world_cloud1.shape, \
+      "Cloud rows must indicate point correspondence!"
+    hworld_cloud2 = make_homo(world_cloud2)
+
+  uvdvis1 = to_uvdvis(hworld_cloud1, viewer_pose1)
+  uvdvis2 = to_uvdvis(hworld_cloud2, viewer_pose2)
+
+  if not clip_invisible_both:
+    return np.hstack([uvdvis1, uvdvis2])
+
+  visible_either = ((uvdvis1[:, -1] == 1) | (uvdvis2[:, -1] == 1))
+  return np.hstack([uvdvis1[visible_either], uvdvis2[visible_either]])
+
+
+def merge_uvd_viz1_uvd_viz2(
+        uvd_viz1_uvd_viz2_pair1,
+        uvd_viz1_uvd_viz2_pair2,
+        min_dist=DEFAULT_MIN_DIST):
+  """Reduce operation: combine two uvdvis-pairs and return only
+    the nearest points across both pairs.
+
+  Both arguments are tables with the eight columns
+  `u1 | v1 | d1 | viz1 | u2 | v2 | d2 | viz2`.  The tables are stacked;
+  then, separately for each view, `get_nearest_idx` is re-run over the rows
+  currently marked visible and every other row of that view is marked
+  invisible.  Rows that end up invisible in both views are dropped.  The
+  inputs are not modified.
+  """
+  import time
+  start = time.time()
+
+  merged = np.vstack([
+    uvd_viz1_uvd_viz2_pair1,
+    uvd_viz1_uvd_viz2_pair2
+  ])
+
+  def get_nearest_update_visible(uvdvis):
+    """Update the viz column of `uvdvis` (a view into `merged`) in place."""
+    visible_rows = np.flatnonzero(uvdvis[:, -1] == 1)
+      # Ignore invisible points: they can't _become_ visible
+    if visible_rows.size:
+      nearest_idx = get_nearest_idx(uvdvis[visible_rows], min_dist)
+      vis_idx = visible_rows[nearest_idx]
+    else:
+      # Nothing visible in this view: avoid handing `get_nearest_idx` a
+      # zero-row table
+      vis_idx = visible_rows
+    uvdvis[:, -1] = 0
+    uvdvis[vis_idx, -1] = 1
+
+  uvdvis1 = merged[:, :4]
+  get_nearest_update_visible(uvdvis1)
+
+  uvdvis2 = merged[:, 4:]
+  get_nearest_update_visible(uvdvis2)
+
+  visible_either = ((uvdvis1[:, -1] == 1) | (uvdvis2[:, -1] == 1))
+  merged_uvd_viz1_uvd_viz2 = np.hstack([
+    uvdvis1[visible_either], uvdvis2[visible_either]
+  ])
+  print('merge in ', time.time() - start)
+  print('merged_uvd_viz1_uvd_viz2', merged_uvd_viz1_uvd_viz2.shape)
+  return merged_uvd_viz1_uvd_viz2
+
+
+  # is_nearest = np.zeros(uvd.shape[0], dtype=np.bool) # NB need bool_ for numba: https://github.com/numba/numba/issues/1311
+  # is_nearest[nearest_idx] = 1
+
+  # visible = ( in_cam_frustum & is_nearest ) #np.zeros((uvd.shape[0], 1)) + ( in_cam_frustum & is_nearest)
+  # visible = np.expand_dims(visible, axis=1)
+
+  # nearest_idx1 = get_nearest_idx(uvdvis1, min_dist)
+  # nearest_idx2 = get_nearest_idx(uvdvis2, min_dist)
+
+
+
+
+  # merged_uvd_viz1_uvd_viz2 = np.vstack([
+  #   uvd_viz1_uvd_viz2_pair1,
+  #   uvd_viz1_uvd_viz2_pair2
+  # ])
+  # nearest_idx = get_nearest_idx(merged_uvd_viz1_uvd_viz2, min_dist)
+  # res = merged_uvd_viz1_uvd_viz2[nearest_idx]
+  # print('merge in ', time.time() - start)
+  # return res
+
+
+# def merge_uvd_nearest(uvd1, uvd2):
+#   uvd = np.vstack([uvd1, uvd2])
+#   nearest_idx = get_nearest_idx(uvd)
+#   return uvd[nearest_idx]
+
+
+def render_oflow_pair(
+      ci1=datum.CameraImage(),
+      ci2=datum.CameraImage(),
+      world_cloud1=np.zeros((0, 3)),
+      world_cloud2=np.zeros((0, 3)), # or None to use world_cloud1
+      mask_world=np.zeros((0, 3))):
+  """Render a pair of clouds into the two camera images `ci1` and `ci2`.
+
+  The camera matrix `K`, the image size and the ego-to-camera transform
+  are all taken from `ci1`; the two viewer poses are the inverses of
+  `ci1.ego_pose` and `ci2.ego_pose`.
+
+  If `mask_world` has any rows it is rendered with the same viewers, and
+  every point of the main clouds that lies behind a visible mask point (as
+  decided by `get_masked_idx`) is marked invisible in that view.
+
+  Returns the `u1 | v1 | d1 | viz1 | u2 | v2 | d2 | viz2` table produced by
+  `render_world_to_uvd_visible`, restricted to rows visible in at least
+  one view.
+  """
+  # NB: the inverse ego pose is what renders correctly here; the original
+  # author noted they were not sure why the inverse is needed.
+  ego_pose1 = ci1.ego_pose.get_inverse().get_transformation_matrix(homogeneous=True)
+  ego_pose2 = ci2.ego_pose.get_inverse().get_transformation_matrix(homogeneous=True)
+
+  K = np.eye(4)
+  K[:3, :3] = ci1.K[:3, :3]
+
+  # Both dimensions come from `ci1`, the same image that provides `K`
+  w, h = ci1.width, ci1.height
+
+  T_ego2cam = ci1.ego_to_sensor.get_transformation_matrix(homogeneous=True)
+
+  viewer_kwargs = dict(
+    viewer_pose1=ego_pose1,
+    viewer_pose2=ego_pose2,
+    projection='pinhole',
+    K=K,
+    image_size=(w, h),
+    T_ego2viewer=T_ego2cam)
+
+  uvd_viz1_uvd_viz2 = render_world_to_uvd_visible(
+                          world_cloud1=world_cloud1,
+                          world_cloud2=world_cloud2,
+                          clip_invisible_both=True,
+                          **viewer_kwargs)
+
+  if mask_world.shape[0] == 0:
+    return uvd_viz1_uvd_viz2
+
+  # Get masks in uvd space
+  mask_uvd_viz1_uvd_viz2 = render_world_to_uvd_visible(
+                              world_cloud1=mask_world,
+                              **viewer_kwargs)
+
+  uvdvis1 = uvd_viz1_uvd_viz2[:, :4]
+  uvdvis2 = uvd_viz1_uvd_viz2[:, 4:]
+
+  # Apply masks: make masked pixels invisible
+  view_pairs = (
+    (uvdvis1, mask_uvd_viz1_uvd_viz2[:, :4]),
+    (uvdvis2, mask_uvd_viz1_uvd_viz2[:, 4:]),
+  )
+  for uvdvis, mask_uvdvis in view_pairs:
+    # Only mask points that are themselves visible in this view can
+    # occlude; pass just their (u, v, d) columns.
+    mask_uvd = mask_uvdvis[mask_uvdvis[:, -1] == 1][:, :3]
+    if uvdvis.shape[0] > 0 and mask_uvd.shape[0] > 0:
+      masked = get_masked_idx(uvdvis, mask_uvd)
+      uvdvis[masked, -1] = 0
+
+  # Re-apply clip_invisible_both
+  visible_either = ((uvdvis1[:, -1] == 1) | (uvdvis2[:, -1] == 1))
+  return np.hstack([
+    uvdvis1[visible_either], uvdvis2[visible_either]
+  ])
+
+
+
+def compute_optical_flows(
+      world_cloud=np.zeros((0, 3)),
+      T_ego2lidar=np.eye(4),
+      T_lidar2cam=np.eye(4),
+      P=np.eye(4),
+      cam_height_pixels=0,
+      cam_width_pixels=0,
+
+      ego_pose1=np.eye(4),
+      ego_pose2=np.eye(4),
+      moving_1=np.zeros((0, 3)),
+      moving_2=np.zeros((0, 3)),
+
+      img1_factory=lambda: np.zeros((1, 1, 3)),
+      img2_factory=lambda: np.zeros((1, 1, 3)),
+      debug_title=''):
+  """Project a fused cloud into one camera at two ego poses and derive flow.
+
+  `world_cloud`, `moving_1` and `moving_2` are stacked into one cloud which
+  is projected with `P @ T_lidar2cam @ T_ego2lidar @ inv(ego_pose)` for
+  each of the two ego poses.  For each pose a point is 'visible' when it
+  lies inside the image, has depth of at least 0.01, and `get_nearest_idx`
+  selects it as the nearest point of its (rounded) pixel.
+
+  If `debug_title` is non-empty, the visible points are drawn onto the
+  images returned by `img1_factory` / `img2_factory` and written as PNGs
+  under `/opt/psegs/test_run_output/`.
+
+  Returns:
+    A tuple `(uvdij1_visible_uvdij2_visible, v2v_flow)`:
+     * `uvdij1_visible_uvdij2_visible` has the twelve columns
+       `u | v | d | i | j | viz` for pose 1 followed by the same six for
+       pose 2, with one row per point visible in at least one pose.
+       (`i`, `j` are `u`, `v` rounded to the nearest pixel.)
+     * `v2v_flow` is an (h, w, 2) array holding, at the pose-1 pixel of
+       each point visible in both poses, the sub-pixel `(u2 - u1, v2 - v1)`
+       displacement; all other pixels are zero.
+  """
+  import time
+
+  h, w = cam_height_pixels, cam_width_pixels
+
+  pose1 = np.linalg.inv(ego_pose1) # FIXME for Semantic KITTI ??
+  pose2 = np.linalg.inv(ego_pose2) # FIXME for Semantic KITTI ??
+  print('diff Tx_pose1->Tx_pose2', pose2[:, -1] - pose1[:, -1])
+
+  # Add all moving things at t1 and t2 to environment.
+  # TODO: the moving points are not masked / ignored yet.
+  hfused_world_cloud = np.vstack([
+    make_homo(world_cloud),
+    make_homo(moving_1),
+    make_homo(moving_2),
+  ])
+
+  def world_to_uvdij_visible_t(pose):
+    """Return the n-by-6 `u | v | d | i | j | viz` table for `pose`."""
+    start = time.time()
+
+    print('hfused_world_cloud', hfused_world_cloud.shape, 1e-9 * hfused_world_cloud.nbytes)
+    xyz_ego_t = np.matmul(pose, hfused_world_cloud.T)
+    print(time.time() - start, 'projected to xyz_ego_t')
+
+    uvd = P.dot(T_lidar2cam.dot( T_ego2lidar.dot( xyz_ego_t ) ) )
+    uvd[0:2, :] /= uvd[2, :]
+    uvd = uvd.T
+    uvd = uvd[:, :3]
+    # Group by rounded pixel coord; need orig (u, v) for sub-pixel flow
+    ij = np.rint(uvd[:, (0, 1)])
+    uvdij = np.hstack([uvd, ij])
+    print(time.time() - start, 'projected to uvd')
+
+    in_cam_frustum = np.where(
+        (uvdij[:, 0] >= 0) &
+        (uvdij[:, 0] <= w - 1) &
+        (uvdij[:, 1] >= 0) &
+        (uvdij[:, 1] <= h - 1) &
+        (uvdij[:, 2] >= 0.01))
+
+    uvdij_in_cam = uvdij[in_cam_frustum]
+    print(time.time() - start, 'projected to uvd in cam', uvdij_in_cam.shape, 1e-9 * uvdij_in_cam.nbytes)
+
+    print('render using numba %s ...' % (uvdij_in_cam.shape,))
+    start = time.time()
+    if uvdij_in_cam.shape[0] > 0:
+      nearest_idx = get_nearest_idx(uvdij_in_cam)
+    else:
+      # Nothing in the frustum: avoid handing `get_nearest_idx` a zero-row
+      # table
+      nearest_idx = np.zeros(0, dtype=np.int64)
+    print(time.time() - start, 'done numba, %s winners' % (nearest_idx.shape,))
+
+    uvdij_visible = np.hstack([uvdij, np.zeros((uvdij.shape[0], 1))])
+    idx = np.arange(uvdij_visible.shape[0])[in_cam_frustum][nearest_idx]
+    uvdij_visible[idx, -1] += 1
+       # visible: in the camera frustum, AND is nearest point for the pixel.
+       # then we'll flow from that pt. TODO: try to average flows for a single pixel?
+    print(time.time() - start, 'done select visible from numba', uvdij_visible.shape, 1e-9 * uvdij_visible.nbytes)
+
+    # TODO: allow fused tables to have mask / ignore clouds that blot out
+    # stuff in the flow (needed for Semantic KITTI).
+
+    return uvdij_visible
+
+  uvdij_visible1 = world_to_uvdij_visible_t(pose1)
+  uvdij_visible2 = world_to_uvdij_visible_t(pose2)
+
+  if debug_title:
+    import imageio
+    basepath = '/opt/psegs/test_run_output/' + debug_title
+    debug = img1_factory().copy()
+    draw_xy_depth_px_in_image(debug, uvdij_visible1[uvdij_visible1[:, -1] == 1][:, :3])
+    print('project1')
+    imageio.imwrite(basepath + '.img1.png' , debug)
+
+    debug = img2_factory().copy()
+    draw_xy_depth_px_in_image(debug, uvdij_visible2[uvdij_visible2[:, -1] == 1][:, :3])
+    print('project2')
+    imageio.imwrite(basepath + '.img2.png' , debug)
+
+  # old format -- need this to make flow map
+  visible_both = ((uvdij_visible1[:, -1] == 1) & (uvdij_visible2[:, -1] == 1))
+
+  visboth_uv1 = uvdij_visible1[visible_both, :2]
+  visboth_uv2 = uvdij_visible2[visible_both, :2]
+  ij_flow = np.hstack([
+    uvdij_visible1[visible_both, 3:5], visboth_uv2 - visboth_uv1
+  ])
+  v2v_flow = np.zeros((h, w, 2))
+  # NB: the `np.int` alias no longer exists in NumPy; the builtin `int` is
+  # what it used to refer to.
+  xx = ij_flow[:, 0].astype(int)
+  yy = ij_flow[:, 1].astype(int)
+  v2v_flow[yy, xx] = ij_flow[:, 2:4]
+
+  # TODO: visible-to-occluded and occluded-to-visible flows are not
+  # computed yet.
+
+  # new format
+  start = time.time()
+  visible_either = ((uvdij_visible1[:, -1] == 1) | (uvdij_visible2[:, -1] == 1))
+  uvdij1_visible_uvdij2_visible = np.hstack([
+    uvdij_visible1[visible_either], uvdij_visible2[visible_either]
+  ])
+  print('uvdij1_visible_uvdij2_visible', time.time() - start, uvdij1_visible_uvdij2_visible.shape, 1e-9 * uvdij1_visible_uvdij2_visible.nbytes)
+
+  return uvdij1_visible_uvdij2_visible, v2v_flow
+
+
+
+  
+  
+  
+  # build array, each row is a 3d point at times t1 and t2
+  # xyz t1 | xyz t2 | (is_moving - are xyz diff?) | img1 uvd | img2 uvd | is visible img1 | is visible img2
+  # ** for moving stuff, careful to append correct xyz ...
+  # ** for semantic kitti, need to make moving stuff verboten at both timesteps ...
+  
+  # then compute:
+  # vis change | uvd img1 | uvd img2
+  
+  # the break that above into 3 flows
+  
+  # create fused unproj for img1
+  # place moving objs for img1
+  
+  # create fused unproj for img2
+  # place moving objs for img2
+
+
+# def compute_optical_flows(
+#     t1_i=0, img1=None, 
+#     t2_i=1, img2=None,
+#     pose1=None,
+#     pose2=None,
+#     fused_world_cloud=None, # pre-filter these to remove moving stuff !!
+#     all_moving_objs=[],
+#     all_moving_clouds_t1t2=[],
+#     P=None,
+#     T_lidar2cam=None,
+#     T_ego2lidar=None):
+  
+#   h, w, c = img1.shape
+  
+#   pose1 = np.linalg.inv(pose1) # FIXME for Semantic KITTI
+#   pose2 = np.linalg.inv(pose2) # FIXME for Semantic KITTI
+#   print('diff Tx_pose1->Tx_pose2', pose2[:, -1] - pose1[:, -1])
+  
+#   hfused_world_cloud = make_homo(fused_world_cloud)
+#   is_moving_ignore = np.zeros((hfused_world_cloud.shape[0], 1))
+  
+#   if len(all_moving_clouds_t1t2):
+#     # Add all moving things at t1 and t2 to environment; we'll mask them
+#     m1 = all_moving_clouds_t1t2[0]
+#     m2 = all_moving_clouds_t1t2[1]
+#     hfused_world_cloud = np.vstack([
+#       hfused_world_cloud,
+#       make_homo(m1),
+#       make_homo(m2),
+#     ])
+# #     is_moving_ignore = np.vstack([
+# #       is_moving_ignore,
+# #       np.ones((m1.shape[0], 1)),
+# #       np.ones((m2.shape[0], 1)),
+# #     ])
+
+#   def world_to_uvdij_visible_t(pose):
+#     import time
+#     start = time.time()
+    
+#     xyz_ego_t = np.matmul(pose, hfused_world_cloud.T)
+#     print(time.time() - start, 'projected to xyz_ego_t')
+# #     print('xyz_ego_t mean min max', np.mean(xyz_ego_t, axis=1), np.min(xyz_ego_t, axis=1), np.max(xyz_ego_t, axis=1))
+
+
+#     uvd = P.dot(T_lidar2cam.dot( T_ego2lidar.dot( xyz_ego_t ) ) )
+#     uvd[0:2, :] /= uvd[2, :]
+#     uvd = uvd.T
+#     uvd = uvd[:, :3]
+#     ij = np.rint(uvd[:, (0, 1)]) # Group by rounded pixel coord; need orig (u, v) for sub-pixel flow
+#     uvdij = np.hstack([uvd, ij])
+#     print(time.time() - start, 'projected to uvd')
+
+#     in_cam_frustum = np.where(
+#         (uvdij[:, 0] >= 0) & 
+#         (uvdij[:, 0] <= w - 1) &
+#         (uvdij[:, 1] >= 0) & 
+#         (uvdij[:, 1] <= h - 1) &
+#         (uvdij[:, 2] >= 0.01))
+
+#     uvdij_in_cam = uvdij[in_cam_frustum]
+# #     uvdij, uvdij_in_cam = project_to_uvd(P, pose, hfused_world_cloud, T_lidar2cam, T_ego2lidar, w, h)
+#     print(time.time() - start, 'projected to uvd in cam')
+
+    
+# #     print('render using pandas %s ...' % (uvdij_in_cam.shape,))
+# #     import pandas as pd
+# #     start = time.time()
+# #     df = pd.DataFrame(uvdij_in_cam[:, 2:], columns=['d', 'i', 'j'])
+# #     df['id'] = df.index
+# #     nearest_idx = df.groupby(['i', 'j'])['d'].idxmin().to_numpy()
+# #     print(time.time() - start, 'done pandas, %s winners' % (nearest_idx.shape,))
+# #     print(nearest_idx[:10])
+    
+#     print('render using numba %s ...' % (uvdij_in_cam.shape,))
+#     start = time.time()
+#     nearest_idx = get_nearest_idx(uvdij_in_cam)
+#     print(time.time() - start, 'done numba, %s winners' % (nearest_idx.shape,))
+# #     print(nearest_idx[:10])
+    
+#     uvdij_visible = np.hstack([uvdij, np.zeros((uvdij.shape[0], 1))])
+#     idx = np.arange(uvdij_visible.shape[0])[in_cam_frustum][nearest_idx]
+#     uvdij_visible[idx, -1] += 1
+#        # visible: in the camera frustum, AND is nearest point for the pixel.
+#        # then we'll flow from that pt. TODO: try to average flows for a single pixel?
+
+#     if len(all_moving_clouds_t1t2):
+#       uvdij_visible[np.where(is_moving_ignore == 1)[0], -1] = 0
+    
+#     return uvdij_visible
+    
+    
+#   uvdij_visible1 = world_to_uvdij_visible_t(pose1)
+#   uvdij_visible2 = world_to_uvdij_visible_t(pose2)
+  
+#   if True:
+#     debug = img1.copy()
+#     draw_xy_depth_px_in_image(debug, uvdij_visible1[uvdij_visible1[:, -1] == 1][:, :3])
+#     print('project1')
+#     imshow(debug)
+    
+#     debug = img2.copy()
+#     draw_xy_depth_px_in_image(debug, uvdij_visible2[uvdij_visible2[:, -1] == 1][:, :3])
+#     print('project2')
+#     imshow(debug)
+  
+#   visible_both = ((uvdij_visible1[:, -1] == 1) & (uvdij_visible2[:, -1] == 1))
+  
+#   visboth_uv1 = uvdij_visible1[visible_both, :2]
+#   visboth_uv2 = uvdij_visible2[visible_both, :2]
+#   ij_flow = np.hstack([
+#     uvdij_visible1[visible_both, 3:5], visboth_uv2 - visboth_uv1
+#   ])
+#   v2v_flow = np.zeros((h, w, 2))
+#   xx = ij_flow[:, 0].astype(np.int)
+#   yy = ij_flow[:, 1].astype(np.int)
+#   v2v_flow[yy, xx] = ij_flow[:, 2:4]
+  
+#   v2o_flow = np.zeros((h, w, 2)) # ignore for now
+#   o2v_flow = np.zeros((h, w, 2)) # ignore for now
+#   return v2v_flow, v2o_flow, o2v_flow
+  
+#   # build array, each row is a 3d point at times t1 and t2
+#   # xyz t1 | xyz t2 | (is_moving - are xyz diff?) | img1 uvd | img2 uvd | is visible img1 | is visible img2
+#   # ** for moving stuff, careful to append correct xyz ...
+#   # ** for semantic kitti, need to make moving stuff verboten at both timesteps ...
+  
+#   # then compute:
+#   # vis change | uvd img1 | uvd img2
+  
+#   # the break that above into 3 flows
+  
+#   # create fused unproj for img1
+#   # place moving objs for img1
+  
+#   # create fused unproj for img2
+#   # place moving objs for img2
+    
+    
+
+## END FROM PAPER SCRATCH
+###############################################################################
+
+
+class RenderOFlowTasksWorker(object):
+  """Callable that renders optical flow for one task row at a time.
+
+  The worker lazily loads (and caches) the fused world cloud and the fused
+  per-track object clouds from `fused_datum_sample`.  The caches and the
+  lock guarding them are dropped when the worker is pickled and re-created
+  empty when it is unpickled.
+  """
+
+  # Inter-process lock file used by `single_machine_map_rows`; the file
+  # must already exist.
+  FLOCK_PATH = '/tmp/psegs_RenderOFlowTasksWorker.lock'
+
+  def __init__(self, T_ego2lidar, fused_datum_sample, render_func):
+    import threading
+    self._shared = threading.Lock()
+    self._track_id_to_fused_cloud = None
+    self._world_cloud = None
+    self.T_ego2lidar = T_ego2lidar
+    self.fused_datum_sample = fused_datum_sample
+    self.render_func = render_func
+
+  def __getstate__(self):
+    # The lock cannot be pickled and the cached clouds are re-loadable, so
+    # none of them travel with the pickled worker.
+    d = dict(self.__dict__)
+    d.pop('_shared')
+    d.pop('_track_id_to_fused_cloud')
+    d.pop('_world_cloud')
+    return d
+
+  def __setstate__(self, d):
+    for k, v in d.items():
+      setattr(self, k, v)
+    import threading
+    self._shared = threading.Lock()
+    self._track_id_to_fused_cloud = None
+    self._world_cloud = None
+
+  def get_track_id_to_fused_cloud(self):
+    """Return (loading on first use) a dict of track id -> fused cloud."""
+    with self._shared:
+      if self._track_id_to_fused_cloud is None:
+        from oarphpy.spark import RowAdapter
+        FROM_ROW = RowAdapter.from_row
+        print('track_id_to_fused_cloud loading')
+        track_id_to_fused_cloud = {}
+        for pc in self.fused_datum_sample.lidar_clouds:
+          if 'lidar|objects_fused' in pc.sensor_name:
+            cucloud = FROM_ROW(pc)
+            track_id = cucloud.extra['track_id']
+            track_id_to_fused_cloud[track_id] = cucloud.get_cloud()
+            print(track_id, track_id_to_fused_cloud[track_id].shape)
+        print('track_id_to_fused_cloud', len(track_id_to_fused_cloud))
+        self._track_id_to_fused_cloud = track_id_to_fused_cloud
+      return self._track_id_to_fused_cloud
+
+  def get_world_cloud(self):
+    """Return (loading on first use) the first 'lidar|world_fused' cloud."""
+    with self._shared:
+      if self._world_cloud is None:
+        print('get_world_cloud loading')
+        from oarphpy.spark import RowAdapter
+        FROM_ROW = RowAdapter.from_row
+        world_cloud = None
+        for pc in self.fused_datum_sample.lidar_clouds:
+          if 'lidar|world_fused' in pc.sensor_name:
+            pc = FROM_ROW(pc)
+            print('loading cloud', pc.sensor_name)
+            world_cloud = pc.get_cloud()
+            print('loaded')
+            break
+        # NB: the message must go through `self`; a bare name here would
+        # raise NameError instead of reporting the failed assertion.
+        assert world_cloud is not None, self.fused_datum_sample.get_topics()
+        print('cfcloud', world_cloud.shape)
+        self._world_cloud = world_cloud
+      return self._world_cloud
+
+  @staticmethod
+  def _pose_fused_clouds(cuboids, track_id_to_fused_cloud):
+    """Place each cuboid's fused object cloud in the world frame and stack
+    the results into a single n-by-3 array (0-by-3 when there are no
+    cuboids)."""
+    clouds = [np.zeros((0, 3))]
+    for cuboid in cuboids:
+      cloud_obj = track_id_to_fused_cloud[cuboid.track_id]
+      cloud_ego = cuboid.obj_from_ego['ego', 'obj'].apply(cloud_obj).T
+      clouds.append(cuboid.ego_pose.apply(cloud_ego).T)
+    # One vstack at the end instead of re-copying the stack per cuboid
+    return np.vstack(clouds)
+
+  def __call__(self, trow):
+    """Render every camera present at both t1 and t2 of task row `trow`.
+
+    Returns a list with one dict per rendered camera, holding the two
+    camera image URIs and the two outputs of `render_func`.  Cameras whose
+    ego translation between t1 and t2 is under 1cm are skipped.
+    """
+    track_id_to_fused_cloud = self.get_track_id_to_fused_cloud()
+    world_cloud = self.get_world_cloud()
+
+    from oarphpy.spark import RowAdapter
+    FROM_ROW = RowAdapter.from_row
+
+    def union_all(it):
+      import itertools
+      return list(itertools.chain.from_iterable(it))
+
+    cuboids1 = union_all(FROM_ROW(sd.cuboids) for sd in trow.cuboids_sds_t1)
+    cuboids2 = union_all(FROM_ROW(sd.cuboids) for sd in trow.cuboids_sds_t2)
+    ci1_sds = [FROM_ROW(sd) for sd in trow.ci_sds_t1]
+    ci2_sds = [FROM_ROW(sd) for sd in trow.ci_sds_t2]
+    cname_to_cisd1 = dict((sd.camera_image.sensor_name, sd) for sd in ci1_sds)
+    cname_to_cisd2 = dict((sd.camera_image.sensor_name, sd) for sd in ci2_sds)
+    all_cams = sorted(set(cname_to_cisd1.keys()) & set(cname_to_cisd2.keys()))
+    if not all_cams:
+      return []
+
+    # Pose all objects for t1 and t2.  This does not depend on the camera,
+    # so do it once rather than once per camera.
+    moving_1 = self._pose_fused_clouds(cuboids1, track_id_to_fused_cloud)
+    print('moving_1', moving_1.shape)
+    moving_2 = self._pose_fused_clouds(cuboids2, track_id_to_fused_cloud)
+    print('moving_2', moving_2.shape)
+
+    rows_out = []
+    for sensor_name in all_cams:
+      import time
+      start = time.time()
+
+      ci_sd1 = cname_to_cisd1[sensor_name]
+      ci_sd2 = cname_to_cisd2[sensor_name]
+      ci1 = ci_sd1.camera_image
+      ci2 = ci_sd2.camera_image
+      print('starting cam', sensor_name, str(ci_sd1.uri))
+
+      cam_height_pixels = ci1.height
+      cam_width_pixels = ci1.width
+      assert (ci1.width, ci1.height) == (ci2.width, ci2.height)
+
+      movement = ci1.ego_pose.translation - ci2.ego_pose.translation
+      print('movement', movement)
+      if np.linalg.norm(movement) < 0.01:
+          print('less than 1cm movement...')
+          continue
+
+      P = np.eye(4)
+      P[:3, :3] = ci1.K[:3, :3]
+
+      pose1 = ci1.ego_pose.get_transformation_matrix(homogeneous=True)
+      pose2 = ci2.ego_pose.get_transformation_matrix(homogeneous=True)
+      uvdij1_visible_uvdij2_visible, v2v_flow = self.render_func(
+                  world_cloud=world_cloud,
+
+                  # NB: `self.T_ego2lidar` is deliberately not used here;
+                  # the identity is what works for kitti and nusc, where
+                  # the ego frame appears to be the lidar frame.
+                  T_ego2lidar=np.eye(4),
+                  T_lidar2cam=ci1.ego_to_sensor.get_transformation_matrix(homogeneous=True),
+
+                  P=P,
+                  cam_height_pixels=cam_height_pixels,
+                  cam_width_pixels=cam_width_pixels,
+
+                  ego_pose1=pose1,
+                  ego_pose2=pose2,
+                  moving_1=moving_1,
+                  moving_2=moving_2,
+
+                  img1_factory=lambda: ci1.image,
+                  img2_factory=lambda: ci2.image,
+                  debug_title=trow.oflow_task_id)
+
+      print('render_func in', time.time() - start)
+
+      rows_out.append({
+        'ci1_uri': ci_sd1.uri,
+        'ci2_uri': ci_sd2.uri,
+        'uvdij1_visible_uvdij2_visible': uvdij1_visible_uvdij2_visible,
+        'v2v_flow': v2v_flow,
+      })
+    return rows_out
+
+  def single_machine_map_rows(self, trows):
+    """Run `self` over `trows` with a thread pool while holding the
+    inter-process lock at `FLOCK_PATH`.
+
+    Returns a list with one entry per task row; each entry is the list of
+    row dicts returned by `__call__` for that task row.
+    """
+    trows = list(trows)
+    print('single_machine_map_rows working on', len(trows))
+    if not trows:
+      return []
+    assert os.path.exists(self.FLOCK_PATH)
+    import fasteners
+    lock = fasteners.InterProcessLock(self.FLOCK_PATH)
+    with lock:
+      print(os.getpid(), 'starting with flock', self.FLOCK_PATH)
+
+      # Hack: each worker thread needs temp space proportional to
+      # world cloud size, so choose number of threads to not spill
+      # into swap too badly.
+      world_cloud = self.get_world_cloud()
+      world_cloud_bytes = world_cloud.nbytes
+      print('world_cloud_bytes', world_cloud_bytes)
+      import psutil
+      total_mem_bytes = psutil.virtual_memory().total # is exclusive of swap
+      print('total_mem', total_mem_bytes)
+      num_thread = max(1, int(total_mem_bytes / (10. * world_cloud_bytes)))
+      print('num_thread', num_thread)
+
+      import concurrent.futures
+      with concurrent.futures.ThreadPoolExecutor(max_workers=num_thread) as executor:
+        results = list(executor.map(self, trows))
+
+      print('single_machine_map_rows done with', len(results))
+      return results
+
+class OpticalFlowRenderBase(object):
+
+  FUSED_LIDAR_SD_TABLE = None
+
+  RENDERER_ALGO_NAME = 'naive_shortest_ray'
+
+  MAX_TASKS_SEED = 1337
+  MAX_TASKS_PER_SEGMENT = -1
+  TASK_OFFSETS = (1,)# 5)
+
+  render_func = compute_optical_flows # TODO for python notebook drafting .........................
+
+  @classmethod
+  def TASK_DF_FACTORY(cls):  # the `TASK_DF_FACTORY` attribute of `FUSED_LIDAR_SD_TABLE`; `build()` calls `build_df_for_segment()` on it
+    return cls.FUSED_LIDAR_SD_TABLE.TASK_DF_FACTORY
+
+  @classmethod
+  def SRC_SD_T(cls):  # whatever `FUSED_LIDAR_SD_TABLE.SRC_SD_T()` returns; used here for `from_row()` and `get_all_segment_uris()`
+    return cls.FUSED_LIDAR_SD_TABLE.SRC_SD_T()
+
+  @classmethod
+  def _get_T_ego2lidar(cls, task_df):
+    """Return the homogeneous ego-to-lidar transform matrix of the first
+    point cloud in `task_df.pc_sds`.  Hacky: we just pick the first cloud.
+    Raises ValueError if `task_df` has no point clouds at all."""
+    from pyspark.sql import functions as F
+    pc_df = task_df.select(F.explode(task_df.pc_sds).alias('pc_sd'))
+    first_row = pc_df.first()  # None when the exploded table is empty
+    if first_row is None:
+      raise ValueError('task_df has no point clouds; cannot get T_ego2lidar')
+    sd = cls.SRC_SD_T().from_row(first_row['pc_sd'])
+    return sd.point_cloud.ego_to_sensor.get_transformation_matrix(homogeneous=True)
+
+  @classmethod
+  def _get_oflow_task_df(cls, spark, task_df):
+    """Self-join `task_df` into a DataFrame of optical flow task pairs.
+    Two rows pair up when both have camera images and `task_id_1 =
+    task_id_2 + offset` for some offset in `TASK_OFFSETS`.  If
+    `MAX_TASKS_PER_SEGMENT > 0`, `task_id_1` is limited to a seeded random
+    subset of that many task ids."""
+    # Optionally limit by number of tasks.
+    # We do this by filtering on task_id because it's much cheaper than e.g.
+    # trying to sort the table below by rand() and then doing a LIMIT.
+    task_id_filter_clause = ''
+    if cls.MAX_TASKS_PER_SEGMENT > 0:
+      print('restrict to', cls.MAX_TASKS_PER_SEGMENT)
+      task_ids = [r.task_id for r in task_df.select('task_id').collect()]
+      from random import Random
+      Random(cls.MAX_TASKS_SEED).shuffle(task_ids)
+      task_ids = task_ids[:cls.MAX_TASKS_PER_SEGMENT]
+      if task_ids:  # an empty `in ( )` list is not valid SQL
+        tid_str = ", ".join(str(tid) for tid in task_ids)
+        task_id_filter_clause = "AND cuci_1.task_id in ( %s )" % tid_str
+
+    task_id_join_clause = " OR ".join(
+      "( cuci_1.task_id = (cuci_2.task_id + %s) )" % offset
+      for offset in cls.TASK_OFFSETS)
+
+    # Build the flow pair task table
+    spark.catalog.dropTempView('oflow_culici_tasks_df')
+    task_df.createOrReplaceTempView('oflow_culici_tasks_df')
+    return spark.sql(
+      """
+        SELECT
+          CONCAT(cuci_1.task_id, '->', cuci_2.task_id)
+            AS oflow_task_id,
+          
+          cuci_1.task_id AS task_id_1,
+          cuci_1.cuboids_sds AS cuboids_sds_t1,
+          cuci_1.ci_sds AS ci_sds_t1,
+
+          cuci_2.task_id AS task_id_2,
+          cuci_2.cuboids_sds AS cuboids_sds_t2,
+          cuci_2.ci_sds AS ci_sds_t2
+        
+        FROM
+          oflow_culici_tasks_df AS cuci_1, oflow_culici_tasks_df AS cuci_2
+        
+        WHERE
+          SIZE(cuci_1.ci_sds) > 0 AND
+          SIZE(cuci_2.ci_sds) > 0 AND
+          ( {task_id_join_clause} ) {task_id_filter_clause}
+      """.format(
+            task_id_join_clause=task_id_join_clause,
+            task_id_filter_clause=task_id_filter_clause))
+
+  
+
+
+  # @classmethod
+  # def _render_oflow_tasks(cls, T_ego2lidar, fused_datum_sample, itask_rows):
+  #   from oarphpy.spark import RowAdapter
+  #   FROM_ROW = RowAdapter.from_row
+    
+  #   track_id_to_fused_cloud = {}
+  #   for pc in fused_datum_sample.lidar_clouds:
+  #     if 'lidar|objects_fused' in pc.sensor_name:
+  #       cucloud = FROM_ROW(pc)
+  #       track_id = cucloud.extra['track_id']
+  #       track_id_to_fused_cloud[track_id] = cucloud.get_cloud()
+  #       print(track_id, track_id_to_fused_cloud[track_id].shape)
+  #   print('track_id_to_fused_cloud', len(track_id_to_fused_cloud))
+
+  #   world_cloud = None
+  #   for pc in fused_datum_sample.lidar_clouds:
+  #     if 'lidar|world_fused' in pc.sensor_name:
+  #       pc = FROM_ROW(pc)
+  #       print('loading cloud', pc.sensor_name)
+  #       world_cloud = pc.get_cloud()
+  #       print('loaded')
+  #       break
+  #   assert world_cloud is not None, fused_datum_sample.get_topics()
+  #   print('cfcloud', world_cloud.shape)
+    
+  #   def union_all(it):
+  #     import itertools
+  #     return list(itertools.chain.from_iterable(it))
+  #   for trow in itask_rows:
+  #     cuboids1 = union_all(FROM_ROW(sd.cuboids) for sd in trow.cuboids_sds_t1)
+  #     cuboids2 = union_all(FROM_ROW(sd.cuboids) for sd in trow.cuboids_sds_t2)
+  #     ci1s = [FROM_ROW(sd.camera_image) for sd in trow.ci_sds_t1]
+  #     ci2s = [FROM_ROW(sd.camera_image) for sd in trow.ci_sds_t2]
+  #     cname_to_ci1 = dict((c.sensor_name, c) for c in ci1s)
+  #     cname_to_ci2 = dict((c.sensor_name, c) for c in ci2s)
+  #     all_cams = sorted(set(cname_to_ci1.keys()) & set(cname_to_ci2.keys()))
+  #     for sensor_name in all_cams:
+  #       print(sensor_name)
+  #       import time
+  #       start = time.time()
+        
+  #       ci1 = cname_to_ci1[sensor_name]
+  #       ci2 = cname_to_ci2[sensor_name]
+        
+  #       cam_height_pixels = ci1.height
+  #       cam_width_pixels = ci1.width
+  #       assert (ci1.width, ci1.height) == (ci2.width, ci2.height)
+
+  #       # Pose all objects for t1 and t2
+  #       moving_1 = np.zeros((0, 3))
+  #       for cuboid in cuboids1:
+  #         cloud_obj = track_id_to_fused_cloud[cuboid.track_id]
+  #         cloud_ego = cuboid.obj_from_ego['ego', 'obj'].apply(cloud_obj).T
+  #         cloud_world = cuboid.ego_pose.apply(cloud_ego).T
+  #         moving_1 = np.vstack([moving_1, cloud_world])
+  #       print('moving_1', moving_1.shape)
+        
+  #       moving_2 = np.zeros((0, 3))
+  #       for cuboid in cuboids2:
+  #         cloud_obj = track_id_to_fused_cloud[cuboid.track_id]
+  #         cloud_ego = cuboid.obj_from_ego['ego', 'obj'].apply(cloud_obj).T
+  #         cloud_world = cuboid.ego_pose.apply(cloud_ego).T
+  #         moving_2 = np.vstack([moving_2, cloud_world])
+  #       print('moving_2', moving_2.shape)
+        
+    
+  #       movement = ci1.ego_pose.translation - ci2.ego_pose.translation
+  #       print('movement', movement)
+  #       if np.linalg.norm(movement) < 0.01:
+  #           print('less than 1cm movement...')
+  #           continue
+    
+  #       # T_ego2cam = ci1.ego_to_sensor.get_transformation_matrix(homogeneous=True)
+  #       # T_lidar2cam = T_ego2cam @ np.linalg.inv(T_ego2lidar)
+    
+  #       P = np.eye(4)
+  #       P[:3, :3] = ci1.K[:3, :3]
+    
+  #       pose1 = ci1.ego_pose.get_transformation_matrix(homogeneous=True)
+  #       pose2 = ci2.ego_pose.get_transformation_matrix(homogeneous=True)
+  #       result = cls.render_func(
+  #                   world_cloud=world_cloud,
+  #                   T_ego2lidar=np.eye(4), # T_ego2lidar nope this is np.eye(4) for kitti and nusc
+            
+  #                   # KITTI-360 and nusc too wat i guess ego is lidar?
+  #                   T_lidar2cam=ci1.ego_to_sensor.get_transformation_matrix(homogeneous=True),
+
+  #                   P=P,
+  #                   cam_height_pixels=cam_height_pixels,
+  #                   cam_width_pixels=cam_width_pixels,
+
+  #                   ego_pose1=pose1,
+  #                   ego_pose2=pose2,
+  #                   moving_1=moving_1,
+  #                   moving_2=moving_2,
+
+
+  #                   img1_factory=lambda: ci1.get_image(),
+  #                   img2_factory=lambda: ci2.get_image(),
+  #                   debug_title=trow.oflow_task_id)
+        
+  #       print('did in', time.time() - start)
+
+  #       yield result
+        
+        
+  #       # import pickle
+  #       # #path = "/opt/psegs/temp_out_fused/pair_%s_%s_%s.pkl" % (cam, t1, t2)
+  #       # path = "/outer_root/media/seagates-ext4/au_datas/temp_out_fused/pair_%s_%s_%s.pkl" % (cam, t1, t2)
+  #       # pickle.dump(data, open(path, 'wb'))
+  #       # print(path)
+
+  #   print()
+
+  @classmethod
+  def build(cls, spark=None, only_segments=None):
+    """Render optical flow for each segment and pickle every result row to
+    `/tmp/oflow_out/oflow_<i>_<j>.pkl`.
+    Args:
+      spark: optional Spark session, handed to `Spark.sess()`.
+      only_segments: optional segment URIs; defaults to all of `SRC_SD_T()`'s.
+    NOTE(review): output names omit the segment, so a multi-segment build
+    overwrites the files of earlier segments -- confirm this is intended.
+    """
+    import pickle
+    from oarphpy.spark import num_executors
+    OUT_PATH = '/tmp/oflow_out/'
+    with Spark.sess(spark) as spark:
+      segment_uris = only_segments or cls.SRC_SD_T().get_all_segment_uris()
+      for suri in segment_uris:
+        task_df = cls.TASK_DF_FACTORY().build_df_for_segment(spark, suri)
+        print('num tasks', task_df.count())
+        fused_datum_sample = cls.FUSED_LIDAR_SD_TABLE.get_sample(suri, spark=spark)
+        T_ego2lidar = cls._get_T_ego2lidar(task_df)
+        oflow_task_df = cls._get_oflow_task_df(spark, task_df)
+        # count() has to evaluate the self-join, so run it only once.
+        n_tasks = oflow_task_df.count()
+        print('oflow_task_df', n_tasks)
+        worker = RenderOFlowTasksWorker(T_ego2lidar, fused_datum_sample, cls.render_func)
+
+        # Hacky way to coalesce into CPU-intensive partitions
+        n_parts = int(max(1, n_tasks / (10 * num_executors(spark))))
+        print('coalesc to ', n_parts)
+        oflow_task_df = oflow_task_df.coalesce(n_parts)
+        result_rdd = oflow_task_df.rdd.mapPartitions(lambda irows: worker.single_machine_map_rows(irows))
+        oputil.mkdir(OUT_PATH)
+        t = oputil.ThruputObserver(name='BuildOFlow', n_total=n_tasks)
+        t.start_block()
+        for i, results in enumerate(result_rdd.toLocalIterator(prefetchPartitions=False)):
+          for j, row in enumerate(results):
+            path = os.path.join(OUT_PATH, 'oflow_%s_%s.pkl' % (i, j))
+            with open(path, 'wb') as f:
+              pickle.dump(row, f, protocol=pickle.HIGHEST_PROTOCOL)
+            print('saved to', path)
+
+          t.update_tallies(n=1, num_bytes=oputil.get_size_of_deep(results), new_block=True)
+          t.maybe_log_progress(every_n=1)
+
+
+
+
+class WorldCloudCleaner(object):
+  """Fuses lidar point clouds into a single cloud in the world frame.
+
+  For every input point cloud, `get_cleaned_world_cloud()`:
+    1. takes the first three columns of `pc.get_cloud()`,
+    2. moves them into the ego frame with the inverse of `pc.ego_to_sensor`,
+    3. passes them through `_filter_ego_vehicle()` (a no-op in this base
+       class),
+    4. drops every point that `get_point_idx_in_cuboid()` reports as being
+       inside one of the given cuboids, and
+    5. moves the remaining points into the world frame with `pc.ego_pose`.
+
+  Subclasses can override `_filter_ego_vehicle()` for datasets that need it.
+  `FusedFlowDFFactory.WORLD_CLEANER_CLS` refers to this class.
+  """
+
+  @classmethod
+  def _filter_ego_vehicle(cls, cloud_ego):
+    """Hook for removing the ego vehicle's self-returns from `cloud_ego`
+    (a cloud in the ego frame), as needed for some datasets (e.g. NuScenes).
+
+    This base implementation filters nothing.
+
+    Args:
+      cloud_ego: the cloud in the ego frame.
+
+    Returns:
+      `cloud_ego` itself, unchanged.
+    """
+    return cloud_ego
+
+  def get_cleaned_world_cloud(self, point_clouds, cuboids):
+    """Merge `point_clouds` into one world-frame cloud without cuboid points.
+
+    Args:
+      point_clouds: non-empty sequence of point clouds; each must provide
+        `get_cloud()`, `ego_to_sensor` and `ego_pose`.
+      cuboids: cuboids whose interior points are removed from every cloud.
+
+    Returns:
+      A tuple `(world_cloud, n_pruned)`: the `np.vstack` of all cleaned
+      clouds, and the total number of points removed for being in a cuboid.
+      Points dropped by `_filter_ego_vehicle()` are not part of that count.
+
+    Raises:
+      AssertionError: if `point_clouds` is empty.
+    """
+    assert point_clouds
+
+    world_chunks = []
+    total_pruned = 0
+    for pc in point_clouds:
+      xyz_sensor = pc.get_cloud()[:, :3] # TODO: can we keep colors?
+      sensor_to_ego = pc.ego_to_sensor.get_inverse()
+      pts_ego = self._filter_ego_vehicle(sensor_to_ego.apply(xyz_sensor).T)
+
+      # Filter out all cuboids
+      n_start = pts_ego.shape[0]
+      for cuboid in cuboids:
+        inside, _ = get_point_idx_in_cuboid(cuboid, cloud_ego=pts_ego)
+        pts_ego = pts_ego[~inside]
+      total_pruned += n_start - pts_ego.shape[0]
+
+      # NB: `ego_pose` is what takes ego-frame points to the world frame here
+      # (the original code called it T_world_to_ego and wondered whether that
+      # name was backwards; this also works for nusc).
+      world_chunks.append(pc.ego_pose.apply(pts_ego).T)
+
+    return np.vstack(world_chunks), total_pruned
+
+
+class OpticalFlowRenderer(object):
+  """Placeholder renderer interface; neither method is implemented yet."""
+  def world_cloud_to_uvd_visible(self, point_cloud, camera_image):
+    """Not implemented: currently does nothing and returns None."""
+
+  def merge_uvd_visible(self, uvd_visible_1, uvd_visible_2):
+    """Not implemented: currently does nothing and returns None."""
+
+
+class MyT(oputil.ThruputObserver):
+  """A `ThruputObserver` with `+`, `<` and `>` defined.  NOTE(review):
+  presumably so it can be a `Counter` value (see `_build_world_cloud_df`)."""
+  def __gt__(self, v):
+    """Compare `name`s if `v` is an observer, else compare `self.n` to `v`."""
+    if not isinstance(v, oputil.ThruputObserver):
+      return self.n > v
+    return self.name > v.name
+
+  def __lt__(self, v):
+    if not isinstance(v, oputil.ThruputObserver):
+      return self.n < v
+    return self.name < v.name
+
+  @classmethod
+  def union(cls, thruputs):
+    """Return a fresh `cls()` after `+=`-ing each of `thruputs` into it."""
+    merged = cls()
+    for thruput in thruputs:
+      merged += thruput
+    return merged
+
+  def __add__(self, other):
+    is_observer = isinstance(other, oputil.ThruputObserver)
+    return self.union((self, other)) if is_observer else self
+
+  def __repr__(self):
+    return str(self)
+
+class FusedFlowDFFactory(object):
+
+  """
+
+  A) Create flow pairs
+  B) Fuse data for the pair
+  C) Render the pair for samples 1 and 2
+  D) save and/or debug the output
+
+  Deets:
+  AA) allow (and study!) large-displacement pairs
+  
+  BB) start and debug with just ONE CLOUD
+  BB) allow sampling to just 50% or N% of clouds
+  BB) allow either fused objects or single frame objects ...
+  BB) allow smoothing, KITTI fused data instead, KITTI fusing method etc
+  BB) some day graph laplacian fusing ...
+
+  CC) rendering is:
+       * 000 clean world clouds and put in mem+disk cached
+       * get a chunk of pairs
+       * use a SELECT to create a DF of 
+          chunk ID | [point cloud] | sample 1 pose / cam | sample 2 pose / cam 
+              the sample columns are static data same for *chunks* !!
+           Spark will (probably)? partition data so that point clouds don't
+           need to move
+       * RDD map reduce (maybe DF pandas map?)
+           -> map point cloud -> uvd (or xyz)
+           -> *combine* and reduce -> merge_uvd_nearest 
+                (for SF need polar to do UV)
+  DD) render and/or save to parquet
+
+  """
+
+  SAMPLE_DF_FACTORY = None
+
+  FUSED_LIDAR_SD_TABLE = None
+
+  WORLD_CLEANER_CLS = WorldCloudCleaner
+
+  # Some datasets are not amenable to fused object clouds; use this member
+  # to opt those datasets out of object clouds.
+  HAS_OBJ_CLOUDS = True
+
+  @classmethod
+  def SRC_SD_T(cls):  # the `SRC_SD_TABLE` attribute of `SAMPLE_DF_FACTORY`; `build()` calls `get_all_segment_uris()` on it
+    return cls.SAMPLE_DF_FACTORY.SRC_SD_TABLE
+
+  @classmethod
+  def _build_world_cloud_df(cls, spark, sample_df):
+    """Lazily compute one cleaned world cloud per sample of `sample_df`.
+
+    Despite the name, this returns an RDD (not a DataFrame).  Nothing is
+    rendered until the caller runs an action on that RDD.
+
+    Args:
+      spark: the Spark session; used for its `sparkContext`.
+      sample_df: DataFrame with columns `sample_id`, `pc_sds` and
+        `cuboids_sds`.  Samples with an empty `pc_sds` are skipped.
+
+    Returns:
+      An RDD of `Row`s built from `RowAdapter.to_row()` of a dict with the
+      keys `sample_id` and `world_cloud`; the latter is the cloud returned
+      by `cleaner.get_cleaned_world_cloud()` for that sample.
+
+    Side effects:
+      * Runs a Spark job to assert that at least one sample has a point cloud.
+      * Starts a daemon thread that logs the progress counters every 30
+        seconds.  NOTE(review): that thread never exits, so every call adds
+        another logger thread to the driver process.
+    """
+    from collections import Counter
+    from pyspark.accumulators import AccumulatorParam
+
+    cleaner = cls.WORLD_CLEANER_CLS()
+
+    class CounterAccumulator(AccumulatorParam):
+      """Spark accumulator param that sums `Counter`s."""
+      def zero(self, value):
+        return Counter({})
+
+      def addInPlace(self, value1, value2):
+        return value1 + value2
+
+    # Accumulator that the mapped rows add their per-row stats to.
+    sc = spark.sparkContext
+    C_acc = sc.accumulator(Counter(), CounterAccumulator())
+
+    class RowToWorldCloud(object):
+      """Maps a row of (`sample_id`, `pc_sds`, `cuboids_sds`) to a row of
+      (`sample_id`, `world_cloud`) and tallies stats into `C_acc`."""
+      def __init__(self, cleaner, C_acc):
+        self.cleaner = cleaner
+        self.C_acc = C_acc
+
+      def __call__(self, row):
+        import itertools
+        from oarphpy.spark import RowAdapter
+        from pyspark import Row
+        from threadpoolctl import threadpool_limits
+        # Restrict BLAS to a single thread while we render this row.
+        with threadpool_limits(limits=1, user_api='blas'):
+          from collections import Counter
+          counter = Counter()
+          # A fresh observer per row; start_block()/stop_block() bracket its work.
+          t = MyT(name='world_clouds')
+          t.start_block()
+
+          pc_sds = row.pc_sds
+          cuboids_sds = row.cuboids_sds
+
+          pcs = [RowAdapter.from_row(sdr).point_cloud for sdr in pc_sds]
+          cuboidss = [RowAdapter.from_row(sdr).cuboids for sdr in cuboids_sds]
+          cuboids = list(itertools.chain.from_iterable(cuboidss))
+
+          world_cloud, n_pruned = self.cleaner.get_cleaned_world_cloud(pcs, cuboids)
+
+          # NOTE(review): Spark may apply accumulator updates made inside a
+          # transformation more than once (e.g. when a task is re-run or the
+          # RDD is recomputed); that could explain the doubling noted below.
+          counter['n_pruned'] += n_pruned
+          counter['n_wc_pts'] += world_cloud.shape[0]
+          counter['pcs'] += len(pcs) # TODO why is this seem to report double the world clouds found ??
+          counter['cuboids'] += len(cuboids)
+
+          data = {
+            'sample_id': row.sample_id,
+            'world_cloud': world_cloud,
+          }
+          rowdata = RowAdapter.to_row(data)
+          t.stop_block(n=len(pcs), num_bytes=world_cloud.nbytes) # TODO is this the thruput we want?
+          # The observer rides along as a Counter value (MyT defines `+`, `>`).
+          counter['t_world_clouds'] = t
+          self.C_acc += counter
+          return Row(**rowdata)
+
+    # Only samples that have at least one point cloud can be rendered.
+    licu_df = sample_df.select('sample_id', 'pc_sds', 'cuboids_sds')
+    licu_df = licu_df.filter("SIZE(pc_sds) > 0")
+    assert licu_df.count() > 0
+
+    # Lazy: nothing is computed until an action runs on this RDD.
+    f = RowToWorldCloud(cleaner, C_acc)
+    world_cloud_rdd = licu_df.rdd.map(f)
+
+    # Log the accumulated stats in the background while the caller's Spark
+    # jobs evaluate the RDD.
+    def spin_log():
+      import time
+      while True:
+        print('spinns')
+        import pprint
+        util.log.info(pprint.pformat(C_acc.value))
+        time.sleep(30)
+
+    import threading
+    bkg_th = threading.Thread(target=spin_log, args=())
+    bkg_th.daemon = True
+    bkg_th.start()
+
+    return world_cloud_rdd
+    
+
+    
+    # func = F.udf(f, returnType=RowAdapter.to_schema(RETURN_PROTO))
+
+    # licu_df = licu_df.withColumn(
+    #             'world_cloud',
+    #             func(licu_df['pc_sds'], licu_df['cuboids_sds']))
+    #             # row_to_world_cloud(licu_df['pc_sds'], licu_df['cuboids_sds']))
+    # world_cloud_df = licu_df.select('sample_id', 'world_cloud')
+    # return world_cloud_df
+
+
+
+  # @classmethod
+  # def _get_flow_task_df(cls, spark, sample_df, flow_pairs):
+
+    
+
+
+  #   # Optionally limit by number of tasks.
+  #   # We do this by filtering on task_id because it's much cheaper than e.g.
+  #   # trying to sort the table below by rand() and then doing a LIMIT.
+  #   task_id_filter_clause = ''
+  #   if cls.MAX_TASKS_PER_SEGMENT > 0:
+  #     print('restrict to', cls.MAX_TASKS_PER_SEGMENT)
+  #     task_ids = [r.task_id for r in task_df.select('task_id').collect()]
+  #     from random import Random
+  #     r = Random(cls.MAX_TASKS_SEED)
+  #     r.shuffle(task_ids)
+  #     task_ids = task_ids[:cls.MAX_TASKS_PER_SEGMENT]
+  #     tid_str = ", ".join(str(tid) for tid in task_ids)
+  #     task_id_filter_clause = "AND cuci_1.task_id in ( %s )" % tid_str
+
+  #   # Compute tasks pairs for flow
+  #   task_id_join_clauses = [
+  #     "( cuci_1.task_id = (cuci_2.task_id + %s) )" % offset
+  #     for offset in cls.TASK_OFFSETS
+  #   ]
+  #   task_id_join_clause = " OR ".join(task_id_join_clauses)
+
+  #   # Build the flow pair task table
+  #   spark.catalog.dropTempView('oflow_culici_tasks_df')
+  #   task_df.registerTempTable('oflow_culici_tasks_df')
+  #   oflow_task_df = spark.sql(
+  #     """
+  #       SELECT
+  #         CONCAT(cuci_1.task_id, '->', cuci_2.task_id)
+  #           AS oflow_task_id,
+          
+  #         cuci_1.task_id AS task_id_1,
+  #         cuci_1.cuboids_sds AS cuboids_sds_t1,
+  #         cuci_1.ci_sds AS ci_sds_t1,
+
+  #         cuci_2.task_id AS task_id_2,
+  #         cuci_2.cuboids_sds AS cuboids_sds_t2,
+  #         cuci_2.ci_sds AS ci_sds_t2
+        
+  #       FROM
+  #         oflow_culici_tasks_df AS cuci_1, oflow_culici_tasks_df AS cuci_2
+        
+  #       WHERE
+  #         SIZE(cuci_1.ci_sds) > 0 AND
+  #         SIZE(cuci_2.ci_sds) > 0 AND
+  #         ( {task_id_join_clause} ) {task_id_filter_clause}
+  #     """.format(
+  #           task_id_join_clause=task_id_join_clause,
+  #           task_id_filter_clause=task_id_filter_clause))
+
+  #   return oflow_task_df
+
+  @classmethod
+  def build(cls, spark=None, only_segments=None):
+    with Spark.sess(spark) as spark:
+      segment_uris = cls.SRC_SD_T().get_all_segment_uris()
+      if only_segments:
+        only_segments = [datum.URI.from_str(s) for s in only_segments]
+        segment_uris = [
+          suri for suri in segment_uris
+          if any(s.soft_matches_segment(suri) for s in only_segments)
+        ]
+
+      for suri in segment_uris:
+        sample_df = cls.SAMPLE_DF_FACTORY.build_df_for_segment(spark, suri)
+        print('sample_df size', sample_df.count())
+        
+        world_cloud_df = cls._build_world_cloud_df(spark, sample_df)
+        # world_cloud_df = world_cloud_df.repartition('sample_id').persist()
+        import pyspark
+        world_cloud_df = world_cloud_df.persist(pyspark.StorageLevel.MEMORY_AND_DISK)
+        
+        # if sample_df.count() >= 100:
+        #   util.log.info("Rendering and caching world clouds ...")
+        #   thruput_wc = oputil.ThruputObserver('RenderWorldClouds')
+        #   thruput_wc.start_block()
+        #   spark.catalog.dropTempView('world_cloud_df')
+        #   world_cloud_df.registerTempTable('world_cloud_df')
+        #   stats_df = spark.sql("""
+        #     SELECT
+        #       COUNT(*) AS n_clouds,
+        #       1e-9 * SUM(LENGTH(world_cloud.cloud.values_packed)) AS cloud_gbytes,
+        #       SUM(world_cloud.n_pruned) AS total_pruned,
+        #       SUM(world_cloud.n_pruned) / (
+        #           SUM(world_cloud.n_pruned) + SUM(world_cloud.cloud.shape[0]))
+        #         AS total_frac_pruned,
+        #       MEAN(world_cloud.n_pruned) AS avg_pruned_per_cloud,
+        #       PERCENTILE(world_cloud.n_pruned, 0.1) AS pruned_per_cloud_10th,
+        #       PERCENTILE(world_cloud.n_pruned, 0.9) AS pruned_per_cloud_90th
+        #     FROM 
+        #       world_cloud_df
+        #   """)
+        #   stats = stats_df.first().asDict()
+        #   util.log.info(
+        #     "World Cloud Stats:\n%s" % stats)
+        #   thruput_wc.stop_block(
+        #     n=stats['n_clouds'], num_bytes=1e9*stats['cloud_gbytes'])
+        #   thruput_wc.maybe_log_progress(every_n=1)
+
+
+        if cls.HAS_OBJ_CLOUDS:
+          fused_sds = cls.FUSED_LIDAR_SD_TABLE.get_fused_obj_sds(
+                                                    spark, suri, sample_df)
+        else:
+          fused_sds = []
+
+        from pyspark import StorageLevel
+        fused_sd_rdd = spark.sparkContext.parallelize(
+                        fused_sds, numSlices=len(fused_sds))
+        fused_sd_rdd = fused_sd_rdd.persist(StorageLevel.DISK_ONLY)
+        print('fused_sd_rdd', fused_sd_rdd.count())
+        
+        
+        sample_id_filter_clause = ''
+        # if cls.MAX_TASKS_PER_SEGMENT > 0:
+        #   print('restrict to', cls.MAX_TASKS_PER_SEGMENT)
+        #   task_ids = [r.task_id for r in task_df.select('task_id').collect()]
+        #   from random import Random
+        #   r = Random(cls.MAX_TASKS_SEED)
+        #   r.shuffle(task_ids)
+        #   task_ids = task_ids[:cls.MAX_TASKS_PER_SEGMENT]
+        #   tid_str = ", ".join(str(tid) for tid in task_ids)
+        #   task_id_filter_clause = "AND cuci_1.task_id in ( %s )" % tid_str
+
+        # Compute tasks pairs for flow
+        sample_id_join_clauses = [
+          "( cuci_2.sample_id = (cuci_1.sample_id + %s) )" % offset
+          for offset in (1,)#cls.TASK_OFFSETS
+        ]
+        sample_id_join_clause = " OR ".join(sample_id_join_clauses)
+
+        # Build the flow pair task table
+        spark.catalog.dropTempView('flow_pairs_df')
+        sample_df.registerTempTable('sample_df')
+        flow_pairs_df = spark.sql(
+          """
+            SELECT
+              CONCAT(cuci_1.sample_id, '->', cuci_2.sample_id)
+                AS flow_pair_id,
+              
+              cuci_1.sample_id AS sample_id_1,
+              cuci_1.cuboids_sds AS cuboids_sds_1,
+              cuci_1.ci_sds AS ci_sds_1,
+
+              cuci_2.sample_id AS sample_id_2,
+              cuci_2.cuboids_sds AS cuboids_sds_2,
+              cuci_2.ci_sds AS ci_sds_2
+            
+            FROM
+              sample_df AS cuci_1, sample_df AS cuci_2
+            
+            WHERE
+              SIZE(cuci_1.ci_sds) > 0 AND
+              SIZE(cuci_2.ci_sds) > 0 AND
+              cuci_1.sample_id >= 0 AND
+              cuci_2.sample_id >= 0 AND
+              ( {task_id_join_clause} ) {sample_id_filter_clause}
+            ORDER BY RAND(1337)
+          """.format(
+                task_id_join_clause=sample_id_join_clause,
+                sample_id_filter_clause=sample_id_filter_clause))
+        flow_pairs_df.registerTempTable('flow_pairs_df')
+
+        pairs_stats_df = spark.sql("""
+          SELECT
+            COUNT(*) AS total_sample_pairs,
+            SUM(SIZE(ci_sds_1)) AS num_camera_pose_pairs
+          FROM flow_pairs_df
+        """)
+        pairs_stats = pairs_stats_df.first().asDict()
+        util.log.info(
+          "Flow Pairs Stats:\n%s" % pairs_stats)
+
+        # world_cloud_df.registerTempTable('world_cloud_df')
+
+        thruput_pairs = oputil.ThruputObserver(
+          'RenderFlowPairs_%s' % suri.segment_id,
+          n_total=pairs_stats['total_sample_pairs'])
+        for row in flow_pairs_df.rdd.toLocalIterator():
+          thruput_pairs.start_block()
+
+
+          """
+          for each camera pair:
+            static cloud: just take xyz
+            dynamic cloud: use xyz pose 1 or xyz pose 2 in projection; need
+              *same point order for both projection*
+            project to uvdviz1, uvdviz2
+            trim invisible in both 1 and 2
+
+          """
+
+
+          from oarphpy.spark import RowAdapter
+          FROM_ROW = RowAdapter.from_row
+
+          def union_all(it):
+            import itertools
+            return list(itertools.chain.from_iterable(it))
+
+          cuboids1 = union_all(FROM_ROW(sd.cuboids) for sd in row.cuboids_sds_1)
+          cuboids2 = union_all(FROM_ROW(sd.cuboids) for sd in row.cuboids_sds_2)
+          ci1_sds = [FROM_ROW(sd) for sd in row.ci_sds_1]
+          ci2_sds = [FROM_ROW(sd) for sd in row.ci_sds_2]
+          cname_to_ci1 = dict((c.camera_image.sensor_name, c) for c in ci1_sds)
+          cname_to_ci2 = dict((c.camera_image.sensor_name, c) for c in ci2_sds)
+          all_cams = sorted(set(cname_to_ci1.keys()) & set(cname_to_ci2.keys()))
+          for sensor_name in all_cams:
+            print(sensor_name)
+            ci_sd1 = cname_to_ci1[sensor_name]
+            ci_sd2 = cname_to_ci2[sensor_name]
+            ci1 = ci_sd1.camera_image
+            ci2 = ci_sd2.camera_image
+
+
+            ## Cuboids
+            class FusedObjectCloudToWorldCloudPair(object):
+              def __init__(self, cuboids1, cuboids2):
+                self._track_id_to_cuboid1 = dict((c.track_id, c) for c in cuboids1)
+                self._track_id_to_cuboid2 = dict((c.track_id, c) for c in cuboids2)
+                self._thruput = oputil.ThruputObserver(name='FusedObjectCloudToWorldCloudPair', log_freq=1)
+                # self._cuboids1 = cuboids1
+                # self._cuboids2 = cuboids2
+                # self._track_ids = (
+                #   set(c.track_id for c in cuboids1) | 
+                #   set(c.track_id for c in cuboids2))
+              
+              def __call__(self, stamped_datum):
+                from threadpoolctl import threadpool_limits
+                with threadpool_limits(limits=1, user_api='blas'):
+                  self._thruput.start_block()
+
+                  EMPTY_CLOUD = np.zeros((0, 3))
+                  if not stamped_datum.point_cloud:
+                    return EMPTY_CLOUD, EMPTY_CLOUD
+                  pc = stamped_datum.point_cloud
+                  if 'lidar|objects_fused' not in pc.sensor_name:
+                    return EMPTY_CLOUD, EMPTY_CLOUD
+                  
+                  track_id = pc.extra['track_id']
+                  have_both_poses = (
+                    track_id in self._track_id_to_cuboid1 and
+                    track_id in self._track_id_to_cuboid2)
+                  if not have_both_poses:
+                    return EMPTY_CLOUD, EMPTY_CLOUD
+
+                  cloud_obj = pc.get_cloud()
+
+                  def render_world(t2c):
+                    cuboid = t2c[track_id]
+                    cloud_ego = cuboid.obj_from_ego['ego', 'obj'].apply(cloud_obj[:, :3]).T
+                    cloud_world = cuboid.ego_pose.apply(cloud_ego).T
+                    return cloud_world
+
+                  world_cloud1 = render_world(self._track_id_to_cuboid1)
+                  world_cloud2 = render_world(self._track_id_to_cuboid2)
+                  
+                  self._thruput.stop_block(n=1, num_bytes=(world_cloud1.nbytes + world_cloud2.nbytes))
+                  self._thruput.maybe_log_progress()
+                  
+                  return world_cloud1, world_cloud2
+
+            class RenderObjCloudPair(object):
+              def __init__(self):
+                self._thruput = oputil.ThruputObserver(name='RenderObjCloudPair', log_freq=10)
+              def __call__(self, wc_pair):
+                self._thruput.start_block()
+
+                world_cloud1, world_cloud2 = wc_pair
+                if (world_cloud1.shape[0] + world_cloud2.shape[0]) == 0:
+                  return np.zeros((0, 8))
+                
+                from threadpoolctl import threadpool_limits
+                with threadpool_limits(limits=1, user_api='blas'):
+                  uvd_viz1_uvd_viz2 = render_oflow_pair(
+                                        ci1, ci2,
+                                        world_cloud1=world_cloud1,
+                                        world_cloud2=world_cloud2)
+                self._thruput.stop_block(n=1, num_bytes=uvd_viz1_uvd_viz2.nbytes)
+                self._thruput.maybe_log_progress()
+                print('uvd_viz1_uvd_viz2 shape', uvd_viz1_uvd_viz2.shape)
+                return uvd_viz1_uvd_viz2
+
+            obj_to_wc = FusedObjectCloudToWorldCloudPair(cuboids1, cuboids2)
+            render_obj_wcs = RenderObjCloudPair()
+            obj_uvd_viz1_uvd_viz2_rdd = fused_sd_rdd.map(obj_to_wc).map(render_obj_wcs)
+
+            ## World Clouds
+
+            class RenderWCPartition(object):  # mapPartitions callable: renders every world cloud row and merges the results
+              def __init__(self):
+                self._thruput = oputil.ThruputObserver(name='RenderOflowPair', log_freq=1)
+              def __call__(self, iter_rows):
+                uvd_viz1_uvd_viz2_part = None
+                for row in iter_rows:
+                  self._thruput.start_block()
+                  world_cloud = FROM_ROW(row.world_cloud)
+                  # Only `world_cloud1` is given (world_cloud2=None); presumably
+                  # render_oflow_pair() then uses it for both images -- confirm.
+                  from threadpoolctl import threadpool_limits
+                  with threadpool_limits(limits=1, user_api='blas'):
+                    uvd_viz1_uvd_viz2 = render_oflow_pair(
+                      ci1, ci2, world_cloud1=world_cloud, world_cloud2=None)
+                  # Fold this render into the partition's running merged result.
+                  if uvd_viz1_uvd_viz2_part is None:
+                    uvd_viz1_uvd_viz2_part = uvd_viz1_uvd_viz2
+                  else:
+                    uvd_viz1_uvd_viz2_part = merge_uvd_viz1_uvd_viz2(
+                                                uvd_viz1_uvd_viz2_part,
+                                                uvd_viz1_uvd_viz2)
+
+                  self._thruput.stop_block(n=1, num_bytes=uvd_viz1_uvd_viz2.nbytes)
+                  self._thruput.maybe_log_progress()
+                # Emit at most one merged array per partition (nothing for an empty partition).
+                if uvd_viz1_uvd_viz2_part is None:
+                  return []
+                else:
+                  return [uvd_viz1_uvd_viz2_part]
+            render = RenderWCPartition()
+            # wc_uvd_viz1_uvd_viz2_rdd = world_cloud_df.rdd.mapPartitions(render)
+            wc_uvd_viz1_uvd_viz2_rdd = world_cloud_df.mapPartitions(render)
+
+            uvd_viz1_uvd_viz2_rdd = spark.sparkContext.union([
+              obj_uvd_viz1_uvd_viz2_rdd, wc_uvd_viz1_uvd_viz2_rdd
+            ])
+
+            reduce_uvds = (lambda u1, u2: merge_uvd_viz1_uvd_viz2(u1, u2))
+            uvd_viz1_uvd_viz2 = uvd_viz1_uvd_viz2_rdd.treeReduce(reduce_uvds, depth=3)
+              # NB: treeReduce is more efficient than reduce() because reduce()
+              # will do an O(num partitions) aggregate in the driver
+            print('final uvd_viz1_uvd_viz2', uvd_viz1_uvd_viz2.shape)
+
+            base_path = '/opt/psegs/test_run_output/'
+            fname = 'refactor_%s_%s_%s.png' % (suri.segment_id, row.flow_pair_id, sensor_name)
+            debug = viz_oflow_pair(ci1, ci2, uvd_viz1_uvd_viz2)
+            import imageio
+            imageio.imwrite(base_path + fname, debug)
+            print('saved debug', base_path + fname)
+
+            
+            h, w = ci1.height, ci1.width
+            uvdvis1 = uvd_viz1_uvd_viz2[:, :4]
+            uvdvis2 = uvd_viz1_uvd_viz2[:, 4:]
+            visible_both = ((uvdvis1[:, -1] == 1) & (uvdvis2[:, -1] == 1))
+            visboth_uv1 = uvdvis1[visible_both, :2]
+            visboth_uv2 = uvdvis2[visible_both, :2]
+            ij1 = np.rint(visboth_uv1[:, (0, 1)])
+            ij_flow = np.hstack([
+              ij1, visboth_uv2 - visboth_uv1
+            ])
+            v2v_flow = np.zeros((h, w, 2))
+            xx = ij_flow[:, 0].astype(np.int)
+            yy = ij_flow[:, 1].astype(np.int)
+            v2v_flow[yy, xx] = ij_flow[:, 2:4]
+            row_out = {
+              'ci1_uri': ci_sd1.uri,
+              'ci2_uri': ci_sd2.uri, # fixed 4/7
+              'uvdij1_visible_uvdij2_visible': uvd_viz1_uvd_viz2,
+                # TODO rename, ij part is gone ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+              'v2v_flow': v2v_flow,
+            }
+              # want for eval:
+              # * the cuboid boxes in ego so that we can do some stats about objects?
+              # * the cuboid box classes so that we can find pedestrians etc?
+              # * --> just the cuboid URIs?
+              # * info that will let us trace a xyz point over frames / time ? ...
+
+            path = os.path.join(base_path, 'refactor_%s_%s_%s_oflow.pkl' % (
+              suri.segment_id, row.flow_pair_id, sensor_name))
+            import pickle
+            with open(path, 'wb') as f:
+              pickle.dump(row_out, f, protocol=pickle.HIGHEST_PROTOCOL)
+            print('saved pkl to', path)
+
+          thruput_pairs.stop_block(n=1)
+          thruput_pairs.maybe_log_progress(every_n=1)
+
+
+          # cname_to_ci1 = dict((c.sensor_name, c) for c in ci1s)
+          #   #     cname_to_ci2 = dict((c.sensor_name, c) for c in ci2s)
+          #   #     all_cams = sorted(set(cname_to_ci1.keys()) & set(cname_to_ci2.keys()))
+          #   #     for sensor_name in all_cams:
+          #   #       print(sensor_name)
+          #   #       import time
+          #   #       start = time.time()
+
+          # uvdij1_visible_uvdij2_visible, v2v_flow = self.render_func(
+          #         world_cloud=world_cloud,
+          #         T_ego2lidar=np.eye(4), # T_ego2lidar nope this is np.eye(4) for kitti and nusc
+          
+          #         # KITTI-360 and nusc too wat i guess ego is lidar?
+          #         T_lidar2cam=ci1.ego_to_sensor.get_transformation_matrix(homogeneous=True),
+
+          #         P=P,
+          #         cam_height_pixels=cam_height_pixels,
+          #         cam_width_pixels=cam_width_pixels,
+
+          #         ego_pose1=pose1,
+          #         ego_pose2=pose2,
+          #         moving_1=moving_1,
+          #         moving_2=moving_2,
+
+
+          #         img1_factory=lambda: ci1.image,
+          #         img2_factory=lambda: ci2.image,
+          #         debug_title=trow.oflow_task_id)
+
+
+          # class RenderUVDVisible(object):
+          #   def __init__(self, ci_sds):
+          #     self._ci_sds = ci_sds
+          #     self._thruput = oputil.ThruputObserver(name='RenderUVDVisible', log_on_del=True)
+            
+          #   def iter_renders(self, world_cloud):
+          #     for ci_sd in self._ci_sds:
+          #       ci = ci_sd.camera_image
+          #       self._thruput.start_block()
+
+          #       P = np.eye(4)
+          #       P[:3, :3] = ci1.K[:3, :3]
+          #       camera_pose = ci.ego_pose.get_transformation_matrix(homogeneous=True)
+          #       T_lidar2cam = ci.ego_to_sensor.get_transformation_matrix(homogeneous=True),
+
+          #       uvd_visible = world_to_uvd_visible(
+          #                       camera_pose=camera_pose,
+          #                       P=P,
+          #                       image_size=(ci.width, ci.height),
+          #                       T_lidar2cam=T_lidar2cam,
+          #                       T_ego2lidar=np.eye(4), # T_ego2lidar nope this is np.eye(4) for kitti and nusc
+          #                       world_cloud=world_cloud)
+                
+          #       self._thruput.stop_block(n=1, num_bytes=uvd_visible.nbytes)
+          #       self._thruput.maybe_log_progress()
+          #       yield {
+          #         'ci_uri': str(ci_sd.uri),
+          #         'uvd_visible': uvd_visible,
+          #       }
+
+
+          
+          # for sensor_name in all_cams:
+          #   t.start_block()
+  #       print(sensor_name)
+  #       import time
+  #       start = time.time()
+        
+  #       ci1 = cname_to_ci1[sensor_name]
+  #       ci2 = cname_to_ci2[sensor_name]
+        
+  #       cam_height_pixels = ci1.height
+  #       cam_width_pixels = ci1.width
+  #       assert (ci1.width, ci1.height) == (ci2.width, ci2.height)
+
+  #       # Pose all objects for t1 and t2
+  #       moving_1 = np.zeros((0, 3))
+  #       for cuboid in cuboids1:
+  #         cloud_obj = track_id_to_fused_cloud[cuboid.track_id]
+  #         cloud_ego = cuboid.obj_from_ego['ego', 'obj'].apply(cloud_obj).T
+  #         cloud_world = cuboid.ego_pose.apply(cloud_ego).T
+  #         moving_1 = np.vstack([moving_1, cloud_world])
+  #       print('moving_1', moving_1.shape)
+        
+  #       moving_2 = np.zeros((0, 3))
+  #       for cuboid in cuboids2:
+  #         cloud_obj = track_id_to_fused_cloud[cuboid.track_id]
+  #         cloud_ego = cuboid.obj_from_ego['ego', 'obj'].apply(cloud_obj).T
+  #         cloud_world = cuboid.ego_pose.apply(cloud_ego).T
+  #         moving_2 = np.vstack([moving_2, cloud_world])
+  #       print('moving_2', moving_2.shape)
+        
+    
+  #       movement = ci1.ego_pose.translation - ci2.ego_pose.translation
+  #       print('movement', movement)
+  #       if np.linalg.norm(movement) < 0.01:
+  #           print('less than 1cm movement...')
+  #           continue
+    
+  #       # T_ego2cam = ci1.ego_to_sensor.get_transformation_matrix(homogeneous=True)
+  #       # T_lidar2cam = T_ego2cam @ np.linalg.inv(T_ego2lidar)
+    
+  #       P = np.eye(4)
+  #       P[:3, :3] = ci1.K[:3, :3]
+    
+  #       pose1 = ci1.ego_pose.get_transformation_matrix(homogeneous=True)
+  #       pose2 = ci2.ego_pose.get_transformation_matrix(homogeneous=True)
+  #       result = cls.render_func(
+  #                   world_cloud=world_cloud,
+  #                   T_ego2lidar=np.eye(4), # T_ego2lidar nope this is np.eye(4) for kitti and nusc
+            
+  #                   # KITTI-360 and nusc too wat i guess ego is lidar?
+  #                   T_lidar2cam=ci1.ego_to_sensor.get_transformation_matrix(homogeneous=True),
+
+  #                   P=P,
+  #                   cam_height_pixels=cam_height_pixels,
+  #                   cam_width_pixels=cam_width_pixels,
+
+  #                   ego_pose1=pose1,
+  #                   ego_pose2=pose2,
+  #                   moving_1=moving_1,
+  #                   moving_2=moving_2,
+
+
+  #                   img1_factory=lambda: ci1.get_image(),
+  #                   img2_factory=lambda: ci2.get_image(),
+  #                   debug_title=trow.oflow_task_id)
+        
+  #       print('did in', time.time() - start)
+
+  #       yield result
+
+        
+
+        # SAMPLE_WINDOW = 1000000
+        # render_df = spark.sql("""
+        #   SELECT
+        #     *
+        #   FROM
+        #     flow_pairs_df, world_cloud_df
+        #   WHERE
+        #     sample_id <= sample_id_1 + {sample_window} AND
+        #     sample_id >= sample_id_1 - {sample_window} AND
+        #     sample_id <= sample_id_2 + {sample_window} AND
+        #     sample_id >= sample_id_2 - {sample_window}
+
+        # """.format(sample_window=SAMPLE_WINDOW))
+
+        
+
+        # stats_df = spark.sql("""
+        #   SELECT
+        #     COUNT(*) AS n_clouds,
+        #     1e-9 * SUM(LENGTH(world_cloud.cloud.values_packed)) AS cloud_gbytes,
+        #     SUM(world_cloud.n_pruned) AS total_pruned,
+        #     SUM(world_cloud.n_pruned) / (
+        #         SUM(world_cloud.n_pruned) + SUM(world_cloud.cloud.shape[0]))
+        #       AS total_frac_pruned,
+        #     MEAN(world_cloud.n_pruned) AS avg_pruned_per_cloud,
+        #     PERCENTILE(world_cloud.n_pruned, 0.1) AS pruned_per_cloud_10th,
+        #     PERCENTILE(world_cloud.n_pruned, 0.9) AS pruned_per_cloud_90th
+        #   FROM 
+        #     world_cloud_df
+        # """)
+        # util.log.info(
+        #   "World Cloud Stats:\n%s" % stats_df.toPandas().transpose())
+
+        # import ipdb; ipdb.set_trace()
+        # print()
+
+
+
+
+        # REPORT = """
+        #   Total world clouds: {n_clouds} ({cloud_gbytes:%2f} GBytes)
+        #   Total points pruned: {total_pruned}
+        #   Total frac pruned: {total_frac_pruned}
+        #   Avg pruned per cloud: {avg_per_cloud}
+        # """.format(
+        #   n_clouds=stats_df.count(),
+        #   cloud_gbytes=1e-9 * stats_df['cloud_bytes'].sum(),
+        #   total_pruned=stats_df['n_pruned'].sum(),
+        #   total_frac_pruned=(
+        #     stats_df['n_pruned'].sum() / (
+        #       stats_df['n_kept'].sum() + stats_df['n_pruned'].sum())),
+        #   avg_per_cloud=stats_df['n_pruned'].mean())
+        # util.log.info(REPORT)
+        
+        
+        # world_cloud_df.show()
+        # import ipdb; ipdb.set_trace()
+        # print()
+        
+        
+        # fused_datum_sample = cls.FUSED_LIDAR_SD_TABLE.get_sample(
+        #                             suri, spark=spark)
+
+        # T_ego2lidar = cls._get_T_ego2lidar(task_df)
+
+        # oflow_task_df = cls._get_oflow_task_df(spark, task_df)
+        # print('oflow_task_df', oflow_task_df.count())
+        # worker = RenderOFlowTasksWorker(
+        #   T_ego2lidar, fused_datum_sample, cls.render_func)
+
+        # # Hacky way to coalesce into CPU-intensive partitions
+        # from oarphpy.spark import num_executors
+        # n_tasks = oflow_task_df.count()
+        # n_parts = int(max(1, n_tasks / (10 * num_executors(spark))))
+        # print('coalesc to ', n_parts)
+        # oflow_task_df = oflow_task_df.coalesce(n_parts)
+        # result_rdd = oflow_task_df.rdd.mapPartitions(lambda irows: worker.single_machine_map_rows(irows))#(worker, preservesPartitioning=True)
+        #         # lambda irows: cls._render_oflow_tasks(
+        #         #       T_ego2lidar,
+        #         #       fused_datum_sample,
+        #         #       irows))
+        # OUT_PATH = '/tmp/oflow_out/'
+        # oputil.mkdir(OUT_PATH)
+        # import pickle
+        # t = oputil.ThruputObserver(name='BuildOFlow', n_total=n_tasks)
+        # t.start_block()
+        # for i, results in enumerate(result_rdd.toLocalIterator(prefetchPartitions=False)):
+        #   for j, row in enumerate(results):
+        #     path = os.path.join(OUT_PATH, 'oflow_%s_%s.pkl' % (i, j))
+        #     with open(path, 'wb') as f:
+        #       pickle.dump(row, f, protocol=pickle.HIGHEST_PROTOCOL)
+        #     print('saved to', path)
+
+        #   t.update_tallies(n=1, num_bytes=oputil.get_size_of_deep(results), new_block=True)
+        #   t.maybe_log_progress(every_n=1)
+
+
+
+
+
+###############################################################################
+### Dataset Implementations
+
+
+### SemanticKITTI
+
+from psegs.exp.semantic_kitti import SemanticKITTISDTable
+
+class SemanticKITTSampleDFFactory(SampleDFFactory):
+    """Builds the SemanticKITTI sample DataFrame: one row per scan id."""
+    SRC_SD_TABLE = SemanticKITTISDTable
+
+    @classmethod
+    def build_df_for_segment(cls, spark, segment_uri):
+        """Group the segment's datums by scan id and return them as a persisted DataFrame."""
+        def scan_id_of(sd):
+            return sd.uri.extra['semantic_kitti.scan_id']
+
+        def to_task_row(scan_id_iter_sds):
+            from pyspark import Row
+            from oarphpy.spark import RowAdapter
+            scan_id, iter_sds = scan_id_iter_sds
+            ci_sds, pc_sds = [], []
+            for sd in iter_sds:
+                # Camera images are checked first; datums with neither payload are dropped.
+                if sd.camera_image is not None:
+                    ci_sds.append(sd)
+                elif sd.point_cloud is not None:
+                    pc_sds.append(sd)
+            return RowAdapter.to_row(
+                Row(
+                    sample_id=int(scan_id),
+                    pc_sds=pc_sds,
+                    cuboids_sds=[],  # SemanticKITTI has no cuboids
+                    ci_sds=ci_sds))
+
+        datum_rdd = cls.SRC_SD_TABLE.get_segment_datum_rdd(spark, segment_uri)
+        row_rdd = datum_rdd.groupBy(scan_id_of).map(to_task_row)
+
+        sample_df = spark.createDataFrame(row_rdd, schema=cls.table_schema())
+        return sample_df.persist()
+
+class SemanticKITTIFusedWorldCloudTable(CloudFuser):  # CloudFuser settings for SemanticKITTI
+    FUSED_LIDAR_SD_TABLE = SemanticKITTSampleDFFactory
+
+    # SemanticKITTI has no cuboids, so object clouds are turned off.
+    HAS_OBJ_CLOUDS = False
+
+class SemanticKITTIFusedFlowDFFactory(FusedFlowDFFactory):  # pairs the SemanticKITTI sample factory with its cloud fuser
+  SAMPLE_DF_FACTORY = SemanticKITTSampleDFFactory
+  FUSED_LIDAR_SD_TABLE = SemanticKITTIFusedWorldCloudTable
+
+
+### KITTI-360 - Using only our fused lidar
+
+from psegs.datasets.kitti_360 import KITTI360SDTable
+class KITTI360_OurFused(KITTI360SDTable):  # KITTI-360 table with fisheyes and KITTI's fused clouds switched off
+    INCLUDE_FISHEYES = False
+    INCLUDE_FUSED_CLOUDS = False  # Use our own fused clouds
+
+class KITTI360_OurFused_SampleDFFactory(SampleDFFactory):
+    """Builds per-frame sample rows from `KITTI360_OurFused` datums."""
+    SRC_SD_TABLE = KITTI360_OurFused
+
+    @classmethod
+    def build_df_for_segment(cls, spark, segment_uri):
+        """Return a DataFrame with one row (sample_id, pc_sds, cuboids_sds, ci_sds) per KITTI-360 frame id."""
+        from psegs import util
+        datum_df = cls.SRC_SD_TABLE.get_segment_datum_df(spark, segment_uri)
+        datum_df.createOrReplaceTempView('datums')
+        # NB: createOrReplaceTempView() supersedes the deprecated registerTempTable().
+        util.log.info('Building sample table for %s ...' % segment_uri)
+        # Keep only pose-valid datums, and only frames that have both lidar and camera data.
+        spark.catalog.dropTempView('kitti360_sample_df')
+        spark.sql("""
+            CACHE TABLE kitti360_sample_df OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS
+            SELECT 
+              INT(uri.extra.`kitti-360.frame_id`) AS sample_id,
+              COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) 
+                  FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,
+              COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) 
+                  FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,
+              COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) 
+                  FILTER (WHERE uri.topic LIKE '%camera%') AS ci_sds
+            FROM datums
+            WHERE (
+              uri.topic LIKE '%cuboid%' OR
+              uri.topic LIKE '%lidar%' OR
+              uri.topic LIKE '%camera%'
+            ) AND (
+              camera_image is NULL OR (camera_image.extra.`kitti-360.has-valid-ego-pose` = 'True')
+            ) AND (
+              point_cloud is NULL OR (point_cloud.extra.`kitti-360.has-valid-ego-pose` = 'True')
+            )
+            GROUP BY sample_id
+            HAVING SIZE(pc_sds) > 0 AND SIZE(ci_sds) > 0
+        """)
+        # Repartition by sample id: about one partition per 10 samples, never fewer than 10.
+        sample_df = spark.sql('SELECT * FROM kitti360_sample_df')
+        n_parts = int(max(10, sample_df.count() // 10))
+        sample_df = sample_df.repartition(n_parts, 'sample_id')
+        util.log.info('... done.')
+        return sample_df
+
+class KITTI360_OurFused_WorldCloudTableBase(CloudFuser):  # CloudFuser fed by the KITTI360_OurFused sample factory
+  FUSED_LIDAR_SD_TABLE = KITTI360_OurFused_SampleDFFactory
+
+class KITTI360_OurFused_FusedFlowDFFactory(FusedFlowDFFactory):  # flow factory using only our own fused lidar
+  SAMPLE_DF_FACTORY = KITTI360_OurFused_SampleDFFactory
+  FUSED_LIDAR_SD_TABLE = KITTI360_OurFused_WorldCloudTableBase
+
+
+### KITTI-360 - Using KITTI's included fused (and smoothed) lidar
+
+class KITTI360_KITTIFused(KITTI360SDTable):  # KITTI-360 table that includes KITTI's fused clouds, under its own dataset name
+    INCLUDE_FISHEYES = False
+    INCLUDE_FUSED_CLOUDS = True  # Use KITTI's fused clouds
+    DATASET_NAME = 'kitti-360-fused'
+
+class KITTI360_KITTIFused_SampleDFFactory(SampleDFFactory):
+    """Builds per-frame sample rows that use KITTI-360's own fused static clouds."""
+    SRC_SD_TABLE = KITTI360_KITTIFused
+
+    @classmethod
+    def build_df_for_segment(cls, spark, segment_uri):
+        """Return a DataFrame with one row (sample_id, pc_sds, cuboids_sds, ci_sds) per KITTI-360 frame id."""
+        from psegs import util
+        util.log.info(
+          'Building sample table for %s ...' % segment_uri)
+
+        datum_df = cls.SRC_SD_TABLE.get_segment_datum_df(spark, segment_uri)
+        # datum_df = datum_df.persist()
+        # NB: createOrReplaceTempView() supersedes the deprecated registerTempTable().
+        spark.catalog.dropTempView('datums')
+        datum_df.createOrReplaceTempView('datums')
+
+        # KITTI-360 fused clouds have data for multiple frames in each
+        # individual file.  We only want to read each file once, so let's
+        # prune the 'datums' table to contain only *distinct* clouds from
+        # the available datums (and all other non-cloud data).
+        valid_frames_df = spark.sql("""
+            SELECT
+              FIRST(uri.extra.`kitti-360.frame_id`) AS frame_id,
+              uri.extra.`kitti-360.fused_cloud_path` AS cloud_path
+            FROM datums
+            WHERE uri.topic LIKE '%lidar|fused_static%'
+            GROUP BY uri.extra.`kitti-360.fused_cloud_path`
+            """)
+        valid_fids = set(
+          r.frame_id for r in valid_frames_df.collect()
+          if r.cloud_path is not None
+        )
+        util.log.info(
+          "... found %s distinct world clouds ..." % len(valid_fids))
+        # An empty `IN ( )` list is a SQL syntax error; NULL keeps the query valid and matches no frame.
+        valid_fids_str = ','.join("'%s'" % fid for fid in valid_fids) or 'NULL'
+        datum_df = spark.sql("""
+            SELECT *
+            FROM datums
+            WHERE
+              uri.topic NOT LIKE '%lidar%' OR
+              (
+                uri.topic LIKE '%lidar|fused_static%' AND
+                uri.extra.`kitti-360.frame_id` IN ( {valid_fids_str} )
+              )
+        """.format(valid_fids_str=valid_fids_str))
+        datum_df.createOrReplaceTempView('kitti360_kfused_datums')
+
+
+        # Now collect datums, using only the distinct fused clouds we
+        # collected above
+        spark.catalog.dropTempView('kitti360_kfused_sample_df')
+        spark.sql("""
+            CACHE TABLE 
+              kitti360_kfused_sample_df
+              OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS
+            SELECT 
+              INT(uri.extra.`kitti-360.frame_id`) AS sample_id,
+              COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) 
+                  FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,
+              COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) 
+                  FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,
+              COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) 
+                  FILTER (WHERE uri.topic LIKE '%camera%') AS ci_sds
+            FROM kitti360_kfused_datums
+            WHERE (
+              uri.topic LIKE '%cuboid%' OR
+              uri.topic LIKE '%lidar|fused_static%' OR
+              uri.topic LIKE '%camera%'
+            ) AND (
+              camera_image is NULL OR (camera_image.extra.`kitti-360.has-valid-ego-pose` = 'True')
+            ) AND (
+              point_cloud is NULL OR (point_cloud.extra.`kitti-360.has-valid-ego-pose` = 'True')
+            )
+            GROUP BY sample_id
+        """)
+        # NB: unlike the lidar-only variant of this query, there is no HAVING filter on empty groups here.
+        sample_df = spark.sql('SELECT * FROM kitti360_kfused_sample_df')
+        util.log.info('... done.')
+        return sample_df
+
+class PassThruWorldCloudCleaner(WorldCloudCleaner):
+  """Cleaner that fuses clouds into the world frame without cuboid filtering.
+
+  Only `_filter_ego_vehicle()` is applied to the points, so the reported
+  pruned-point count is always zero.
+  """
+
+  def get_cleaned_world_cloud(self, point_clouds, cuboids):
+    """Return `(cloud_world, n_pruned)` for the given `point_clouds`.
+
+    `cuboids` is accepted but ignored.  Each cloud is cut down to its first
+    three columns, moved through the inverse of its `ego_to_sensor`
+    transform, ego-filtered, and then moved through its `ego_pose`.  The
+    results are stacked row-wise; with no clouds an empty `(0, 3)` array is
+    returned.  `n_pruned` is always 0.
+    """
+    # TODO need to prune isVisible for optical flow.  Keeping only points
+    # with `is_visible == 1` (visible to at least one camera) was tried,
+    # but it doesn't make a huge difference.
+
+    def to_world(pc):
+      # TODO: can we keep colors?  Only the first three columns are used.
+      xyz_sensor = pc.get_cloud()[:, :3]
+      sensor_to_ego = pc.ego_to_sensor.get_inverse()
+      xyz_ego = sensor_to_ego.apply(xyz_sensor).T
+      xyz_ego = self._filter_ego_vehicle(xyz_ego)
+
+      # Cuboid filtering is skipped.  NB: `ego_pose` takes ego-frame points
+      # to the world frame here; the same convention works for nusc too.
+      return pc.ego_pose.apply(xyz_ego).T
+
+    world_clouds = [to_world(pc) for pc in point_clouds]
+    if world_clouds:
+      return np.vstack(world_clouds), 0
+
+    # No input clouds at all.
+    return np.zeros((0, 3)), 0
+
+class KITTI360_KITTIFused_FusedFlowDFFactory(FusedFlowDFFactory):
+  SAMPLE_DF_FACTORY = KITTI360_KITTIFused_SampleDFFactory
+  WORLD_CLEANER_CLS = PassThruWorldCloudCleaner  # world clouds get ego filtering only, no cuboid filtering
+  FUSED_LIDAR_SD_TABLE = KITTI360_OurFused_WorldCloudTableBase
+    # We'll use our own fused *dynamic objects* but use
+    # KITTI-360's fused *static world clouds*
+
+### NuScenes
+
+from psegs.datasets.nuscenes import NuscStampedDatumTableFactory
+# from psegs.datasets.nuscenes import NuscStampedDatumTableLabelsAllFrames
+
+class NuscFlowSDTable(NuscStampedDatumTableFactory):  # keyframe-only restrictions and lidarseg are switched off
+  SENSORS_KEYFRAMES_ONLY = False
+  LABELS_KEYFRAMES_ONLY = False
+  INCLUDE_LIDARSEG = False
+  # Disabled override below: it would rewrite any split containing 'train' to plain 'train'.
+  # @classmethod
+  # def _get_all_segment_uris(cls):
+  #   segment_uris = super(cls, NuscFlowSDTable)._get_all_segment_uris()
+    
+  #   # Simplify 'train' for 'train-detect' and 'train-track'
+  #   for suri in segment_uris:  
+  #     if 'train' in suri.split:
+  #       suri.split = 'train'
+    
+  #   return segment_uris
+
+class NuscSampleDFFactory(SampleDFFactory):
+  """Builds NuScenes sample rows: loose lidar-camera groups for fusion plus per-topic keyframe groups for rendering."""
+  SRC_SD_TABLE = NuscFlowSDTable
+  # For NuScenes, labels (and keyframes) are only available at 2Hz and are
+  # otherwise interpolated. The lidar scans at 2x the frequency of the cameras.
+  # For best label alignment, we:
+  # * Create some sample groups just for fusion (loose camera-lidar
+  #      constraints for decent lidar painting).
+  # * Create sample groups for rendering with only exact label-sensor alignment.
+  # You can control which sensors are grouped for rendering below.
+  GROUP_LIDAR_FOR_RENDERING = False
+  GROUP_CAMERAS_FOR_RENDERING = True
+  INCLUDE_LOOSE_LIDAR_CAMERA_FOR_FUSION = True
+  LOOSE_FUSION_TIME_WINDOW_SEC = 0.1
+  # Render sample ids are offset by this amount for each successive rendered topic.
+  SAMPLE_IDS_BETWEEN_SENSORS = 10000
+
+  @classmethod
+  def build_df_for_segment(cls, spark, segment_uri):
+      """Return a persisted DataFrame of sample rows (sample_id, pc_sds, cuboids_sds, ci_sds) for `segment_uri`."""
+      from psegs import util
+      util.log.info(
+        'Building sample table for %s ...' % segment_uri)
+
+      datum_df = cls.SRC_SD_TABLE.get_segment_datum_df(spark, segment_uri)
+      spark.catalog.dropTempView('nusc_datums')
+      datum_df.createOrReplaceTempView('nusc_datums')
+
+      all_topics = datum_df.select('uri.topic').distinct().collect()
+      all_topics = [r[0] for r in all_topics]
+      lidar_topics = sorted(t for t in all_topics if 'lidar' in t)
+      camera_topics = sorted(t for t in all_topics if 'camera' in t)
+      # Number the NuScenes samples in order of their earliest datum timestamp.
+      from pyspark.sql import Row
+      samples_by_time = spark.sql("""
+        SELECT
+          uri.extra.`nuscenes-sample-token` AS sample_token,
+          MIN(uri.timestamp) AS first_time
+          FROM nusc_datums
+          GROUP BY sample_token
+        """).collect()
+      samples_by_time = sorted(samples_by_time, key=lambda r: r.first_time)
+      samples = [
+        Row(sample_n=n, sample_token=r.sample_token)
+        for n, r in enumerate(samples_by_time)
+      ]
+      spark.catalog.dropTempView('nusc_samples')
+      nusc_samples_df = spark.createDataFrame(samples)
+      nusc_samples_df.createOrReplaceTempView('nusc_samples')
+      util.log.info("... have %s Nusc samples ..." % nusc_samples_df.count())
+
+
+      ## Fusion Samples
+      fusion_dfs = []
+      # Fusion samples get negative ids (-lidar_time); render ids below are built from a positive base.
+      if cls.INCLUDE_LOOSE_LIDAR_CAMERA_FOR_FUSION:
+        lidar_times_df = spark.sql("""
+          SELECT
+            uri.topic AS lidar_topic,
+            uri.timestamp AS lidar_time 
+          FROM nusc_datums
+          WHERE uri.topic like '%lidar%'
+          """)
+        spark.catalog.dropTempView('nusc_lidar_times_df')
+        lidar_times_df.createOrReplaceTempView('nusc_lidar_times_df')
+        util.log.info(
+          "... adding %s lidar clouds for rendering only ..." % (
+            lidar_times_df.count(),))
+        # One group per lidar scan: the scan, same-time cuboids of its channel, and cameras within +/- buf ns.
+        fusion_df = spark.sql("""
+              SELECT 
+                -1 * lidar_time AS sample_id,
+                COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) 
+                    FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,
+                COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) 
+                    FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,
+                COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) 
+                    FILTER (WHERE uri.topic LIKE '%camera%') AS ci_sds
+              FROM nusc_datums, nusc_lidar_times_df
+              WHERE
+                (
+                  uri.topic = nusc_lidar_times_df.lidar_topic AND
+                  uri.timestamp = nusc_lidar_times_df.lidar_time
+                ) OR
+                (
+                  uri.topic LIKE '%cuboid%' AND
+                  uri.timestamp = nusc_lidar_times_df.lidar_time AND
+                  uri.extra.`nuscenes-label-channel` = 
+                            SUBSTRING(
+                              nusc_lidar_times_df.lidar_topic,
+                              LENGTH('lidar|') + 1,
+                              100)
+                ) OR
+                (
+                  uri.topic LIKE '%camera%' AND
+                  nusc_lidar_times_df.lidar_time - {buf} <= uri.timestamp AND
+                  uri.timestamp <= nusc_lidar_times_df.lidar_time + {buf}
+                )
+              GROUP BY sample_id
+          """.format(
+            buf=int(1e9 * cls.LOOSE_FUSION_TIME_WINDOW_SEC)))
+        fusion_dfs.append(fusion_df)
+
+      ## Render Samples
+      render_dfs = []
+      sample_id_base = 0
+      topics_to_render = []
+      if cls.GROUP_LIDAR_FOR_RENDERING:
+        topics_to_render += lidar_topics
+      if cls.GROUP_CAMERAS_FOR_RENDERING:
+        topics_to_render += camera_topics
+      for topic in topics_to_render:
+        sample_id_base += cls.SAMPLE_IDS_BETWEEN_SENSORS
+        if 'camera' in topic:
+          channel = topic[len('camera|'):]
+        elif 'lidar' in topic:
+          channel = topic[len('lidar|'):]
+        else:
+          raise ValueError(topic)
+        # Each render group holds keyframe datums of this one topic plus the cuboids labeled for its channel.
+        topics_clause = """
+            (uri.topic LIKE '%cuboid%' OR uri.topic = '{topic}')
+          """.format(topic=topic)
+
+        util.log.info(
+          "... adding rendering for %s with sample id base %s ..." % (
+            topic, sample_id_base))
+        render_df = spark.sql("""
+              SELECT 
+                {sample_id_base} + nusc_samples.sample_n AS sample_id,
+                COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) 
+                    FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,
+                COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) 
+                    FILTER (
+                      WHERE uri.topic LIKE '%cuboid%' AND
+                            uri.extra.`nuscenes-label-channel` = '{channel}'
+                      ) AS cuboids_sds,
+                COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) 
+                    FILTER (WHERE uri.topic LIKE '%camera%') AS ci_sds
+              FROM nusc_datums, nusc_samples
+              WHERE
+                uri.extra.`nuscenes-sample-token` = nusc_samples.sample_token AND
+                {topics_clause} AND
+                uri.extra.`nuscenes-is-keyframe` = 'True'
+              GROUP BY sample_id
+          """.format(
+            sample_id_base=sample_id_base,
+            channel=channel,
+            topics_clause=topics_clause))
+        render_dfs.append(render_df)
+      assert render_dfs, "Nothing to render?"
+      # Combine render and fusion groups into a single table partitioned by sample id.
+      from oarphpy import spark as S
+      all_dfs = render_dfs + fusion_dfs
+      sample_df = S.union_dfs(*all_dfs)
+      sample_df = sample_df.repartition('sample_id')
+      sample_df = sample_df.persist()
+      
+      n_samples = sample_df.count()
+      util.log.info(
+        '... done building %s dataframes for %s total samples.' % (
+          len(all_dfs), n_samples))
+      return sample_df
+
+
+      # if cls.GROUP_CAMERAS_FOR_RENDERING:
+
+
+      # kf_filter = "AND uri.extra.`nuscenes-is-keyframe` = 'True'"
+      # pc_kf_filter = kf_filter if cls.LIDAR_KEYFRAMES_ONLY else ''
+      # ci_kf_filter = kf_filter if cls.CAMERAS_KEYFRAMES_ONLY else ''
+      # cu_kf_filter = kf_filter if cls.CUBOIDS_KEYFRAMES_ONLY else ''
+
+      # spark.catalog.dropTempView('nusc_sample_df')
+      # spark.sql("""
+      #     CACHE TABLE 
+      #       nusc_sample_df
+      #       OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS
+      #     SELECT 
+      #       INT(uri.extra.`nuscenes-sample-offset`) AS sample_id,
+      #       COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) 
+      #           FILTER (WHERE uri.topic LIKE '%lidar%' {pc_kf_filter}) AS pc_sds,
+      #       COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) 
+      #           FILTER (WHERE uri.topic LIKE '%cuboid%' {cu_kf_filter}) AS cuboids_sds,
+      #       COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) 
+      #           FILTER (WHERE uri.topic LIKE '%camera%' {ci_kf_filter}) AS ci_sds
+      #     FROM nusc_datums
+      #     WHERE (
+      #       uri.topic LIKE '%cuboid%' OR
+      #       uri.topic LIKE '%lidar%' OR
+      #       uri.topic LIKE '%camera%'
+      #     )
+      #     GROUP BY sample_id
+      #     ORDER BY sample_id
+      # """.format(
+      #   pc_kf_filter=pc_kf_filter,
+      #   ci_kf_filter=ci_kf_filter,
+      #   cu_kf_filter=cu_kf_filter))
+
+      # import pdb; pdb.set_trace()
+
+      # sample_df = spark.sql('SELECT * FROM nusc_sample_df')
+      # util.log.info('... done.')
+      # return sample_df
+
+from psegs.exp.fused_lidar_flow import WorldCloudCleaner
+class NuscWorldCloudCleaner(WorldCloudCleaner):
+  """Cloud cleaner that drops NuScenes ego-vehicle self-returns."""
+  @classmethod
+  def _filter_ego_vehicle(cls, cloud_ego):
+      # Note: NuScenes authors have already corrected clouds for ego motion:
+      # https://github.com/nutonomy/nuscenes-devkit/issues/481#issuecomment-716250423
+      # But have not filtered out ego self-returns, so drop points in a box.
+      x, y, z = cloud_ego[:, 0], cloud_ego[:, 1], cloud_ego[:, 2]
+      in_ego_box = (
+                      (x <= 1.5) & (x >= -1.5) &  # Nusc lidar +x is +right
+                      (y <= 3) & (y >= -3) &  # Nusc lidar +y is +forward
+                      (z <= 2.5) & (z >= -2.5))    # Nusc lidar +z is +up
+      return cloud_ego[~in_ego_box]
+
+class NuscWorldCloudTableBase(CloudFuser):
+  pass  # No overrides: a NuScenes-named subclass that adds nothing to CloudFuser
+
+class NuscFusedFlowDFFactory(FusedFlowDFFactory):  # Plugs in the NuScenes classes
+  SAMPLE_DF_FACTORY = NuscSampleDFFactory
+  WORLD_CLEANER_CLS = NuscWorldCloudCleaner
+  FUSED_LIDAR_SD_TABLE = NuscWorldCloudTableBase
+
+
+###############################################################################
+### Access to Rendered Data
+
+DEFAULT_SD_TABLES = (  # Default `sd_tables` argument of FlowRecTable
+  # SemanticKITTISDTable,
+  KITTI360_OurFused,
+  KITTI360_KITTIFused,
+  NuscFlowSDTable,
+)
+
+class FlowRecTable(object):
+  """Reader for flow records stored as parquet, optionally joined to their datums."""
+
+  def __init__(self, spark, pq_root, sd_tables=DEFAULT_SD_TABLES):
+    self._spark = spark
+    self._pq_root = pq_root  # parquet location read by _get_raw_df()
+    self._df = None  # DataFrame created lazily by _get_raw_df()
+    self._sd_tables = sd_tables
+    self._sd_ut = None  # StampedDatumDB created lazily by _get_sd_ut()
+  
+  def _get_sd_ut(self):
+    if self._sd_ut is None:  # Build the StampedDatumDB once, then reuse it
+      from psegs.table.sd_db import StampedDatumDB
+      self._sd_ut = StampedDatumDB(tables=self._sd_tables, spark=self._spark)
+      util.log.info(
+        "FlowRecTable: Have %s StampedDatumTables" % len(self._sd_tables))
+    return self._sd_ut
+
+  def _get_raw_df(self):
+    """Return the parquet-backed DataFrame, creating it on first use."""
+    if self._df is None:
+      util.log.info("FlowRecTable: Reading parquet from %s " % self._pq_root)
+      self._df = self._spark.read.parquet(self._pq_root)
+
+      # self._spark.catalog.dropTempView('psegs_frt_df')
+      # psegs_frt_df.registerTempTable('psegs_frt_df')
+      # self._df = self._spark.sql("""
+      #               SELECT
+      #                 CONCAT(
+      #                   'psegs://dataset=',
+      #                   segment_uri.dataset,
+      #                   '&split=',
+      #                   segment_uri.split,
+      #                   '&segment_id=',
+      #                   segment_uri.segment_id,
+      #                   '&extra.psegs_flow_sids=',
+      #                   ARRAY_JOIN(
+      #                     clouds.sample_id,
+      #                     ',')) AS uri_key,
+      #                 *
+      #               FROM psegs_frt_df
+      #   """)
+      # self._df = self._df.persist()
+
+    return self._df
+
+  def get_record_uris(self):
+    from oarphpy.spark import RowAdapter
+    uri_rows = self._get_raw_df().select('uri').collect()
+    # Each collected Row carries one `uri` field; decode it with RowAdapter
+    return [RowAdapter.from_row(row.uri) for row in uri_rows]
+  
+  def get_records_with_samples_rdd(
+            self,
+            record_uris=[],
+            include_cameras=True,
+            include_cuboids=False,
+            include_point_clouds=False):
+    """Return an RDD of `(flow_rec, sample)` pairs for records in this table.
+    A non-empty `record_uris` restricts output to records with a matching
+    `uri_key`.  The `include_*` flags pick which datum URIs are looked up to
+    build each `sample`; when all are False, `sample` is `None`."""
+    df = self._get_raw_df()
+
+    if record_uris:
+      # Use the segment uri components to leverage parquet partitioning
+      from psegs import datum
+      record_suris = [
+        datum.URI.from_str(r).to_segment_uri() for r in record_uris
+      ]
+      datasets = set(u.dataset for u in record_suris)
+      splits = set(u.split for u in record_suris)
+      segment_ids = set(u.segment_id for u in record_suris)
+
+      df = df.filter(
+              df.dataset.isin(list(datasets)) &
+              df.split.isin(list(splits)) &
+              df.segment_id.isin(list(segment_ids)))
+
+    # NOTE(review): presumes the parquet data already has a `uri_key` column
+    key_cloud_df = df.selectExpr('uri_key AS key', 'EXPLODE(clouds) AS c')
+    if record_uris:
+      key_cloud_df = key_cloud_df.filter(
+        key_cloud_df.key.isin([str(r) for r in record_uris]))
+
+    key_uri_dfs = []
+    if include_cameras:
+      key_uri_dfs.append(
+        key_cloud_df.selectExpr('key', 'EXPLODE(c.ci_uris) AS uri'))
+    if include_cuboids:
+      key_uri_dfs.append(
+        key_cloud_df.selectExpr('key', 'EXPLODE(c.cuboids_uris) AS uri'))
+    if include_point_clouds:
+      key_uri_dfs.append(
+        key_cloud_df.selectExpr('key', 'EXPLODE(c.pc_uris) AS uri'))
+
+    if key_uri_dfs:
+      from oarphpy import spark as S
+      key_uri_df = S.union_dfs(*key_uri_dfs)
+      key_uri_df = key_uri_df.repartition(100, 'uri')
+      sd_ut = self._get_sd_ut()
+      key_sample_df = sd_ut.get_keyed_sample_df(key_uri_df)
+
+      joined = df.join(key_sample_df, key_sample_df.key == df.uri_key)
+
+      def to_record_samples(row):
+        from oarphpy.spark import RowAdapter
+        from psegs.table.sd_db import StampedDatumDB
+        flow_rec = RowAdapter.from_row(row)
+        sample = StampedDatumDB.datum_rows_to_sample(row.datums)
+        return (flow_rec, sample)
+
+      return joined.rdd.map(to_record_samples)
+    
+    else:
+
+      def to_record_samples(row):
+        from oarphpy.spark import RowAdapter
+        flow_rec = RowAdapter.from_row(row)
+        return (flow_rec, None)
+      return df.rdd.map(to_record_samples)
diff --git a/psegs/exp/semantic_kitti.py b/psegs/exp/semantic_kitti.py
new file mode 100644
index 0000000..289c113
--- /dev/null
+++ b/psegs/exp/semantic_kitti.py
@@ -0,0 +1,369 @@
+# Copyright 2021 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import os
+
+import numpy as np
+
+from psegs import datum
+from psegs import util
+from psegs.conf import C
+from psegs.table.sd_table_factory import StampedDatumTableFactory
+
+
+
+def parse_calibration(path):
+  """Parse a calibration file and return a map to 4x4 Numpy matrices.
+  Important keys returned:
+  * Tr - the lidar to camera static transform
+  * P2 - the left camera projective matrix P
+  Based upon https://github.com/PRBonn/semantic-kitti-api/blob/9b5feda3b19ea560a298493b9a5ebebe0cbe2cc2/generate_sequential.py#L14
+  """
+  calib = {}
+
+  with open(path) as f:
+    for line in f:
+      if not line.strip():
+        continue  # tolerate blank (e.g. trailing) lines
+      key, mat_str = line.strip().split(":", 1)
+      values = [float(v) for v in mat_str.split()]
+      mat = np.eye(4)
+      # Each line holds a row-major 3x4 matrix; the last row stays [0 0 0 1]
+      mat[:3, :4] = np.reshape(values[:12], (3, 4))
+      calib[key] = mat
+  return calib
+
+def parse_poses(path):
+  """Read a SemanticKITTI (per-scan) poses file and return a list of 4x4
+  homogeneous RT matrices that express world-to-left-camera transforms.  The
+  index of this list is implicitly the scan ID.
+
+  Based upon: https://github.com/PRBonn/semantic-kitti-api/blob/9b5feda3b19ea560a298493b9a5ebebe0cbe2cc2/generate_sequential.py#L42
+  """
+  poses = []
+  with open(path) as f:
+    for line in f:
+      values = [float(v) for v in line.split()]
+      if not values:
+        continue  # tolerate blank (e.g. trailing) lines
+      # Each line is a row-major 3x4 matrix; the last row stays [0 0 0 1]
+      mat = np.eye(4)
+      mat[:3, :4] = np.reshape(values[:12], (3, 4))
+      poses.append(mat)
+  return poses
+
+
+
+class Fixtures(object):
+
+  # Please follow the instructions posted on the SemanticKITTI website to
+  # obtain the data:
+  # http://www.semantic-kitti.org/dataset.html#download
+  # Additionally, if you wish to study optical flow, you'll want to expand
+  # the KITTI zip file `data_odometry_color.zip`.
+  # Extract the data as described to a directory and symlink that directory
+  # path here:
+  ROOT = C.EXT_DATA_ROOT / 'semantic_kitti'
+
+  # Deduced from:
+  # https://github.com/PRBonn/semantic-kitti-api/blob/c2d7712964a9541ed31900c925bf5971be2107c2/auxiliary/SSCDataset.py#L20
+  SK_SPLIT_SEQUENCES = {
+      "train": [
+        "00", "01", "02", "03", 
+        # "04", -- We ignore sequence 04 because it has no clouds with only
+        #            static points
+        "05", "06", "07", "09", "10"],
+      "valid": ["08"],
+      "test": [
+        "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21"]
+  }
+
+  SK_MOVING_LABELS = (
+      252, # "moving-car"
+      253, # "moving-bicyclist"
+      254, # "moving-person"
+      255, # "moving-motorcyclist"
+      256, # "moving-on-rails"
+      257, # "moving-bus"
+      258, # "moving-truck"
+      259, # "moving-other-vehicle"
+  )
+
+  @classmethod
+  def get_scene_basepath(cls, seq):  # -> <ROOT>/dataset/sequences/<seq>
+    return os.path.join(cls.ROOT, 'dataset/sequences', seq)
+
+  @classmethod
+  def get_seq_to_nscans(cls):  # -> {train sequence id: number of velodyne scans}
+    if not hasattr(cls, '_seq_to_nscans'):
+      seq_to_nscans = {}  # built locally so a failure can't cache a partial map
+      for seq in cls.SK_SPLIT_SEQUENCES['train']:
+        vel_dir = os.path.join(cls.get_scene_basepath(seq), 'velodyne')
+        n_scans = int(max(os.listdir(vel_dir)).replace('.bin', '')) + 1
+        util.log.info("Found Sequence %s with %s scans" % (seq, n_scans))
+        seq_to_nscans[seq] = n_scans
+      util.log.info("Found %s total scans" % sum(seq_to_nscans.values()))
+      cls._seq_to_nscans = seq_to_nscans  # publish only the complete map
+    return cls._seq_to_nscans
+
+  @classmethod
+  def get_calibration(cls, seq):
+    scene_base = cls.get_scene_basepath(seq)  # calib.txt is read from the scene dir
+    return parse_calibration(os.path.join(scene_base, 'calib.txt'))
+
+  @classmethod
+  def get_poses(cls, seq):
+    scene_base = cls.get_scene_basepath(seq)  # poses.txt is read from the scene dir
+    return parse_poses(os.path.join(scene_base, "poses.txt"))
+
+  @classmethod
+  def get_moving_mask_for_scan(cls, seq, scan_id):
+    """Return a boolean per-point mask that is True where the scan's
+    semantic label is one of `SK_MOVING_LABELS`."""
+    scene_base = cls.get_scene_basepath(seq)
+    scan_name = str(scan_id).rjust(6, '0')
+    labels_path = os.path.join(scene_base, 'labels', scan_name + '.label')
+    labels = np.fromfile(labels_path, dtype=np.uint32).reshape((-1))
+    # Semantic label is in the lower 16 bits; the instance id (upper 16 bits)
+    # is not needed here.
+      # NB: 22 / 252 is chase car in scene 08 !!!
+    sem_label = labels & 0xFFFF
+    moving_mask = np.isin(sem_label, cls.SK_MOVING_LABELS)
+    return moving_mask
+
+  @classmethod
+  def read_scan_get_cloud(
+          cls, seq, scan_id, remove_movers=True, filter_ego=True):
+    """Read a velodyne scan; return an (N, 4) array of homogeneous xyz1 points."""
+    scan_name = str(scan_id).rjust(6, '0')
+    scene_base = cls.get_scene_basepath(seq)
+    scan_path = os.path.join(scene_base, 'velodyne', scan_name + '.bin')
+
+    # Read the raw lidar (np.fromfile does not leave a file handle open)
+    lidar = np.fromfile(scan_path, dtype=np.float32).reshape((-1, 4))
+    cloud = np.ones(lidar.shape)  # need homogenous for transforms later
+    cloud[:, 0:3] = lidar[:, 0:3]
+
+    if remove_movers:
+        # Clean out points for anything moving
+        moving_mask = cls.get_moving_mask_for_scan(seq, scan_id)
+        cloud = cloud[~moving_mask]#[:, :3]
+    
+    if filter_ego:
+        pass  # TODO: ego-vehicle filtering is not implemented; flag has no effect
+    
+    return cloud
+
+
+class SemanticKITTISDTable(StampedDatumTableFactory):
+  
+  FIXTURES = Fixtures
+
+  ONLY_FRAMES_WITH_NO_MOVERS = True
+  
+  @classmethod
+  def _get_all_segment_uris(cls):
+    return [  # One 'train' segment URI per sequence reported by the fixtures
+      datum.URI(
+        dataset='semantikitti', # FIXME(review): spelling looks unintended; confirm before renaming
+        split='train',
+        segment_id=str(seq))
+      for seq in cls.FIXTURES.get_seq_to_nscans().keys()
+    ]
+
+  @classmethod
+  def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):
+    """Return a list of datum RDDs: camera, ego pose and lidar per segment."""
+    assert existing_uri_df is None, "Resume feature not supported"
+    seg_uris = cls.get_all_segment_uris()
+    if only_segments:
+      util.log.info("Filtering to only %s segments" % len(only_segments))
+      seg_uris = [
+        uri for uri in seg_uris
+        if any(
+          suri.soft_matches_segment_of(uri) for suri in only_segments)
+      ]
+    
+    SK_SEQ_TO_NSCANS = cls.FIXTURES.get_seq_to_nscans()
+    datum_rdds = []
+    for seg_uri in seg_uris:
+      seq = seg_uri.segment_id
+      if cls.ONLY_FRAMES_WITH_NO_MOVERS:
+        util.log.info(
+          "Finding scans for sequence %s with no moving points ..." % seq) # FIXME we should keep these .... ?
+        n_scans = SK_SEQ_TO_NSCANS[seq]
+        slices = max(1, n_scans // 100)
+        task_rdd = spark.sparkContext.parallelize(
+          range(n_scans), numSlices=slices)
+        scan_has_no_movers = (
+          lambda scan_id: (
+            not cls.FIXTURES.get_moving_mask_for_scan(seq, scan_id).any()))
+        scans_no_movers = task_rdd.filter(scan_has_no_movers).collect()
+        util.log.info(
+          "... sequence %s has %s scans with no movers." % (
+            seq, len(scans_no_movers)))
+        scan_ids = scans_no_movers
+      else:
+        scan_ids = list(range(SK_SEQ_TO_NSCANS[seq]))
+      
+      # for testing scan_ids = scan_ids[:500]
+      tasks = [(seg_uri, scan_id) for scan_id in scan_ids]
+      
+      # Emit camera_image RDD
+      ctask_rdd = spark.sparkContext.parallelize(tasks)
+      datum_rdd = ctask_rdd.map(lambda t: cls.create_camera_frame(*t))
+      datum_rdds.append(datum_rdd)
+      
+      # Emit ego_pose RDD
+      ptask_rdd = spark.sparkContext.parallelize(tasks)
+      datum_rdd = ptask_rdd.map(lambda t: cls.create_ego_pose(*t))
+      datum_rdds.append(datum_rdd)
+      
+      # Emit velodyne cloud RDD
+      pctask_rdd = spark.sparkContext.parallelize(tasks)
+      datum_rdd = pctask_rdd.map(lambda t: cls.create_point_cloud_in_world(*t))
+      datum_rdds.append(datum_rdd)
+  
+    return datum_rdds
+  
+  @classmethod
+  def _get_calib(cls, seq):
+    if not hasattr(cls, '_calib'):
+      cls._calib = {}  # seq -> parsed calibration, memoized on the class
+    if seq not in cls._calib:
+      cls._calib[seq] = cls.FIXTURES.get_calibration(seq)
+    return cls._calib[seq]
+  
+  @classmethod
+  def _get_poses(cls, seq):
+    if not hasattr(cls, '_poses'):
+      cls._poses = {}  # seq -> list of parsed poses, memoized on the class
+    if seq not in cls._poses:
+      cls._poses[seq] = cls.FIXTURES.get_poses(seq)
+    return cls._poses[seq]
+  
+  @classmethod
+  def create_camera_frame(cls, base_uri, scan_id):
+    """Build the 'camera|left_rect' StampedDatum for one scan's image_2 PNG."""
+    seq = base_uri.segment_id
+    calib = cls._get_calib(seq)
+    uri = copy.deepcopy(base_uri)
+    uri.topic = 'camera|left_rect'
+    uri.timestamp = int(scan_id) # HACK!
+    uri.extra['semantic_kitti.scan_id'] = str(scan_id)
+
+    scene_base = cls.FIXTURES.get_scene_basepath(seq)
+    scan_name = str(scan_id).rjust(6, '0')
+    img_path = os.path.join(scene_base, 'image_2/', scan_name + '.png')
+    assert os.path.exists(img_path), (
+      "Did you remember to expand data_odometry_color.zip ? "
+      "%s not found" % img_path)
+    with open(img_path, 'rb') as f:
+      width, height = util.get_png_wh(f.read(100))
+                          # NB: Util only needs the first few bytes
+    
+    import imageio
+    image_factory = lambda: imageio.imread(img_path)
+    
+    # HACK!!!  This is actually P !!!
+    K = calib['P2']
+    
+    # hack! this is lidar to cam
+    ego_to_sensor = datum.Transform.from_transformation_matrix(
+        calib['Tr'], src_frame='lidar', dest_frame=uri.topic)
+    
+    sd_ego_pose = cls.create_ego_pose(base_uri, scan_id)
+    ego_pose = sd_ego_pose.transform
+    ci = datum.CameraImage(
+        sensor_name=uri.topic,
+        image_factory=image_factory,
+        width=width,
+        height=height,
+        timestamp=uri.timestamp,
+        ego_pose=ego_pose,
+        K=K,
+        ego_to_sensor=ego_to_sensor,
+        extra={'semantic_kitti.scan_id': str(scan_id)})
+    return datum.StampedDatum(uri=uri, camera_image=ci)
+  
+  @classmethod
+  def create_ego_pose(cls, base_uri, scan_id):
+    """Build the 'ego_pose' StampedDatum for a scan from poses.txt and Tr."""
+    seq = base_uri.segment_id
+    poses = cls._get_poses(seq)
+    uri = copy.deepcopy(base_uri)
+    uri.topic = 'ego_pose'
+    uri.timestamp = int(scan_id) # HACK!
+    uri.extra['semantic_kitti.scan_id'] = str(scan_id)
+    
+    # Conjugate the per-scan camera pose by Tr: inv(Tr) * pose * Tr
+    calib = cls._get_calib(seq)
+    Tr = calib["Tr"]
+    Tr_inv = np.linalg.inv(Tr)
+    cam2_pose = poses[scan_id]
+    pose = np.matmul(Tr_inv, np.matmul(cam2_pose, Tr))
+
+    # # Hack! believe ego frame is lidar here?
+    # poses = cls._get_poses(seq)
+    # ego_pose = datum.Transform.from_transformation_matrix(
+    #     poses[scan_id], src_frame='world', dest_frame='ego')
+
+    # Hack! believe ego frame is lidar here?
+    ego_pose = datum.Transform.from_transformation_matrix(
+        pose, src_frame='world', dest_frame='ego')
+
+    return datum.StampedDatum(uri=uri, transform=ego_pose)
+  
+  @classmethod
+  def create_point_cloud_in_world(cls, base_uri, scan_id):
+    """Build the lidar StampedDatum for a scan; the cloud is read lazily."""
+    uri = copy.deepcopy(base_uri)
+    uri.topic = 'lidar|world' + (
+      '_cleaned' if cls.ONLY_FRAMES_WITH_NO_MOVERS else '')
+    uri.timestamp = int(scan_id) # HACK!
+    uri.extra['semantic_kitti.scan_id'] = str(scan_id)
+    
+    sd_ego_pose = cls.create_ego_pose(base_uri, scan_id)
+    ego_pose = sd_ego_pose.transform
+
+    # NOTE(review): the topic says 'world', but the world transform in
+    # _get_cloud() below is commented out, so points stay as read -- confirm.
+    
+    def _get_cloud(seq, sid):
+      cloud = cls.FIXTURES.read_scan_get_cloud(
+            seq,
+            sid,
+            remove_movers=cls.ONLY_FRAMES_WITH_NO_MOVERS)
+      
+      # # Move cloud into the world frame
+      # calib = cls._get_calib(seq)
+      # all_poses = cls._get_poses(seq)
+      # Tr = calib["Tr"]
+      # Tr_inv = np.linalg.inv(Tr)
+      # cam2_pose = all_poses[sid]
+      # pose = np.matmul(Tr_inv, np.matmul(cam2_pose, Tr))
+      # cloud = np.matmul(pose, cloud.T).T
+      
+      return cloud
+
+    pc = datum.PointCloud(
+      sensor_name=uri.topic,
+      timestamp=uri.timestamp,
+      cloud_factory=lambda: _get_cloud(base_uri.segment_id, scan_id),
+      ego_to_sensor=datum.Transform(), # Hack! ego frame is lidar frame
+      ego_pose=ego_pose,
+      extra={'semantic_kitti.scan_id': str(scan_id)})
+    return datum.StampedDatum(uri=uri, point_cloud=pc)
+
diff --git a/psegs/export.py b/psegs/export.py
new file mode 100644
index 0000000..f2a92a0
--- /dev/null
+++ b/psegs/export.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+# vim: tabstop=2 shiftwidth=2 expandtab
+
+# Copyright 2021 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+DESC = """
+seg2html.py -- A library module of tools (as well as a script) to convert
+PSegs segments to HTML visualizations.  Run this script in the PSegs dockerized
+environment; FMI see ./psegs-util --help in the PSegs project.
+
+## Example
+
+python3 psegs/seg2html.py \
+  --segment-id=charuco-lowres-test \
+  --out-dir=./my_html_viz
+
+This will render only the segment named `charuco-lowres-test` to HTML and
+put rendered assets in ./my_html_viz .
+
+"""
+
+from psegs import xform as psx
+
+
+def create_arg_parser():
+  """Build the CLI parser: --viz-all-segments plus whatever psx configures."""
+  import argparse
+  parser = argparse.ArgumentParser(
+                    description=DESC,
+                    formatter_class=argparse.RawDescriptionHelpFormatter)
+
+  parser.add_argument(
+    '--viz-all-segments', default=False, action='store_true',
+    help='Render all available segments')
+
+  psx.configure_arg_parser(parser)
+
+  return parser
+
+def main(args=None):
+  """Parse CLI args when `args` is None.  NB: nothing else is implemented yet."""
+  if args is None:
+    parser = create_arg_parser()
+    args = parser.parse_args()
+  
+  # hacks for now
+  
+
+
+
+if __name__ == '__main__':
+  main(args=None)
diff --git a/psegs/export/__init__.py b/psegs/export/__init__.py
new file mode 100644
index 0000000..e18a275
--- /dev/null
+++ b/psegs/export/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2022 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
diff --git a/psegs/export/nerf.py b/psegs/export/nerf.py
new file mode 100644
index 0000000..fb2daab
--- /dev/null
+++ b/psegs/export/nerf.py
@@ -0,0 +1,356 @@
+# Copyright 2022 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+from pathlib import Path
+
+from psegs import util
+
+
+def export_sdt_to_blender_format(
+      sd_table,
+      outdir,
+      splits_to_write=('train', 'test', 'val'),
+      resize_max_h=-1,
+      img_ext='jpg',
+      only_cameras=None,
+      limit=-1,
+      include_file_extensions_in_keys=False,
+      use_relative_paths=True):
+  """
+  Given a `:class:`~psegs.table.StampedDatumTable` instance, export the 
+  `CameraImage` images (and other metadata) to `outdir` in the "Blender format"
+  that mimics the original "NeRF Synthetic" dataset (and that is compatible
+  with most research).
+
+  Args:
+    sd_table (StampedDatumTable): Export images from this table.
+    outdir (Path or str): Dump all data to this directory.
+    splits_to_write (List[str]): Export transform data for these splits
+      (NB: we currently always ignore splits specified in the `sd_table` URIs).
+    resize_max_h (int): Resize input images to have this maximum
+      height in pixels.
+    img_ext (str): Save images in this format.
+    only_cameras (List[str]): Only export these camera topics.
+    limit (int): Sample this number of frames uniformly.
+    include_file_extensions_in_keys (bool): Standard NeRF datasets don't
+      include the image file extensions in `transforms_*.json` and all image
+      paths are assumed to end in '.png'.  Use this option to force-include
+      file extensions.
+    use_relative_paths (bool): Embed relative file paths in the exported
+      `transforms_*.json` files (some NeRF Impls *require* relative paths)
+
+  References:
+   * Original NeRF Blender files: https://github.com/bmild/nerf/issues/59
+   * Original NeRF Blender *dataset* transforms: https://drive.google.com/drive/folders/1LEDmMJ-rFRhl8CJKnLBeTePDBsmJJ8OP
+   * Code that reads Blender data:
+     * Original NeRF: https://github.com/bmild/nerf/blob/20a91e764a28816ee2234fcadb73bd59a613a44c/load_blender.py#L41
+     * nerf_pl (pytorch lightning): https://github.com/kwea123/nerf_pl/blob/f4a072bc0dc49d2703d2a47da808432d228622e0/datasets/blender.py#L11
+     * jaxnerf: https://github.com/google-research/google-research/blob/47795035fc374b9501bbf9a49a1ae05a4d3282e3/jaxnerf/nerf/datasets.py#L196
+     * nerf_sh / plenoctrees: https://github.com/sxyu/plenoctree/blob/92ee5c1e367602d08f7eda77ed331f0f515d4b6f/nerf_sh/nerf/datasets.py#L189
+     * mipnerf: https://github.com/google/mipnerf/blob/84c969e0a623edd183b75693aed72a7e7c22902d/internal/datasets.py#L311
+     * D-nerf: https://github.com/albertpumarola/D-NeRF/blob/f16319df497105b71ac151d2c2ddd4de36a1493f/load_blender.py#L70 
+     * related, Pixel-NeRF: https://github.com/sxyu/pixel-nerf/blob/a5a514224272a91e3ec590f215567032e1f1c260/src/data/MultiObjectDataset.py#L72
+     * related, NeRF-- (without poses) & COLMAP loader: https://github.com/ActiveVisionLab/nerfmm/blob/27faab66a927ea14259125e1140231f0c8f6d14c/dataloader/with_colmap.py#L119
+   * List of lots of projects: https://github.com/visonpon/New-View-Synthesis 
+  """
+
+  outdir = Path(outdir)
+
+  IMAGES_BASE_DIR = outdir / 'images'
+
+  # Select the datums to export
+  datum_rdd = sd_table.get_datum_rdd_matching(
+                  only_types=['camera_image'],
+                  only_topics=only_cameras)
+  
+  def has_rgb(stamped_datum):
+    ci = stamped_datum.camera_image
+    return ci.has_rgb()
+  datum_rdd = datum_rdd.filter(has_rgb)
+
+  if limit >= 0:
+    n_total = datum_rdd.count()
+    frac = min(1.0, float(limit) / max(n_total, 1))  # keep within [0, 1]
+    datum_rdd = datum_rdd.sample(
+                  fraction=frac,
+                  withReplacement=False,
+                  seed=1337)
+
+  # Try to favor fewer, longer-lived python processes
+  from oarphpy.spark import cluster_cpu_count
+  from psegs.spark import Spark
+  with Spark.sess() as spark:
+    n_cpus = cluster_cpu_count(spark)
+  datum_rdd = datum_rdd.repartition(n_cpus).cache()
+
+  if datum_rdd.count() == 0:
+    util.log.info(f"Nothing to export for {outdir} !")
+    return
+  
+  util.log.info(f"Selected {datum_rdd.count()} input images ...")
+
+  def save_image(stamped_datum):
+    import imageio
+    ci = stamped_datum.camera_image
+    fname = (
+      str(stamped_datum.uri.topic) + "." + 
+      str(stamped_datum.uri.timestamp) + "." + img_ext)
+    dest = IMAGES_BASE_DIR / fname
+    image = ci.image.copy()
+    h, w = image.shape[:2]
+
+    frame = {}
+    frame['psegs_uri'] = str(stamped_datum.uri)
+    frame['height'] = h
+    frame['width'] = w
+    frame['psegs_rescaled'] = 1.0
+    if resize_max_h >= 0 and h > resize_max_h:
+      import cv2
+      th = int(resize_max_h)
+      tw = int((float(w) / h) * th)
+      image = cv2.resize(image, (tw, th))
+      frame['height'] = th
+      frame['width'] = tw
+      frame['psegs_rescaled'] = float(tw) / w
+    
+    imageio.imsave(dest, image)
+    
+    c2w = ci.ego_pose['ego', 'world']
+    # TODO FIXME  should be camera frame? !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    
+    # w2c = ci.ego_pose['world', 'ego']
+    
+    # import numpy as np
+    # opencv2opengl =  np.array([
+    #           [ 1.,   0.,   0., 0],
+    #           [ 0.,  -1.,   0., 0],
+    #           [ 0.,   0.,  -1., 0],
+    #           [ 0.,   0.,  0, 1],
+    #         ])
+    # transform_matrix = np.linalg.inv(
+    #   opencv2opengl @ w2c.get_transformation_matrix(homogeneous=True))
+
+    transform_matrix = c2w.get_transformation_matrix(homogeneous=True)
+    frame['transform_matrix'] = transform_matrix.tolist()
+
+    if include_file_extensions_in_keys:
+      frame['file_path'] = str(dest)
+    else:
+      frame['file_path'] = str(dest.with_suffix(''))  # strip only the extension
+        # NB: Dataset readers are supposed to append the .png suffix :S
+
+    if use_relative_paths:
+      frame['file_path'] = str(Path(frame['file_path']).relative_to(outdir))
+
+    fov_h, fov_v = ci.get_fov()
+    frame['camera_angle_x'] = fov_h
+      # NB: Readers will compute somthing like:
+      # focal = .5 * image_width / np.tan(.5 * camera_angle_x)
+
+    # Some readers accept all the intrinsics
+    # frame['camera_angle_y'] = fov_v
+    # print('hacks disable the distortion')
+    fx = ci.K[0, 0]
+    fy = ci.K[1, 1]
+    cx = ci.K[0, 2]
+    cy = ci.K[1, 2]
+    frame['fx'] = fx * frame['psegs_rescaled']
+    frame['fy'] = fy * frame['psegs_rescaled']
+    frame['cx'] = cx * frame['psegs_rescaled']
+    frame['cy'] = cy * frame['psegs_rescaled']
+    frame['w'] = frame['width']    # post-resize size, on the same scale as
+    frame['h'] = frame['height']   # the rescaled fx / fy / cx / cy above
+
+    # if 'colmap.camera_params_raw_json' in ci.extra:
+    #   params_raw = ci.extra['colmap.camera_params_raw_json']
+      
+    #   # FMI https://github.com/colmap/colmap/blob/e180948665b03c4a12d45e2ca39a589f42fdbda6/src/base/camera_models.h#L235
+    #   if ci.extra.get('colmap.camera_model_name') in ('OPENCV', 'FULL_OPENCV'):
+    #     params = json.loads(params_raw)
+    #     k1, k2, p1, p2 = params[4:8] # FULL_OPENCV has more ...
+
+    #     frame['k1'] = k1
+    #     frame['k2'] = k2
+    #     frame['p1'] = p1
+    #     frame['p2'] = p2
+
+    return frame
+  
+  IMAGES_BASE_DIR.mkdir(parents=True, exist_ok=True)
+  frames = datum_rdd.map(save_image).collect()
+  frames = [f for f in frames if f]
+  frames = sorted(frames, key=lambda f: f['file_path'])
+  util.log.info(f"... saved {len(frames)} input images frames ...")
+
+  transforms_data = {
+    'frames': frames,
+  }
+
+  KEYS_TO_MAKE_GLOBAL = (
+    'camera_angle_x', 'camera_angle_y',
+    'cx', 'cy', 'w', 'h',
+    'k1', 'k2', 'p1', 'p2',
+  )
+  f0 = frames[0]
+  for k in KEYS_TO_MAKE_GLOBAL:
+    if k in f0:
+      transforms_data[k] = f0[k]    
+
+  for split in splits_to_write:
+    transforms_dest = outdir / f'transforms_{split}.json' 
+    with open(transforms_dest, 'w') as f:
+      json.dump(transforms_data, f, indent=2)
+
+
+
+
+def save_sample_blender_format(
+      cis,
+      outdir='/tmp/test_nerf_blender_out',
+      split='train',
+      parallel=-1):
+  """
+  Given a list of `:class:`~psegs.datum.camera_image.CameraImage` instances,
+  export the images in the "Blender format" that is compatible with most
+  NeRF research.
+
+  Args:
+    cis (List[CameraImage]): Export these camera images.
+    outdir (str): Dump all data to this directory.
+    split (str): Export this split of the dataset; the Blender format
+      accommodates 'train', 'test', and 'val'.
+    parallel (int): Use this many export workers (default to one per vcpu
+      if negative)
+
+  References:
+   * Original NeRF Blender files: https://github.com/bmild/nerf/issues/59
+   * Original NeRF Blender *dataset* transforms: https://drive.google.com/drive/folders/1LEDmMJ-rFRhl8CJKnLBeTePDBsmJJ8OP
+   * Code that reads Blender data:
+     * Original NeRF: https://github.com/bmild/nerf/blob/20a91e764a28816ee2234fcadb73bd59a613a44c/load_blender.py#L41
+     * nerf_pl (pytorch lightning): https://github.com/kwea123/nerf_pl/blob/f4a072bc0dc49d2703d2a47da808432d228622e0/datasets/blender.py#L11
+     * jaxnerf: https://github.com/google-research/google-research/blob/47795035fc374b9501bbf9a49a1ae05a4d3282e3/jaxnerf/nerf/datasets.py#L196
+     * nerf_sh / plenoctrees: https://github.com/sxyu/plenoctree/blob/92ee5c1e367602d08f7eda77ed331f0f515d4b6f/nerf_sh/nerf/datasets.py#L189
+     * mipnerf: https://github.com/google/mipnerf/blob/84c969e0a623edd183b75693aed72a7e7c22902d/internal/datasets.py#L311
+     * D-nerf: https://github.com/albertpumarola/D-NeRF/blob/f16319df497105b71ac151d2c2ddd4de36a1493f/load_blender.py#L70 
+     * related, Pixel-NeRF: https://github.com/sxyu/pixel-nerf/blob/a5a514224272a91e3ec590f215567032e1f1c260/src/data/MultiObjectDataset.py#L72
+     * related, NeRF-- (without poses) & COLMAP loader: https://github.com/ActiveVisionLab/nerfmm/blob/27faab66a927ea14259125e1140231f0c8f6d14c/dataloader/with_colmap.py#L119
+   * List of lots of projects: https://github.com/visonpon/New-View-Synthesis 
+  """
+
+  import json
+  import imageio
+  from oarphpy import util as oputil
+  from oarphpy import spark as S
+
+  from psegs.spark import Spark
+
+  assert split in ('train', 'test', 'val')
+  if not cis:  # Guard: otherwise `K` stays None and `K.tolist()` below fails
+    util.log.info("Nothing to export for %s !" % outdir)
+    return
+
+  util.log.info("Exporting %s images to Blender format ..." % len(cis))
+  img_dir_out = os.path.join(outdir, split)
+  oputil.mkdir(str(img_dir_out))
+
+  class SaveAndGetFrame(object):
+    def __call__(self):
+      i = self.i
+      ci = self.ci
+      img_dir_out = self.img_dir_out
+
+      import imageio
+
+      dest = os.path.join(img_dir_out, 'r_%s.png' % str(i).zfill(6))
+      img = ci.image
+      imageio.imwrite(dest, img) 
+        # NOTE: some nerfs think this image is in [0, 1] ???
+      
+      c2w = ci.ego_pose['ego', 'world']
+      # TODO FIXME  should be camera frame? !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+      transform_matrix = c2w.get_transformation_matrix(homogeneous=True)
+      frame = {
+        'transform_matrix': transform_matrix.tolist(),
+        'file_path': dest.replace('.png', ''),
+          # NB: Dataset readers are supposed to append the .png suffix :S
+        # 'rotation': ??? don't know what this is but it's not read?
+      }
+      return frame
+
+  camera_angle_x = None
+  K = None
+  cis = sorted(cis, key=lambda ci: ci.timestamp)
+  callables = []
+  for ci in cis:
+
+    if camera_angle_x is None:
+      fov_h, fov_v = ci.get_fov()
+      camera_angle_x = fov_h
+        # NB: Readers will compute somthing like:
+        # focal = .5 * image_width / np.tan(.5 * camera_angle_x)
+      
+      K = ci.K
+    
+    c = SaveAndGetFrame()
+    c.i = int(ci.extra['threeDScannerApp.frame_id'])
+    c.ci = ci
+    c.img_dir_out = img_dir_out
+    callables.append(c)
+
+  with Spark.sess() as spark:
+    results = S.run_callables(spark, callables, parallel=parallel)
+    frames = [f for obj, f in results]
+
+  transforms_data = {
+    'camera_angle_x': camera_angle_x,
+    'frames': frames,
+  }
+
+  transforms_dest = os.path.join(outdir, 'transforms_%s.json' % split)
+  with open(transforms_dest, 'w') as f:
+    json.dump(transforms_data, f, indent=2)
+
+  full_intrinsic = os.path.join(outdir, 'full_intrinsic.json')
+  with open(full_intrinsic, 'w') as f:
+    json.dump(K.tolist(), f, indent=2)
+
+  util.log.info("... done writing to %s ." % outdir)
+
+
+
+if __name__ == '__main__':
+  # Ad-hoc driver: exports one threeDScannerApp capture in Blender format.
+  # NB: the input and output paths below are hard-coded.
+  from psegs.datasets import ios_lidar
+
+  # base_dir = '/outer_root/home/au/lidarphone_scans/2021_06_27_12_37_38'
+  base_dir = '/outer_root/media/970-evo-plus-raid0/lidarphone_lidar_scans/2021_10_31_20_24_07/'
+  # base_dir = '/outer_root/home/au/lidarphone_scans/landscape_home_button_right_07_09_49'
+
+  from oarphpy import util as oputil
+  jpg_paths = oputil.all_files_recursive(base_dir, pattern='frame*.jpg')
+  json_paths = [p.replace('jpg', 'json') for p in jpg_paths]
+  json_paths = sorted(json_paths)
+  cis = [ios_lidar.threeDScannerApp_create_camera_image(p) for p in json_paths]
+
+  print(len(cis))
+  cis[0]
+
+  save_sample_blender_format(
+    cis,
+    '/outer_root/home/pwais/bundle-adjusting-NeRF/data/blender/soma-pizza-mural')
+
+
+
diff --git a/psegs/export/nerfstudio.py b/psegs/export/nerfstudio.py
new file mode 100644
index 0000000..0f1f691
--- /dev/null
+++ b/psegs/export/nerfstudio.py
@@ -0,0 +1,379 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import json
+from pathlib import Path
+
+from tqdm.auto import tqdm
+import numpy as np
+
+from psegs import util
+
+def _save_image(
+      stamped_datum,
+      outdir,
+      images_outdir,
+      downscales=(2, 4, 8),
+      resize_max_h=-1,
+      img_ext='png'):
+  """Save one RGB `CameraImage` to disk and build its Nerfstudio frame dict.
+
+  The output directories are not created here; they must already exist.
+
+  Args:
+    stamped_datum: Datum whose `camera_image` is exported.
+    outdir (Path): Export root.  `file_path` is recorded relative to it and
+      downscaled copies are written to `outdir / 'images_<factor>'`.
+    images_outdir (Path): Directory that receives the full-size image.
+    downscales (List[int]): Also write copies shrunk by these factors; a
+      falsy value disables downscaling.
+    resize_max_h (int): If non-negative and the image is taller than this,
+      resize it (keeping aspect ratio) to this height before saving.
+    img_ext (str): File extension used for the written images.
+
+  Returns:
+    dict: Image size, output paths, `transform_matrix` and intrinsics for
+      one entry of the `frames` list in `transforms.json`.
+  """
+
+  import imageio
+  import cv2
+  import numpy as np
+
+  camera_image = stamped_datum.camera_image
+
+  # Maybe rescale, and remember the scale factor for the intrinsics below
+  pixels = camera_image.image.copy()
+  src_h, src_w = pixels.shape[:2]
+  out_h, out_w, rescale = src_h, src_w, 1.0
+  if resize_max_h >= 0 and src_h > resize_max_h:
+    out_h = int(resize_max_h)
+    out_w = int((float(src_w) / src_h) * out_h)
+    pixels = cv2.resize(
+                pixels, (out_w, out_h), interpolation=cv2.INTER_CUBIC)
+    rescale = float(out_w) / src_w
+
+  uri = stamped_datum.uri
+  fname = f"{uri.topic}.{uri.timestamp}.{img_ext}"
+  dest = images_outdir / fname
+
+  # NB: key insertion order here is the key order of the emitted JSON.
+  frame = {
+    'psegs_rescaled': rescale,
+    'height': out_h,
+    'h': out_h,
+    'width': out_w,
+    'w': out_w,
+    'psegs_uri': str(uri),
+    'psegs_fpath': str(dest),
+  }
+
+  # Write the image output
+  imageio.imwrite(dest, pixels)
+
+  # Do downscales (of the possibly already-resized image)
+  for factor in (downscales or []):
+    small_size = (int(out_w / factor), int(out_h / factor))
+    small = cv2.resize(pixels, small_size, interpolation=cv2.INTER_CUBIC)
+    small_dest = outdir / f'images_{factor}' / fname
+    imageio.imwrite(small_dest, small)
+    frame[f'psegs_downscales_{factor}_fpath'] = str(small_dest)
+
+  # prev version
+  # NB: the `.get_inverse()` call is disabled upstream ("DEBUG THIS"), so the
+  # result of `get_world_to_sensor()` is what gets exported as the pose.
+  xform = camera_image.get_world_to_sensor()
+
+  # TODO: check if input cameras were actually in opencv frame!
+  OPENCV_2_OPENGL = np.diag([1, -1, -1, 1])
+  pose = xform.get_transformation_matrix(homogeneous=True) @ OPENCV_2_OPENGL
+
+  frame['transform_matrix'] = pose.tolist()
+  frame['file_path'] = str(dest.relative_to(outdir))
+
+  # Intrinsics, scaled to match the saved image size
+  K = camera_image.K
+  intrinsics = (
+    ('fl_x', K[0, 0]),
+    ('fl_y', K[1, 1]),
+    ('cx', K[0, 2]),
+    ('cy', K[1, 2]),
+  )
+  for key, value in intrinsics:
+    frame[key] = value * rescale
+
+  extra = camera_image.extra
+  is_opencv_model = (
+    extra.get('colmap.camera_model_name') in ('OPENCV', 'FULL_OPENCV'))
+  if 'colmap.camera_params_raw_json' in extra and is_opencv_model:
+    # FMI https://github.com/colmap/colmap/blob/e180948665b03c4a12d45e2ca39a589f42fdbda6/src/base/camera_models.h#L235
+    params = json.loads(extra['colmap.camera_params_raw_json'])
+    k1, k2, p1, p2 = params[4:8] # FULL_OPENCV has more ...
+    frame.update(
+      camera_model='OPENCV', # TODO support FULL_OPENCV
+      k1=k1,
+      k2=k2,
+      p1=p1,
+      p2=p2)
+
+  return frame
+
+def _save_depth_image(
+      stamped_datum,
+      frame,
+      outdir,
+      depth_outdir,
+      downscales=(2, 4, 8),
+      resize_max_h=-1,
+      mm_depth_type='uint16',
+      mm_depth_scale=1000.):
+  """Save one depth image as PNG and attach its paths to `frame`.
+
+  The output directories are not created here; they must already exist.
+
+  Args:
+    stamped_datum: Datum whose `camera_image` provides `get_depth()`.
+    frame (dict): Frame entry (as built by `_save_image`) to annotate.  It
+      is modified in place and also returned.
+    outdir (Path): Export root.  `depth_file_path` is recorded relative to
+      it and downscaled copies go to `outdir / 'depths_<factor>'`.
+    depth_outdir (Path): Directory that receives the full-size depth PNG.
+    downscales (List[int]): Also write copies shrunk by these factors; a
+      falsy value disables downscaling.
+    resize_max_h (int): If non-negative and the depth map is taller than
+      this, resize it (nearest-neighbor) to this height before saving.
+    mm_depth_type (str): NumPy dtype of the stored depth values.
+    mm_depth_scale (float): Factor applied to `get_depth()` before the cast.
+
+  Returns:
+    dict: The same `frame` object, with depth keys added.
+  """
+
+  import cv2
+
+  dci = stamped_datum.camera_image
+
+  depth = mm_depth_scale * dci.get_depth()
+  out_dtype = np.dtype(mm_depth_type)
+  if np.issubdtype(out_dtype, np.integer):
+    # Saturate instead of letting the integer cast wrap around (or produce
+    # undefined values for NaN / inf).  NaN is stored as 0.
+    limits = np.iinfo(out_dtype)
+    depth = np.nan_to_num(depth, nan=0., posinf=limits.max, neginf=0.)
+    depth = np.clip(depth, limits.min, limits.max)
+  depth = depth.astype(out_dtype)
+
+  # Maybe rescale, and record dimensions
+  h, w = depth.shape[:2]
+  th, tw = h, w
+  frame['psegs_depth_rescaled'] = 1.0
+  if resize_max_h >= 0 and h > resize_max_h:
+    th = int(resize_max_h)
+    tw = int((float(w) / h) * th)
+    depth = cv2.resize(depth, (tw, th), interpolation=cv2.INTER_NEAREST)
+    frame['psegs_depth_rescaled'] = float(tw) / w
+
+  # The depth map may have a different resolution than the RGB image.  The
+  # generic size keys describe the RGB image (they pair with the focal
+  # length / principal point), so only fill them in when absent and record
+  # the depth size under depth-specific keys.
+  frame['psegs_depth_height'] = th
+  frame['psegs_depth_width'] = tw
+  frame.setdefault('height', th)
+  frame.setdefault('h', th)
+  frame.setdefault('width', tw)
+  frame.setdefault('w', tw)
+
+  # Write the image output
+  fname = f"{stamped_datum.uri.topic}.{stamped_datum.uri.timestamp}.png"
+  dest = depth_outdir / fname
+
+  cv2.imwrite(str(dest), depth)
+  frame['psegs_depth_uri'] = str(stamped_datum.uri)
+  frame['psegs_depth_fpath'] = str(dest)
+  frame['depth_file_path'] = str(dest.relative_to(outdir))
+
+  # Do downscales
+  for dfactor in (downscales or []):
+    d_sz = (int(tw / dfactor), int(th / dfactor))
+    depth_downscaled = cv2.resize(depth, d_sz, interpolation=cv2.INTER_NEAREST)
+
+    d_dest = outdir / f'depths_{dfactor}' / fname
+    cv2.imwrite(str(d_dest), depth_downscaled)
+    frame[f'psegs_depth_downscales_{dfactor}_fpath'] = str(d_dest)
+
+  return frame
+
+def export_sdt_to_nerfstudio_format(
+      sd_table,
+      outdir,
+      downscales=(2, 4, 8),
+      resize_max_h=-1,
+      img_ext='png',
+      only_cameras=None,
+      include_mm_depth=True,
+      mm_depth_type='uint16',
+      mm_depth_scale=1000.,
+      include_world_pointclouds=True,
+      only_pc_topics=None,
+      limit=-1):
+  """
+  Given a :class:`~psegs.table.StampedDatumTable` instance, export the
+  `CameraImage` images (and other metadata) to `outdir` in Nerfstudio format:
+   * nerfstudio.data.dataparsers.nerfstudio_dataparser.Nerfstudio
+
+  Args:
+    sd_table (StampedDatumTable): Export images from this table.
+    outdir (Path or str): Dump all data to this directory.
+    downscales (List[int]): Also export downsized copies of images, downscaled
+      by these factors.
+    resize_max_h (int): Resize input images to have this maximum
+      height in pixels.
+    img_ext (str): Save images in this format.
+    only_cameras (List[str]): Only export these camera topics.
+    include_mm_depth (bool): Include mm (millimeter) depth images.
+    mm_depth_type (str): Encode mm depth into integer values of this type.
+    mm_depth_scale (float): Scale depth channel to millimeters using
+      this factor.
+    include_world_pointclouds (bool): Include PointClouds in world frame
+      as a single PLY file (nerfstudio spec).
+    only_pc_topics (List[str]): Only export these point cloud topics.
+    limit (int): If non-negative, randomly sample (fixed seed) about this
+      many RGB frames.  Sampling is done by fraction, so the number of
+      frames actually exported is approximate.
+
+  """
+
+  from oarphpy.spark import cluster_cpu_count
+  from psegs.spark import Spark
+
+  outdir = Path(outdir)
+
+  images_outdir = outdir / 'images'
+  downscales = downscales or []
+
+  # Select the datums to export
+  datum_rdd = sd_table.get_datum_rdd_matching(
+                  only_types=['camera_image'],
+                  only_topics=only_cameras)
+  datum_rdd = datum_rdd.filter(lambda sd: sd.camera_image.has_rgb())
+
+  if limit >= 0:
+    n_total = datum_rdd.count()
+    # Clamp so that asking for more frames than exist keeps them all instead
+    # of producing an out-of-range sampling fraction.
+    frac = min(1.0, float(limit) / max(n_total, 1))
+    datum_rdd = datum_rdd.sample(
+                  fraction=frac,
+                  withReplacement=False,
+                  seed=1337)
+
+  # Try to favor fewer, longer-lived python processes
+  with Spark.sess() as spark:
+    n_cpus = cluster_cpu_count(spark)
+  datum_rdd = datum_rdd.repartition(n_cpus).cache()
+
+  n_selected = datum_rdd.count()
+  if n_selected == 0:
+    util.log.info(f"Nothing to export for {outdir} !")
+    return
+
+  util.log.info(f"Selected {n_selected} input images ...")
+
+  images_outdir.mkdir(parents=True, exist_ok=True)
+  for dfactor in downscales:
+    i_base_dir = outdir / f'images_{dfactor}'
+    i_base_dir.mkdir(parents=True, exist_ok=True)
+
+  save_ci = lambda sd: _save_image(
+                          sd,
+                          outdir,
+                          images_outdir,
+                          downscales,
+                          resize_max_h,
+                          img_ext)
+  frames = datum_rdd.map(save_ci).collect()
+  frames = [f for f in frames if f]
+  util.log.info(f"... saved {len(frames)} input images frames ...")
+
+  if include_mm_depth:
+    # Select the depth images to export
+    psegs_uri_to_frame = dict((f['psegs_uri'], f) for f in frames)
+    datum_dci_rdd = sd_table.get_datum_rdd_matching(
+                      only_types=['camera_image'],
+                      only_topics=only_cameras)
+
+    def to_sd_frame(stamped_datum):
+      """Pair a depth datum with the frame dict of its RGB image, or return
+      `None` when the datum should be skipped."""
+      dci = stamped_datum.camera_image
+      if not dci.has_depth():
+        return None
+
+      if 'psegs.depth.rgb_uri' not in dci.extra:
+        util.log.warning(
+          f"WARN: Found depth image that has no `psegs.depth.rgb_uri` key: "
+          f"{str(stamped_datum.uri)}")
+        return None
+
+      # The paired RGB image may not have been exported (e.g. it was dropped
+      # by the `limit` sampling above); skip the depth image in that case
+      # instead of failing the whole job with a KeyError.
+      rgb_uri = str(dci.extra['psegs.depth.rgb_uri'])
+      frame = psegs_uri_to_frame.get(rgb_uri)
+      if frame is None:
+        return None
+      return (stamped_datum, frame)
+
+    sd_frame_rdd = datum_dci_rdd.map(to_sd_frame)
+    sd_frame_rdd = sd_frame_rdd.filter(lambda v: v is not None).cache()
+    util.log.info(f"Selected {sd_frame_rdd.count()} depth images ...")
+
+    depth_outdir = outdir / 'depth'
+    depth_outdir.mkdir(parents=True, exist_ok=True)
+    for dfactor in downscales:
+      d_base_dir = outdir / f'depths_{dfactor}'
+      d_base_dir.mkdir(parents=True, exist_ok=True)
+
+    save_dci = lambda sd_f: _save_depth_image(
+                                      sd_f[0],
+                                      sd_f[1],
+                                      outdir,
+                                      depth_outdir,
+                                      downscales,
+                                      resize_max_h,
+                                      mm_depth_type,
+                                      mm_depth_scale)
+    dframes = sd_frame_rdd.map(save_dci).collect()
+    # NB: this implicitly overwrites `frames` to only include frames that
+    # have *both* RGB and Depth.  Nerfstudio wants this 1-to-1 parity at
+    # time of writing.
+    if dframes:
+      frames = [f for f in dframes if f]
+      util.log.info(f"... saved depth frames, now have {len(frames)} frames ...")
+    else:
+      util.log.info(f"... no depth frames, skipping! ...")
+
+  ns_ply_file_path = '' # Actually fname
+  if include_world_pointclouds:
+    util.log.info("Exporting world cloud to PLY ...")
+
+    datum_pc_rdd = sd_table.get_datum_rdd_matching(
+                    only_types=['point_cloud'],
+                    only_topics=only_pc_topics)
+    xyzrgbs = []
+    iter_pc_sds = tqdm(datum_pc_rdd.collect(), desc="Export PC single PLY")
+    for pc_sd in iter_pc_sds:
+      pc = pc_sd.point_cloud
+      if pc is None:
+        continue
+
+      # TODO: make sure in world frame
+      xyzrgbs.append(pc.get_xyzrgb(default_color=(0, 0, 0)))
+
+    if xyzrgbs:
+      import open3d as o3d
+
+      ply_dest = outdir / 'psegs_world_point_cloud.ply'
+      xyzrgb = np.concatenate(xyzrgbs)
+      pcd = o3d.geometry.PointCloud()
+      pcd.points = o3d.utility.Vector3dVector(xyzrgb[:, :3])
+      pcd.colors = o3d.utility.Vector3dVector(xyzrgb[:, 3:] / 256.)
+      o3d.io.write_point_cloud(str(ply_dest), pcd)
+      ns_ply_file_path = str(ply_dest.name)
+
+      util.log.info(f"... exported world cloud to {ply_dest} .")
+    else:
+      # `np.concatenate` raises on an empty list; a table without point
+      # clouds is a valid input, so just omit the PLY.
+      util.log.info("... no point clouds found, skipping PLY export.")
+
+  frames = sorted(frames, key=lambda f: f['file_path'])
+  transforms_data = {
+    'frames': frames,
+  }
+
+  # Intrinsics are hoisted from the first frame to the top level of the JSON.
+  KEYS_TO_MAKE_GLOBAL = (
+    'fl_x', 'fl_y', 'cx', 'cy',
+    'w', 'h',
+    'camera_model',
+    'k1', 'k2', 'p1', 'p2',
+  )
+  if frames:
+    f0 = frames[0]
+    for k in KEYS_TO_MAKE_GLOBAL:
+      if k in f0:
+        transforms_data[k] = f0[k]
+
+  if ns_ply_file_path:
+    transforms_data['ply_file_path'] = ns_ply_file_path
+
+  transforms_dest = outdir / 'transforms.json'
+  with open(transforms_dest, 'w') as f:
+    json.dump(transforms_data, f, indent=2)
+  util.log.info(f"... saved {transforms_dest} .")
+
+
+if __name__ == '__main__':
+  # Ad-hoc manual driver for `export_sdt_to_nerfstudio_format`.  All paths
+  # below are hard-coded and only exist on the author's machine.
+
+  # TODO: need to port these datas ...
+  # from psegs.table.sd_table_factory import ParquetSDTFactory
+  # F = ParquetSDTFactory.factory_for_sd_subdirs(
+  #   '/outer_root/media/mai-tank/hloc_out/pwais.private.canepa-speedster/psegs/')
+  # T = F.create_as_single_table()
+  # export_sdt_to_nerfstudio_format(
+  #   T,
+  #   '/outer_root/media/mai-tank/ns-test-root')
+  
+  # import faulthandler
+  # faulthandler.enable()
+
+  # Root of one reconstruction; the SfM output and the source images are
+  # read from sub-directories of it.
+  spath = Path('/outer_root/media/mai-tank/hloc_out_mrskylake/pwais.private.moms-strawberrys-comp')
+
+  # Build a table from the COLMAP reconstruction, then export it with the
+  # default options.
+  from psegs.datasets.colmap import COLMAP_SDTFactory
+  sdt = COLMAP_SDTFactory.create_sd_table_for_reconstruction(
+              spath/'sfm_out/sfm_superpoint+superglue/',
+              spath/'images/',
+              '/outer_root/media/mai-tank/ns-test-root/tastpsegs-w-depth')
+  export_sdt_to_nerfstudio_format(
+    sdt,
+    '/outer_root/media/mai-tank/ns-test-root/w-depth')
\ No newline at end of file
diff --git a/psegs/export/ros.py b/psegs/export/ros.py
new file mode 100644
index 0000000..c4e04fe
--- /dev/null
+++ b/psegs/export/ros.py
@@ -0,0 +1,427 @@
+# Copyright 2022 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import numpy as np
+
+# import tf
+# import os
+# import cv2
+import rospy
+# import rosbag
+# import progressbar
+
+# from datetime import datetime
+# from std_msgs.msg import Header
+# from sensor_msgs.msg import CameraInfo, Imu, PointField, NavSatFix
+# import sensor_msgs.point_cloud2 as pcl2
+# from geometry_msgs.msg import TransformStamped, TwistStamped, Transform
+# from cv_bridge import CvBridge
+# import numpy as np
+# import argparse
+
+# https://github.com/tomas789/kitti2bag/blob/master/kitti2bag/kitti2bag.py
+
+
+
+
+
+###############################################################################
+## PSegs -> ROS: Utils
+
+def nanostamp_to_rostime(nanostamp):
+  """Convert a timestamp in integer nanoseconds to a `rospy.Time`.
+
+  Args:
+    nanostamp (int): Nanoseconds; split into whole seconds and the
+      remaining nanoseconds.
+
+  Returns:
+    rospy.Time: The equivalent ROS time.
+  """
+  # For a good time https://github.com/pgao/roscpp_core/commit/dffa31afe8d7f1268a3fa227408aeb6e04a28b87#diff-65b9485bd6b5d3fb4b7a84cd975c3967L157
+  # Use integer arithmetic only.  True division (`/`) goes through a float64,
+  # which cannot represent ~1e18 ns exactly: a stamp just below a second
+  # boundary rounds *up* to the next second while the remainder stays near
+  # 999999999, giving a result that is one second late.
+  secs, nsecs = divmod(int(nanostamp), 1000000000)
+  return rospy.Time(secs=secs, nsecs=nsecs)
+
+
+def to_ros_frame(s):
+  """Return `s` as a ROS frame id: every '|' and '/' becomes '_'."""
+  for separator in ('|', '/'):
+    s = s.replace(separator, '_')
+  return s
+
+
+def to_ros_topic(s):
+  """Return `s` as a ROS topic name: '|' becomes '_' and a '/' is prepended."""
+  return '/' + s.replace('|', '_')
+
+
+def to_ros_arr(arr):
+  """Return the elements of array `arr` as a flat list in row-major order."""
+  row_major = arr.ravel(order='C')
+  return row_major.tolist()
+
+
+###############################################################################
+## PSegs -> ROS: datum conversion
+
+def transform_to_ros(xform, nanostamp=None):
+  """Convert a PSegs transform into a `TFMessage` holding one transform.
+
+  Args:
+    xform: Transform providing `src_frame`, `dest_frame`, a 3x3 `rotation`
+      and a 3-element `translation`.
+    nanostamp (int): Optional timestamp in nanoseconds for the message
+      header; when `None` the header stamp is left at its default.
+
+  Returns:
+    tf2_msgs.msg.TFMessage: Message with a single `TransformStamped` whose
+      `header.frame_id` is the source frame and whose `child_frame_id` is
+      the destination frame.
+  """
+  import tf
+  from tf2_msgs.msg import TFMessage
+  from geometry_msgs.msg import Transform
+  from geometry_msgs.msg import TransformStamped
+
+  tf_transform = TransformStamped()
+  if nanostamp is not None:
+    tf_transform.header.stamp = nanostamp_to_rostime(nanostamp)
+
+  tf_transform.header.frame_id = to_ros_frame(xform.src_frame)
+  tf_transform.child_frame_id = to_ros_frame(xform.dest_frame)
+
+  # `quaternion_from_matrix` takes a homogeneous 4x4 matrix.  Embed the
+  # rotation in an identity (not an all-ones) matrix so the input is a valid
+  # transform regardless of which entries the implementation reads.
+  r_4x4 = np.eye(4)
+  r_4x4[:3, :3] = xform.rotation
+  qx, qy, qz, qw = tf.transformations.quaternion_from_matrix(r_4x4)
+
+  transform = Transform()
+  transform.rotation.x = qx
+  transform.rotation.y = qy
+  transform.rotation.z = qz
+  transform.rotation.w = qw
+
+  transform.translation.x = xform.translation[0]
+  transform.translation.y = xform.translation[1]
+  transform.translation.z = xform.translation[2]
+
+  tf_transform.transform = transform
+
+  tf_msg = TFMessage()
+  tf_msg.transforms.append(tf_transform)
+  return tf_msg
+
+
+def ci_to_ros_camera_info(ci):
+  """Build a `sensor_msgs/CameraInfo` message for a camera image.
+
+  Args:
+    ci: Camera image providing `sensor_name`, `timestamp`, `width`,
+      `height` and the 3x3 intrinsic matrix `K`.
+
+  Returns:
+    sensor_msgs.msg.CameraInfo: Camera info describing an undistorted,
+      unrectified monocular camera with intrinsics `K`.
+  """
+  from sensor_msgs.msg import CameraInfo
+  info = CameraInfo()
+  info.header.frame_id = to_ros_frame(ci.sensor_name)
+  info.header.stamp = nanostamp_to_rostime(ci.timestamp)
+  info.width = ci.width
+  info.height = ci.height
+
+  # No distortion coefficients are available here, so declare the standard
+  # 5-parameter model with all-zero coefficients (i.e. no distortion).
+  info.distortion_model = 'plumb_bob'
+  info.D = [0.] * 5
+
+  info.K = to_ros_arr(ci.K)
+
+  # A monocular camera has no rectification rotation; the message default of
+  # all zeros is not a valid rotation, so set the identity explicitly.
+  info.R = to_ros_arr(np.eye(3))
+
+  # Projection matrix P = [K | 0]
+  P = np.zeros((3, 4))
+  P[:3, :3] = ci.K
+  info.P = to_ros_arr(P)
+
+  return info
+
+
+def ci_to_ros_image(ci):
+  """Decode a camera image's encoded bytes into a `bgr8` ROS image message.
+
+  Args:
+    ci: Camera image providing the encoded image bytes in `image_png`, plus
+      `sensor_name` and `timestamp` for the message header.
+
+  Returns:
+    sensor_msgs.msg.Image: The decoded image with encoding `bgr8`.
+
+  Raises:
+    ValueError: If the image bytes cannot be decoded.
+  """
+  import cv2
+  from cv_bridge import CvBridge
+  bridge = CvBridge()
+
+  # `frombuffer` accepts `bytes` as well as `bytearray`.
+  img_arr = np.frombuffer(ci.image_png, dtype=np.uint8)
+
+  # Force an 8-bit, 3-channel BGR result so that it always matches the
+  # 'bgr8' encoding declared below (IMREAD_UNCHANGED may return grayscale,
+  # BGRA or 16-bit data, which cv_bridge rejects for 'bgr8').
+  cv_img = cv2.imdecode(img_arr, cv2.IMREAD_COLOR)
+  if cv_img is None:
+    raise ValueError(
+      "Could not decode image bytes for sensor %s" % ci.sensor_name)
+
+  ros_img_msg = bridge.cv2_to_imgmsg(cv_img, encoding='bgr8')
+  ros_img_msg.header.frame_id = to_ros_frame(ci.sensor_name)
+  ros_img_msg.header.stamp = nanostamp_to_rostime(ci.timestamp)
+
+  return ros_img_msg
+
+
+def pc_to_ros_pcl(pc):
+  """Convert a point cloud into a ROS `PointCloud2` with distance coloring.
+
+  Args:
+    pc: Point cloud providing `timestamp` and `get_cloud()`, whose result
+      must have 3 columns (x, y, z).
+
+  Returns:
+    The message built by `sensor_msgs.point_cloud2.create_cloud` with
+      fields x, y, z (float32) and rgb (uint32), in frame 'ego'.
+  """
+  from sensor_msgs.msg import PointField
+  from std_msgs.msg import Header
+  import sensor_msgs.point_cloud2 as pcl2
+  from psegs.util.plotting import rgb_for_distance
+
+  header = Header()
+  header.frame_id = 'ego' # fixme? ~~~~~~~~~~~~~~~~~~~~~~~~` to_ros_frame(pc.sensor_name)
+  header.stamp = nanostamp_to_rostime(pc.timestamp)
+
+  xyz = pc.get_cloud().astype(np.float32)
+  assert xyz.shape[-1] == 3
+
+  # Color each point by its distance from the origin, then pack the three
+  # channels into one integer as 0xRRGGBB.
+  # https://gist.github.com/lucasw/ea04dcd65bc944daea07612314d114bb#file-create_cloud_xyzrgb-py-L28
+  # https://github.com/cruise-automation/webviz/blob/2e7db3aafffec39b541728668c97ce7d83eee007/packages/webviz-core/src/panels/ThreeDimensionalViz/commands/Pointclouds/PointCloudBuilder.js#L117
+  rgb = rgb_for_distance(np.linalg.norm(xyz, axis=1)).astype(int)
+  red, green, blue = rgb[:, 0], rgb[:, 1], rgb[:, 2]
+  packed_rgb = (2**16) * red + (2**8) * green + 1 * blue
+
+  # One [x, y, z, rgb] row per point
+  points = [row + [color] for row, color in zip(xyz.tolist(), packed_rgb)]
+
+  # Three float32 coordinates at byte offsets 0, 4, 8; packed color at 12.
+  fields = [
+    PointField(axis, 4 * idx, PointField.FLOAT32, 1)
+    for idx, axis in enumerate(('x', 'y', 'z'))
+  ]
+  fields.append(PointField('rgb', 12, PointField.UINT32, 1))
+
+  return pcl2.create_cloud(header, fields, points)
+
+
+def color_to_ros(color):
+  """color in [0, 1] -> ROS color
+
+  The three channels of `color` are clipped to [0, 1]; alpha is fixed at 1.
+  """
+  from std_msgs.msg import ColorRGBA
+  red, green, blue = np.clip(color, 0, 1).tolist()
+  return ColorRGBA(r=red, g=green, b=blue, a=1.)
+
+
+def _box_face_marker(face_pts, color):
+  """Return a LINE_STRIP marker tracing the closed outline of one box face.
+
+  Args:
+    face_pts: Array whose first four rows are the (x, y, z) face corners.
+    color: ROS color applied to the marker.
+  """
+  from geometry_msgs.msg import Point
+  from visualization_msgs.msg import Marker
+
+  marker = Marker()
+  marker.type = Marker.LINE_STRIP  # each point in points is part of the line
+  marker.action = Marker.MODIFY    # or add
+  marker.color = color
+  marker.scale.x = 0.1
+  # Visit corners 0..3, then corner 0 again to close the outline.
+  for corner in (0, 1, 2, 3, 0):
+    x, y, z = face_pts[corner, :].tolist()
+    marker.points.append(Point(x=x, y=y, z=z))
+  return marker
+
+
+def _box_sides_marker(front_pts, back_pts, color):
+  """Return a LINE_LIST marker with one segment per (front, back) corner pair.
+
+  Args:
+    front_pts: Array of (x, y, z) rows for the front-face corners.
+    back_pts: Array of (x, y, z) rows for the matching back-face corners.
+    color: ROS color applied to the marker.
+  """
+  from geometry_msgs.msg import Point
+  from visualization_msgs.msg import Marker
+
+  marker = Marker()
+  marker.type = Marker.LINE_LIST # pairs of points create a line
+  marker.action = Marker.MODIFY  # or add
+  marker.color = color
+  marker.scale.x = 0.1
+  for front_xyz, back_xyz in zip(front_pts.tolist(), back_pts.tolist()):
+    segment = [Point(x=x, y=y, z=z) for x, y, z in (front_xyz, back_xyz)]
+    marker.points.extend(segment)
+  return marker
+
+
+def cuboids_to_ros_marker_array(cuboids):
+  """Convert cuboids into a `MarkerArray` of wireframe boxes.
+
+  Each cuboid becomes three markers (front outline, connecting edges, back
+  outline) namespaced by its `track_id` and colored by its `category_name`.
+
+  Args:
+    cuboids: Iterable of cuboids providing `timestamp`, `category_name`,
+      `track_id` and `get_box3d()`.
+
+  Returns:
+    visualization_msgs.msg.MarkerArray: All markers, in cuboid order.
+  """
+  from oarphpy.plotting import hash_to_rbg
+  from std_msgs.msg import Header
+  from visualization_msgs.msg import MarkerArray
+
+  marker_array = MarkerArray()
+
+  # We'll use the Line List and Line Strip Markers instead of the Cube marker
+  # so that we can highlight the front face of the cuboid.
+  for obj_id, cuboid in enumerate(cuboids):
+    header = Header()
+    header.frame_id = 'ego' # fixme? ~~~~~~~~~~~~~~~~~~~~~~~~` to_ros_frame(pc.sensor_name)
+    header.stamp = nanostamp_to_rostime(cuboid.timestamp)
+
+    # Lighter shade for the front face, darker for the back face.
+    base_color = np.array(hash_to_rbg(cuboid.category_name)) / 255.
+    front_color = color_to_ros(base_color + 0.3)
+    back_color = color_to_ros(base_color - 0.3)
+    sides_color = color_to_ros(base_color)
+
+    # First four corners are treated as the front face, the rest as the back.
+    corners = cuboid.get_box3d()
+    front, back = corners[:4, :], corners[4:, :]
+
+    markers = [
+      _box_face_marker(front, front_color),
+      _box_sides_marker(front, back, sides_color),
+      _box_face_marker(back, back_color),
+    ]
+    for marker_idx, marker in enumerate(markers):
+      marker.id = obj_id * 10 + marker_idx
+      marker.ns = cuboid.track_id
+      marker.header = header
+
+    marker_array.markers.extend(markers)
+
+  return marker_array
+
+
+###############################################################################
+## PSegs -> ROS: RDD[StampedDatum] conversion
+
+import attr
+
+@attr.s(slots=True)
+class ROSMsg(object):
+  """A ROS message together with the topic and time it belongs to."""
+
+  # ROS topic name to publish / record the message under
+  topic = attr.ib(default='')
+  # Message time (a `rospy.Time` when built by `iter_rosmsgs_from_datum`)
+  timestamp = attr.ib(default=0)
+  # The ROS message instance
+  msg = attr.ib(default=None)
+
+  @classmethod
+  def iter_rosmsgs_from_datum(cls, sd):
+    """Yield the ROS messages that represent one stamped datum.
+
+    Exactly one payload kind is handled per datum, checked in this order:
+    camera image, point cloud, cuboids, transform.  Any transforms attached
+    to the payload are yielded last on the `/tf` topic.
+
+    Args:
+      sd: Stamped datum providing `uri.topic`, `uri.timestamp` and the
+        payload attributes.
+
+    Yields:
+      ROSMsg: One instance per ROS message.
+    """
+    nanostamp = sd.uri.timestamp
+    msg_t = nanostamp_to_rostime(nanostamp)
+
+    transforms = []
+    if sd.camera_image:
+      namespace = to_ros_topic(sd.uri.topic)
+      yield cls(
+        timestamp=msg_t,
+        topic=namespace + '/image',
+        msg=ci_to_ros_image(sd.camera_image))
+      yield cls(
+        timestamp=msg_t,
+        topic=namespace + '/camera_info',
+        msg=ci_to_ros_camera_info(sd.camera_image))
+
+      transforms += [sd.camera_image.ego_pose, sd.camera_image.ego_to_sensor]
+
+    elif sd.point_cloud:
+      yield cls(
+        timestamp=msg_t,
+        topic=to_ros_topic(sd.uri.topic),
+        msg=pc_to_ros_pcl(sd.point_cloud))
+
+      transforms += [sd.point_cloud.ego_pose, sd.point_cloud.ego_to_sensor]
+
+    elif sd.cuboids:
+      yield cls(
+        timestamp=msg_t,
+        topic=to_ros_topic(sd.uri.topic),
+        msg=cuboids_to_ros_marker_array(sd.cuboids))
+      transforms += [c.ego_pose for c in sd.cuboids]
+
+    elif sd.transform:
+      transforms += [sd.transform]
+
+    for transform in transforms:
+      # Stamp the transform with the datum time; without a stamp the header
+      # time stays at zero instead of matching the data it belongs to.
+      yield cls(
+        timestamp=msg_t,
+        topic='/tf',
+        msg=transform_to_ros(transform, nanostamp=nanostamp))
+
+
+def segment_to_bag(spark, sd_table, segment_id, dest):
+  """Write all datums of one segment, in time order, to a ROS bag file.
+
+  Args:
+    spark: Spark session used to read the segment.
+    sd_table: Table providing `get_segment_datum_rdd()`.
+    segment_id (str): Segment to export.
+    dest (str): Path of the bag file to create (lz4-compressed).
+  """
+  import rosbag
+
+  sd_rdd = sd_table.get_segment_datum_rdd(
+                  spark, segment_id, time_ordered=True) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+  bag = rosbag.Bag(dest, 'w', compression='lz4')
+  try:
+    n = 0
+    for sd in sd_rdd.toLocalIterator():
+      for rosmsg in ROSMsg.iter_rosmsgs_from_datum(sd):
+        bag.write(rosmsg.topic, rosmsg.msg, t=rosmsg.timestamp)
+        n += 1
+        if n % 100 == 0:
+          # Progress report: print the bag summary every 100 messages
+          print(bag)
+    print('done')
+    print(bag)
+  finally:
+    # Always close the bag, even when a conversion fails part-way, so the
+    # file handle is released.
+    bag.close()
+
+
+class DynamicPubNode(object):
+  """ROS node ('dynamicpubnode') that creates a publisher for each topic the
+  first time a message is published to it."""
+
+  def __init__(self):
+    # topic name -> rospy.Publisher, filled lazily by `publish()`
+    self._topic_to_pub = {}
+    rospy.init_node('dynamicpubnode')
+
+  def publish(self, topic, msg):
+    """Publish `msg` on `topic`.  On first use of a topic, a publisher is
+    created using the type of `msg` as the topic's message type."""
+    publisher = self._topic_to_pub.get(topic)
+    if publisher is None:
+      publisher = rospy.Publisher(topic, type(msg), queue_size=0)
+      self._topic_to_pub[topic] = publisher
+    publisher.publish(msg)
+
+
+def publish_segment(spark, sd_table, segment_id):
+  """Publish all datums of one segment, in time order, as live ROS messages.
+
+  Sleeps 0.1 seconds before each message and prints a progress line every
+  100 messages.
+
+  Args:
+    spark: Spark session used to read the segment.
+    sd_table: Table providing `get_segment_datum_rdd()`.
+    segment_id (str): Segment to publish.
+  """
+  import time
+
+  sd_rdd = sd_table.get_segment_datum_rdd(
+                  spark, segment_id, time_ordered=True)
+
+  ros_node = DynamicPubNode()
+
+  # Lazily flatten datums into their ROS messages
+  rosmsgs = (
+    rosmsg
+    for sd in sd_rdd.toLocalIterator()
+    for rosmsg in ROSMsg.iter_rosmsgs_from_datum(sd))
+
+  for n_published, rosmsg in enumerate(rosmsgs, start=1):
+    time.sleep(.1)
+    ros_node.publish(rosmsg.topic, rosmsg.msg)
+    if n_published % 100 == 0:
+      print('published %s' % n_published)
+
+
+if __name__ == '__main__':
+  # Ad-hoc manual smoke test: exercise each converter once on prototype
+  # datums, then publish a KITTI segment.  Paths are hard-coded for the
+  # author's machine.
+  import copy
+  print('moof')
+
+  # Sanity-print the distance -> color mapping used for point clouds
+  import numpy as np
+  ds = np.array([10., 12., 15., 17., 20., 25., 34.])
+  print(ds)
+  from psegs.util.plotting import rgb_for_distance
+  print(rgb_for_distance(ds))
+
+
+  from psegs.datum import stamped_datum as sd
+
+  info = ci_to_ros_camera_info(sd.CAMERAIMAGE_PROTO)
+
+  ros_tf = transform_to_ros(sd.TRANSFORM_PROTO)
+
+  ci = copy.deepcopy(sd.CAMERAIMAGE_PROTO)
+  ci.image_png = bytearray(open('/outer_root/home/au/psegs/yay.png', 'rb').read())
+  image_msg = ci_to_ros_image(ci)
+
+  pc = copy.deepcopy(sd.POINTCLOUD_PROTO)
+  pc.cloud = np.random.rand(10, 3)
+  pcl_msg = pc_to_ros_pcl(pc)
+
+  # NB: `[...] * 4` repeats one reference, so all four list entries are the
+  # same cuboid object.
+  cuboids = [copy.deepcopy(sd.CUBOID_PROTO)] * 4
+  for c in cuboids:
+    c.obj_from_ego.src_frame = 'ego'
+    c.obj_from_ego.dest_frame = 'obj'
+  cube_markers = cuboids_to_ros_marker_array(cuboids)
+
+
+  # ros_node = DynamicPubNode()
+  # ros_node.publish('camera', image_msg)
+  # ros_node.publish('pc', pcl_msg)
+  # ros_node.publish('cube', cube_markers)
+  # import sys
+  # sys.exit()
+
+
+  # Publish one KITTI segment live (the bag export is currently disabled)
+  from psegs.spark import Spark
+  from psegs.datasets import kitti
+  with Spark.sess() as spark:
+    # segment_to_bag(
+    #   spark,
+    #   kitti.KITTISDTable,
+    #   'train-0009',
+    #   '/outer_root/home/au/psegs/testbag.bag')
+  
+    publish_segment(
+      spark,
+      kitti.KITTISDTable,
+      'train-0009')
+
+
+  # import pdb; pdb.set_trace()
+  print()
diff --git a/psegs/render/__init__.py b/psegs/render/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/psegs/render/mesh2rgbd.py b/psegs/render/mesh2rgbd.py
new file mode 100644
index 0000000..5fb4905
--- /dev/null
+++ b/psegs/render/mesh2rgbd.py
@@ -0,0 +1,703 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+
+"""
+
+docker build -t psegs-pt3d -f docker/Dockerfile.pt3d .
+nvidia-docker run -d -it --name=psegs-pt3d -v `pwd`:/opt/psegs:z -w /opt/psegs -v/:/outer_root --net=host psegs-pt3d sleep infinity
+
+
+
+
+
+pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel
+
+!pip3 install pytorch3d==0.6.0 -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu102_pyt190/download.html
+"""
+
+def pytorch3d_iter_mesh2depth_for_camera_images(
+        cis,
+        mesh_path='',
+        batch_size=-1):
+  """Rasterize an OBJ mesh from each camera's pose and yield depth images.
+
+  The field of view and the output image size are taken from the first
+  camera image and reused for all of them.  Gradient tracking is disabled
+  globally in torch as a side effect.
+
+  Args:
+    cis: Camera images providing `get_fov()`, `height`, `width` and an
+      `ego_pose` that can be indexed with `['world', 'ego']`.
+    mesh_path (str): Path of the OBJ mesh to render.
+    batch_size (int): Number of cameras rasterized per batch; a negative
+      value selects the default of 10.
+
+  Yields:
+    np.ndarray: One 2-D z-buffer per camera image, in input order.
+  """
+
+  import time
+
+  from oarphpy import util as oputil
+
+  import torch
+
+  # Data structures and functions for rendering
+  from pytorch3d.structures import Meshes
+  from pytorch3d.io import load_obj
+  from pytorch3d.renderer import (
+      RasterizationSettings,
+      MeshRasterizer,
+      TexturesVertex,
+      FoVPerspectiveCameras,
+  )
+
+  if not cis:
+    return
+
+  if batch_size < 0:
+    # TODO estimate based upon image size
+    batch_size = 10
+
+  if torch.cuda.is_available():
+    device = torch.device("cuda:0")
+    torch.cuda.set_device(device)
+  else:
+    device = torch.device("cpu")
+
+  torch.set_grad_enabled(False) # does this make faster?
+
+  verts, faces_idx, _ = load_obj(mesh_path)
+  faces = faces_idx.verts_idx
+  print('verts', verts.shape)
+  print('faces', faces.shape)
+
+  # Give every vertex the same light-gray color.
+  verts_rgb = .9 * torch.ones_like(verts)[None]  # (1, V, 3)
+
+  # Repeat the single mesh `batch_size` times so that one rasterizer call
+  # renders it once per camera in the batch.
+  verts = verts.to(device).tile((batch_size, 1, 1))
+  faces = faces.to(device).tile((batch_size, 1, 1))
+  verts_rgb = verts_rgb.to(device).tile((batch_size, 1, 1))
+  mesh = Meshes(
+      verts=verts,
+      faces=faces,
+      textures=TexturesVertex(verts_features=verts_rgb))
+  print('mesh', mesh)
+
+  # Per-batch camera extrinsics, refilled for every chunk
+  R = torch.zeros(batch_size, 3, 3, dtype=torch.float32, device=device)
+  T = torch.zeros(batch_size, 3, dtype=torch.float32, device=device)
+
+  # For iOS !!!
+  world2pytorch = np.array([
+      [1, 0, 0, 0],
+      [0, -1, 0, 0],
+      [0, 0, -1, 0],
+      [0, 0, 0, 1],
+  ], dtype=np.float32)
+
+  fov_y = None
+  rasterizer_image_size = None
+  rasterizer = None
+
+  for ci_chunk in oputil.ichunked(cis, batch_size):
+    start = time.time()
+    print('start batch')
+
+    n_in_chunk = 0
+    for i, ci in enumerate(ci_chunk):
+      if fov_y is None:
+        fov_x, fov_y = ci.get_fov()
+
+      if rasterizer_image_size is None:
+        rasterizer_image_size = (ci.height, ci.width)
+
+      pose = ci.ego_pose['world', 'ego'].get_transformation_matrix(homogeneous=True)
+      pose = world2pytorch @ pose
+
+      R[i, :3, :3] = torch.from_numpy(pose[:3, :3])
+      T[i, :3] = torch.from_numpy(pose[:3, 3])
+      n_in_chunk = i + 1
+
+    # Convert to the PyTorch3D camera convention; adapted from
+    # `cameras_from_opencv_projection`, see
+    # https://github.com/facebookresearch/pytorch3d/issues/522
+    R_pytorch3d = R.clone().permute(0, 2, 1)
+    T_pytorch3d = T.clone()
+    R_pytorch3d[:, :, :2] *= -1
+    T_pytorch3d[:, :2] *= -1
+
+    cameras = FoVPerspectiveCameras(
+                    device=device,
+                    fov=fov_y,
+                    degrees=False,
+                    R=R_pytorch3d,
+                    T=T_pytorch3d,
+                    znear=0.01,
+                    zfar=100.)
+
+    if rasterizer is None:
+      rasterizer = MeshRasterizer(
+                    cameras=cameras,
+                    raster_settings=RasterizationSettings(
+                      image_size=rasterizer_image_size,
+                      faces_per_pixel=1))
+
+    fragments = rasterizer(meshes_world=mesh, cameras=cameras)
+    depth_batch = fragments.zbuf[:, :, :, 0].cpu().numpy()
+    print('batch done', time.time() - start)
+
+    # Only the first `n_in_chunk` slots belong to this chunk.  The last chunk
+    # can be shorter than `batch_size`; its remaining slots still hold poses
+    # from an earlier batch (or zeros) and must not be yielded.
+    for i in range(n_in_chunk):
+      yield depth_batch[i, :, :]
+
+
+def depth_to_uvd(depth, h, w):
+  """Convert a dense depth image into sparse `(u, v, depth)` rows.
+
+  Args:
+    depth: array that `np.dstack` can stack onto an `h` x `w` pixel grid
+      (i.e. an `h` x `w` depth image).
+    h: image height in pixels (number of rows).
+    w: image width in pixels (number of columns).
+
+  Returns:
+    An array with one row `(u, v, d)` -- pixel column, pixel row, depth --
+    for every pixel whose depth is strictly positive, in row-major order.
+  """
+  rows, cols = np.meshgrid(np.arange(h), np.arange(w), indexing='ij')
+  rows = rows.astype(np.float32)
+  cols = cols.astype(np.float32)
+
+  # One (v, u, d) triple per pixel, flattened to a table of rows
+  vud = np.dstack([rows, cols, depth]).reshape([-1, 3])
+
+  # Keep only pixels with strictly positive depth, then reorder to (u, v, d)
+  has_depth = vud[:, 2] > 0
+  return vud[has_depth][:, (1, 0, 2)]
+
+
+
+# Debug driver: for every scan directory under ROOT, rasterize the scan's mesh
+# into one depth image per camera frame, save each depth image as a `.npy`
+# file, and write debug overlays (one JPEG per frame plus an mp4 per scan).
+if __name__ == '__main__':
+  import sys
+  # Presumably makes the `psegs` package importable inside the container --
+  # TODO confirm.
+  sys.path.append('/opt/psegs')
+
+  import os
+
+  # Hard-coded location of the scans to process
+  ROOT = '/outer_root/media/970-evo-plus-raid0/lidarphone_lidar_scans/'
+
+  for d in sorted(os.listdir(ROOT)):
+    if '.DS_Store' in d:
+        continue
+
+    # HACK: only scans whose directory name contains this date are processed
+    if '2021_08_05' not in d:
+      print('hacks skip', d)
+      continue
+
+    base_dir = os.path.join(ROOT, d)
+    if not os.path.isdir(base_dir):
+        print('skipping non-dir', base_dir)
+        continue
+    print()
+    print()
+    print()
+    print(base_dir)
+
+
+    # NOTE(review): there is no path separator between `d` and the file name,
+    # so the video is written to ROOT next to the scan directory, whereas the
+    # two output directories below live inside it -- confirm this is intended.
+    outpath = os.path.join(ROOT, d + 'pytorch_rgbd_debug.mp4')
+    depth_outpath = os.path.join(ROOT, d + '/pytorch_depth2')
+    debug_outpath = os.path.join(ROOT, d + '/pytorch_debug')
+
+    # if os.path.exists(outpath):
+    #     print('aleady done', outpath)
+    #     continue
+
+    from psegs.datasets import ios_lidar
+
+
+    from oarphpy import util as oputil
+    # One camera image per `frame*.json` file, in sorted path order
+    json_paths = oputil.all_files_recursive(base_dir, pattern='frame*.json')
+    json_paths = sorted(json_paths)
+
+    # A scan whose frames fail an assertion while loading is skipped entirely
+    try:
+      cis = [ios_lidar.threeDScannerApp_create_camera_image(p) for p in json_paths]
+    except AssertionError as e:
+      print('err', e)
+      continue
+
+    print('len(cis)', len(cis))
+
+    oputil.mkdir(depth_outpath)
+    oputil.mkdir(debug_outpath)
+
+    # Prefer the refined mesh export; fall back to the plain export
+    mesh_path = os.path.join(base_dir, 'export_refined.obj')
+    if not os.path.exists(mesh_path):
+      mesh_path = os.path.join(base_dir, 'export.obj')
+
+
+    import imageio
+    # NOTE(review): the writer is only closed on the success path below; an
+    # exception while rendering leaves it open.
+    writer = imageio.get_writer(outpath, fps=5)
+
+    # from psegs.render.mesh2rgbd import pytorch3d_iter_mesh2depth_for_camera_images
+
+    # `zip` pairs each camera image with the depth image yielded for it and
+    # stops after the last camera image.
+    iter_depth = pytorch3d_iter_mesh2depth_for_camera_images(cis, mesh_path)
+    for i, (ci, depth) in enumerate(zip(cis, iter_depth)):
+
+      frame_name = ci.extra['threeDScannerApp.frame_json_name']
+      depth_dest = os.path.join(depth_outpath, frame_name + '.npy')
+      np.save(depth_dest, depth)
+
+      # The return value of `draw_xy_depth_in_image` is unused, so it
+      # presumably draws into `debug` in place -- TODO confirm.
+      debug = ci.image
+      from psegs.util.plotting import draw_xy_depth_in_image
+      uvd = depth_to_uvd(depth, ci.height, ci.width)
+      draw_xy_depth_in_image(debug, uvd, period_meters=0.1)
+      writer.append_data(debug)
+      imageio.imwrite(
+        os.path.join(debug_outpath, frame_name + '.debug.jpg'),
+        debug)
+      print(i)
+
+    writer.close()
+
+    # Try to release cached GPU memory and collectable objects before the
+    # next scan
+    import torch
+    torch.cuda.empty_cache()
+
+    import gc
+    gc.collect()
+
+    print('done', outpath)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  
+  # plt.imshow(zbuf[0, ..., 0].cpu().numpy())
+  # plt.show()
+  # print('zbuf', zbuf.min(), zbuf.max(), zbuf[zbuf > 0].min())
+  # # display(F.to_pil_image(image_ref.cpu().numpy()[0].astype(np.uint8)[:, :, -1]))
+  # # image_ref.cpu().numpy()[0]
+
+  # depth = zbuf[0, ..., 0].cpu().numpy()
+  # h, w = rasterizer_image_size
+  # px_y = np.tile(np.arange(h)[:, np.newaxis], [1, w])
+  # px_x = np.tile(np.arange(w)[np.newaxis, :], [h, 1])
+  # pyx = np.concatenate([px_y[:,:,np.newaxis], px_x[:, :, np.newaxis]], axis=-1)
+  # pyx = pyx.astype(np.float32)
+
+  # vud1 = np.dstack([pyx, depth]).reshape([-1, 3])
+
+  # vud1 = vud1[vud1[:, 2] > 0]
+  # uvd = vud1[:, (1, 0, 2)]
+  # yield uvd
+
+
+  # from psegs.util.plotting import draw_xy_depth_in_image
+  # draw_xy_depth_in_image(debug, uvd, period_meters=0.1)
+
+
+  # import torchvision.transforms.functional as F
+  # import numpy as np
+  # pil_img = F.to_pil_image(debug.astype(np.uint8))
+
+  # from IPython.display import display
+  # display(pil_img)
+
+
+
+
+
+
+# def pytorch3d_mesh2depth_for_camera_images(cis, mesh_path='', batch_size=-1):
+#   if batch_size < 0:
+#     batch_size = 10
+  
+#   from oarphpy import util as oputil
+
+#   from pytorch3d.io import load_obj
+#   obj_path = os.path.join(base_dir, 'export_refined.obj')
+#   verts, faces_idx, _ = load_obj(obj_path)
+#   faces = faces_idx.verts_idx
+#   print('verts', verts.shape)
+#   print('faces', faces.shape)
+
+#   nverts = verts.numpy()
+#   nfaces = faces.numpy()
+
+#   import numpy as np
+# # obj_path = os.path.join(base_dir, 'export_refined.obj')
+# fov_x, fov_y = CI.get_fov()
+# K = CI.K
+# height, width = CI.height, CI.width
+# pose = CI.ego_pose['ego', 'world'].get_inverse().get_transformation_matrix(homogeneous=True)
+# pose = pose.astype(np.float32)
+# K = K.astype(np.float32)
+
+
+
+# import numpy as np
+# world2pytorch = np.array([
+#     [1, 0, 0, 0],
+#     [0, -1, 0, 0],
+#     [0, 0, -1, 0],
+#     [0, 0, 0, 1],
+# ], dtype=np.float32)
+
+# pose = world2pytorch @ pose
+
+# # pose[0, 0] *= -1
+# # pose[1, 1] *= -1
+# # pose[2, 2] *= -1
+
+# import os
+# import sys
+# import torch
+
+# import pytorch3d
+
+# import os
+# import torch
+# import matplotlib.pyplot as plt
+
+# from pytorch3d.utils import ico_sphere
+# import numpy as np
+# from tqdm.notebook import tqdm
+
+
+
+
+# # Util function for loading meshes
+# from pytorch3d.io import load_objs_as_meshes, save_obj, load_obj
+
+# from pytorch3d.loss import (
+#     chamfer_distance, 
+#     mesh_edge_loss, 
+#     mesh_laplacian_smoothing, 
+#     mesh_normal_consistency,
+# )
+
+# # Data structures and functions for rendering
+# from pytorch3d.structures import Meshes
+# from pytorch3d.renderer import (
+#     look_at_view_transform,
+#     OpenGLPerspectiveCameras, 
+#     PointLights, 
+#     DirectionalLights, 
+#     Materials, 
+#     RasterizationSettings, 
+#     MeshRenderer, 
+#     MeshRasterizer,  
+#     SoftPhongShader,
+#     SoftSilhouetteShader,
+#     SoftPhongShader,
+#     TexturesVertex,
+#     AmbientLights
+# )
+
+# # add path for demo utils functions 
+# import sys
+# import os
+# sys.path.append(os.path.abspath(''))
+
+
+# # io utils
+# from pytorch3d.io import load_obj
+
+# # datastructures
+# from pytorch3d.structures import Meshes
+
+# # 3D transformations functions
+# from pytorch3d.transforms import Rotate, Translate
+
+# # rendering components
+# from pytorch3d.renderer import (
+#     FoVPerspectiveCameras, look_at_view_transform, look_at_rotation, 
+#     RasterizationSettings, MeshRenderer, MeshRasterizer, BlendParams,
+#     SoftSilhouetteShader, HardPhongShader, PointLights, TexturesVertex,
+#     HardGouraudShader, SoftGouraudShader,HardFlatShader,PerspectiveCameras,FoVOrthographicCameras,
+# )
+
+
+# if torch.cuda.is_available():
+#     device = torch.device("cuda:0")
+#     torch.cuda.set_device(device)
+# else:
+#     device = torch.device("cpu")
+# # device = torch.device("cpu")
+
+# # # Set paths
+# # DATA_DIR = "./data"
+# # obj_filename = os.path.join(DATA_DIR, "cow_mesh/cow.obj")
+
+# # Load obj file
+# # mesh = load_objs_as_meshes([obj_path], device=device)
+
+# # Load the obj and ignore the textures and materials.
+# # verts, faces_idx, _ = load_obj(obj_path)
+# # faces = faces_idx.verts_idx
+# # print('verts', verts.shape)
+# # print('faces', faces.shape)
+# import torch
+# verts = torch.from_numpy(nverts)
+# faces = torch.from_numpy(nfaces)
+
+# # Initialize each vertex to be white in color.
+# verts_rgb = .9 * torch.ones_like(verts)[None]  # (1, V, 3)
+# textures = TexturesVertex(verts_features=verts_rgb.to(device))
+
+# # Create a Meshes object for the teapot. Here we have only one mesh in the batch.
+# teapot_mesh = Meshes(
+#     verts=[verts.to(device)],   
+#     faces=[faces.to(device)], 
+#     textures=textures,
+# )
+# print('teapot_mesh', teapot_mesh)
+# # teapot_mesh = load_objs_as_meshes([obj_path], device=device)
+
+
+# import torch
+# import numpy as np
+# R = torch.from_numpy(pose[:3, :3].reshape([1, 3, 3])).to(device)
+# T = torch.from_numpy(pose[:3, 3].reshape([1, 3])).to(device)
+
+
+# # Select the viewpoint using spherical angles  
+# distance = 5   # distance from camera to the object
+# elevation = 50.0   # angle of elevation in degrees
+# azimuth = 0.0  # No rotation so the camera is positioned on the +Z axis. 
+
+# # Get the position of the camera based on the spherical angles
+# # R, T = look_at_view_transform(distance, elevation, azimuth, device=device)
+# print('R', R)
+# print('T', T)
+
+# tK = np.eye(4).astype(np.float32)
+# tK[:3, :3] = K
+
+# # # Great job pytorch3d!! 
+# # # https://github.com/facebookresearch/pytorch3d/blob/103da63393d6bbb697835ddbfc86b07572ea4d0c/tests/test_camera_conversions.py#L116
+# # tK[0, 0] = 1.1 * K[0, 0]
+# # tK[1, 1] = 1.1 * K[1, 1]
+# # tK[2, 0] = 1.1 * K[2, 0]
+# # tK[2, 1] = 1.1 * K[2, 1]
+
+
+# tK = torch.from_numpy(tK.reshape([1, 4, 4])).to(device)
+# print('K', tK)
+
+# image_size = torch.from_numpy(np.array([height, width]).reshape([1, 2])).to(device)
+# print('image_size', image_size)
+
+# # https://github.com/facebookresearch/pytorch3d/issues/522
+# from pytorch3d.utils.camera_conversions import cameras_from_opencv_projection
+
+
+
+
+# cameras = cameras_from_opencv_projection(R, T, tK, image_size, device=device).cuda()
+
+# # assert False, (cameras.R, cameras.T, cameras.get_world_to_view_transform().device)
+# # assert False, cameras.get_world_to_view_transform().device
+# # print('get_world_to_view_transform', xform.device, cameras.R, cameras.T)
+
+# # cameras = FoVPerspectiveCameras(device=device, fov=fov_x, degrees=False)#, K=K)
+
+# # # hack up cameras_from_opencv_projection
+# # camera_matrix = tK
+# tvec = T
+# # focal_length = torch.stack([camera_matrix[:, 0, 0], camera_matrix[:, 1, 1]], dim=-1)
+# # principal_point = camera_matrix[:, :2, 2]
+
+# # # Retype the image_size correctly and flip to width, height.
+# # image_size_wh = image_size.to(R).flip(dims=(1,))
+
+# # # Get the PyTorch3D focal length and principal point.
+# # focal_pytorch3d = focal_length / (0.5 * image_size_wh)
+# # p0_pytorch3d = -(principal_point / (0.5 * image_size_wh) - 1)
+
+# # For R, T we flip x, y axes (opencv screen space has an opposite
+# # orientation of screen axes).
+# # We also transpose R (opencv multiplies points from the opposite=left side).
+# R_pytorch3d = R.clone().permute(0, 2, 1)
+# T_pytorch3d = tvec.clone()
+# R_pytorch3d[:, :, :2] *= -1
+# T_pytorch3d[:, :2] *= -1
+# # cameras = PerspectiveCameras(
+# #             device=device, R=R_pytorch3d,
+# #             T=T_pytorch3d,
+# #             focal_length=focal_pytorch3d,
+# #             principal_point=p0_pytorch3d, image_size=image_size, in_ndc=True)
+
+# fov_x, fov_y = CI.get_fov()
+# cameras = FoVPerspectiveCameras(
+#     device=device, fov=fov_y, degrees=False, R=R_pytorch3d, T=T_pytorch3d, aspect_ratio=1.0)
+
+
+# # cameras = PerspectiveCameras(device=device, K=K, R=R, T=T, in_ndc=False, image_size=image_size)
+
+
+# raster_settings = RasterizationSettings(
+#     image_size=(height, width), 
+#     faces_per_pixel=1, 
+# )
+# lights = PointLights(
+#     device=device, 
+#     location=cameras.get_world_to_view_transform().transform_points(torch.tensor([[0., 0., -1.]]).cuda()),
+# )
+# # lights = AmbientLights(device=device)
+# blend_params = BlendParams(sigma=1e-4, gamma=1e-4, background_color=(0.1, 0.1, 0.1))
+# rasterizer = MeshRasterizer(
+#         cameras=cameras, 
+#         raster_settings=raster_settings
+#     )
+# phong_renderer = MeshRenderer(
+#     rasterizer=rasterizer,
+#     shader=HardPhongShader(device=device, cameras=cameras, lights=lights, blend_params=blend_params)
+# )
+
+
+# image_ref = phong_renderer(meshes_world=teapot_mesh)
+
+
+# import torchvision.transforms.functional as F
+# import numpy as np
+# pil_img = F.to_pil_image((255.0*image_ref.cpu().numpy()[0]).astype(np.uint8))
+
+# from IPython.display import display
+# display(pil_img)
+
+# import matplotlib.pyplot as plt
+
+# fragments = rasterizer(meshes_world=teapot_mesh)
+
+# zbuf = fragments.zbuf
+# plt.imshow(zbuf[0, ..., 0].cpu().numpy())
+# plt.show()
+# print('zbuf', zbuf.min(), zbuf.max(), zbuf[zbuf > 0].min())
+# # display(F.to_pil_image(image_ref.cpu().numpy()[0].astype(np.uint8)[:, :, -1]))
+# # image_ref.cpu().numpy()[0]
+
+# debug = CI.image
+# depth = zbuf[0, ..., 0].cpu().numpy()
+# h, w = debug.shape[:2]
+# px_y = np.tile(np.arange(h)[:, np.newaxis], [1, w])
+# px_x = np.tile(np.arange(w)[np.newaxis, :], [h, 1])
+# pyx = np.concatenate([px_y[:,:,np.newaxis], px_x[:, :, np.newaxis]], axis=-1)
+# pyx = pyx.astype(np.float32)
+
+# vud1 = np.dstack([pyx, depth]).reshape([-1, 3])
+
+# vud1 = vud1[vud1[:, 2] > 0]
+# uvd = vud1[:, (1, 0, 2)]
+
+
+# from psegs.util.plotting import draw_xy_depth_in_image
+# draw_xy_depth_in_image(debug, uvd, period_meters=0.1)
+
+
+# import torchvision.transforms.functional as F
+# import numpy as np
+# pil_img = F.to_pil_image(debug.astype(np.uint8))
+
+# from IPython.display import display
+# display(pil_img)
+
diff --git a/psegs/render/pybullet_ttc.py b/psegs/render/pybullet_ttc.py
new file mode 100644
index 0000000..d2d487f
--- /dev/null
+++ b/psegs/render/pybullet_ttc.py
@@ -0,0 +1,218 @@
+# Copyright 2021 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# TODO https://pybullet.org/Bullet/phpBB3/viewtopic.php?f=24&t=13269 
+
+import tempfile
+
+import attr
+
+from psegs.util import misc
+
+
+@attr.s(slots=True, eq=False, weakref_slot=False)
+class CuboidAgent(object):
+  """A box-shaped rigid body to simulate in pybullet.
+
+  The attributes are plain configuration.  `pybullet_init()` creates the body
+  in a pybullet world and records its id in `obj_id`; `step_acceleration()`
+  is meant to be called once per simulation step.
+
+  All list-valued attributes use a factory so that every instance gets its
+  own list (a literal list default would be shared between instances).
+  """
+
+  # pybullet body id; stays -1 until `pybullet_init()` has run
+  obj_id = attr.ib(default=-1)
+
+  # Name written into the URDF `<material>` element
+  material_color = attr.ib(default='white')
+
+  mass_kg = attr.ib(default=1)
+
+  # Box extents along x, y, z
+  size_xyz = attr.ib(default=attr.Factory(lambda: [1., 1., 1.]))
+
+  # Initial pose: position and roll / pitch / yaw
+  init_xyz = attr.ib(default=attr.Factory(lambda: [0., 0., 0.]))
+  init_rpy = attr.ib(default=attr.Factory(lambda: [0., 0., 0.]))
+
+  init_velocity = attr.ib(default=attr.Factory(lambda: [0., 0., 0.]))
+  init_angular_velocity = attr.ib(default=attr.Factory(lambda: [0., 0., 0.]))
+
+  # Vector passed to `applyExternalForce()` on every step
+  constant_acceleration = attr.ib(default=attr.Factory(lambda: [0., 0., 0.]))
+
+
+  def pybullet_init(self, p):
+    """Create this cuboid in the pybullet world and set its initial state.
+
+    Args:
+      p: the `pybullet` module (or a compatible client object) connected to
+        the target simulation.
+
+    Side effects:
+      Sets `self.obj_id` to the id of the loaded body.
+    """
+    import os
+
+    sz_x, sz_y, sz_z = self.size_xyz
+    # The collision box must have the same extents as the visual box.
+    CUBE_URDF = f"""
+      <?xml version="1.0"?>
+      <robot name="psegs_cuboid">
+        <link name="base_link">
+          <visual>
+            <geometry>
+              <box size="{sz_x} {sz_y} {sz_z}"/>
+            </geometry>
+            <origin rpy="0 0 0" xyz="0 0 0"/>
+            <material name="{self.material_color}"/>
+          </visual>
+          <collision>
+            <geometry>
+              <box size="{sz_x} {sz_y} {sz_z}"/>
+            </geometry>
+            <origin rpy="0 0 0" xyz="0 0 0"/>
+          </collision>
+        </link>
+      </robot>
+      """
+
+    # pybullet loads URDFs from a path, so write the description to a temp
+    # file that we own (delete=False), and remove it once it has been loaded.
+    with tempfile.NamedTemporaryFile(
+            mode='w', suffix='.urdf', delete=False) as f:
+      f.write(CUBE_URDF)
+      urdf_path = f.name
+    try:
+      self.obj_id = p.loadURDF(urdf_path)
+    finally:
+      os.remove(urdf_path)
+
+    # Link index -1 addresses the base link
+    p.changeDynamics(self.obj_id, -1, mass=self.mass_kg)
+
+    p.resetBaseVelocity(
+      objectUniqueId=self.obj_id,
+      linearVelocity=self.init_velocity,
+      angularVelocity=self.init_angular_velocity)
+
+    p.resetBasePositionAndOrientation(
+      bodyUniqueId=self.obj_id,
+      posObj=self.init_xyz,
+      ornObj=p.getQuaternionFromEuler(self.init_rpy))
+
+
+  def step_acceleration(self, p):
+    """Apply `constant_acceleration` to the body for the coming step.
+
+    NOTE(review): the vector is handed to `applyExternalForce()` unchanged,
+    i.e. it is used as a force.  That equals the intended acceleration only
+    for `mass_kg == 1` -- confirm whether it should be scaled by the mass.
+
+    Args:
+      p: the `pybullet` module (or client) that owns this body.
+    """
+    p.applyExternalForce(
+          objectUniqueId=self.obj_id,
+          linkIndex=-1,
+          forceObj=self.constant_acceleration,
+          posObj=[0, 0, 0],
+          flags=p.LINK_FRAME)
+
+@attr.s(slots=True, eq=False, weakref_slot=False)
+class PyBulletSim(object):
+  """Run a fixed-duration pybullet simulation of a set of `CuboidAgent`s,
+  optionally recording a debug video of the scene."""
+
+  # pybullet body id of the ground plane; set by `_set_up_world()`
+  ground_plane_id = attr.ib(default=-1)
+
+  # Agents to simulate.  A factory gives every sim its own list; a literal
+  # `[]` default would be shared by all instances.
+  cuboid_agents = attr.ib(default=attr.Factory(list))
+
+  # Simulation steps per second (also the frame rate of the debug video)
+  time_step_Hz = attr.ib(default=20)
+
+  duration_sec = attr.ib(default=2)
+
+  # Debug camera: distance to the target, index (into `cuboid_agents`) of the
+  # agent whose initial position is the target, and rendered image size
+  debug_cam_distance = attr.ib(default=5)
+  debug_cam_look_at_agent = attr.ib(default=0)
+  debug_image_width = attr.ib(default=900)
+  debug_image_height = attr.ib(default=900)
+
+  @classmethod
+  def start_direct(cls):
+    """Connect to pybullet in `DIRECT` mode and return the `pybullet` module."""
+    import pybullet as p
+    p.connect(p.DIRECT)
+    return p
+
+  def _set_up_world(self, p):
+    """Reset the simulation, then load the ground plane and every agent."""
+    import pybullet_data
+    p.setAdditionalSearchPath(pybullet_data.getDataPath())
+
+    p.resetSimulation()
+
+    self.ground_plane_id = p.loadURDF('plane.urdf')
+
+    for aa in self.cuboid_agents:
+      aa.pybullet_init(p)
+
+  def _print_agent_distances(self, p):
+    """Print the closest-point distance for every unordered pair of agents."""
+    for aa1 in self.cuboid_agents:
+      for aa2 in self.cuboid_agents:
+        # Visit each pair once, ordered by body id
+        if not (aa1.obj_id < aa2.obj_id):
+          continue
+        print(aa1.obj_id, aa2.obj_id)
+        pts = p.getClosestPoints(aa1.obj_id, aa2.obj_id, float('inf'))
+        print('len(pts)', len(pts))
+        if not pts:
+          # Nothing to report for this pair
+          continue
+        # Fields 5, 6, 8 of a closest-point record: position on A,
+        # position on B, and the distance between them.
+        distance = pts[0][8]
+        ptA = pts[0][5]
+        ptB = pts[0][6]
+        print('distance', distance, 'ptA', ptA, 'ptB', ptB)
+
+  def _render_debug_frame(self, p, look_at):
+    """Render the debug camera view and return it as a uint8 HxWx3 array."""
+    import numpy as np
+
+    result = p.getCameraImage(
+        self.debug_image_width,
+        self.debug_image_height,
+        viewMatrix=
+          p.computeViewMatrixFromYawPitchRoll(
+            cameraTargetPosition=look_at,
+            distance=self.debug_cam_distance,
+            yaw=20,
+            pitch=-10,
+            roll=0,
+            upAxisIndex=2),
+        projectionMatrix=p.computeProjectionMatrixFOV(
+            fov=60,
+            aspect=float(self.debug_image_width) / self.debug_image_height,
+            nearVal=0.01,
+            farVal=500.),
+        shadow=True,
+        lightDirection=[1, 1, 1])
+
+    width, height, rgba, depth, mask = result
+
+    # pybullet returns a flat pixel list when built without numpy support;
+    # reshaping covers both that case and the ndarray case.
+    rgba = np.reshape(rgba, (height, width, 4))
+    return rgba[:, :, :3].astype(np.uint8)
+
+  def run(self, p=None, debug_video_out='/tmp/pybullet_debug.mp4'):
+    """Step the simulation for `duration_sec` seconds.
+
+    Args:
+      p: an already-connected `pybullet` module / client whose world has been
+        set up by the caller.  When `None`, a `DIRECT` connection is opened
+        and the world is built from `cuboid_agents`.
+      debug_video_out: path of a video to record (one frame per simulation
+        step); pass a falsy value to disable recording.
+    """
+    if p is None:
+      p = self.start_direct()
+      self._set_up_world(p)
+
+    p.setTimeStep(1. / self.time_step_Hz)
+
+    # Every fragment needs the `f` prefix, otherwise the placeholders are
+    # logged literally.
+    misc.log.info(
+      f"Running pybullet sim with {p.getNumBodies()} bodies "
+      f"for {self.duration_sec} seconds at "
+      f"{self.time_step_Hz} Hz ...")
+
+    debug_writer = None
+    debug_look_at = [0., 0., 0.]
+    if debug_video_out:
+      import imageio
+
+      # Point the camera at the chosen agent; keep the origin if there are
+      # no agents at all.
+      if self.cuboid_agents:
+        aa = self.cuboid_agents[self.debug_cam_look_at_agent]
+        debug_look_at = aa.init_xyz
+      debug_writer = imageio.get_writer(debug_video_out, fps=self.time_step_Hz)
+
+    try:
+      # Count steps instead of accumulating float time so that rounding
+      # drift cannot add an extra step.
+      n_steps = 0
+      while n_steps < self.duration_sec * self.time_step_Hz:
+
+        # Update accelerations
+        for aa in self.cuboid_agents:
+          aa.step_acceleration(p)
+
+        p.stepSimulation()
+        p.performCollisionDetection()
+
+        self._print_agent_distances(p)
+
+        if debug_writer is not None:
+          debug_writer.append_data(
+            self._render_debug_frame(p, debug_look_at))
+
+        n_steps += 1
+        if (n_steps % 100) == 0:
+          misc.log.info(f"... rendered {n_steps} steps ...")
+    finally:
+      # Finalize the video even if a step raised
+      if debug_writer is not None:
+        debug_writer.close()
+
+
+
+
+
+
diff --git a/psegs/seg2html.py b/psegs/seg2html.py
new file mode 100644
index 0000000..a15cb11
--- /dev/null
+++ b/psegs/seg2html.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+# vim: tabstop=2 shiftwidth=2 expandtab
+
+# Copyright 2021 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+DESC = """
+seg2html.py -- A library module of tools (as well as a script) to convert
+PSegs segments to HTML visualizations.  Run this script in the PSegs dockerized
+environment; FMI see ./psegs-util --help in the PSegs project.
+
+## Example
+
+python3 psegs/seg2html.py \
+  --segment-id=charuco-lowres-test \
+  --out-dir=./my_html_viz
+
+This will render only the segment named `charuco-lowres-test` to HTML and
+put rendered assets in ./my_html_viz .
+
+"""
+
+import os
+
+from psegs import xform as psx
+
+
+def save_htmls(
+      sdtables=None,
+      seg_uris=None,
+      out_dir='/tmp',
+      partition_by_segment=False):
+  """Render segments to rich HTML files under `out_dir`.
+
+  Args:
+    sdtables: sequence of `StampedDatumTable`s to render; only used when
+      `seg_uris` is empty / `None`.
+    seg_uris: sequence of segment URIs (URI objects or strings); the table
+      for each is fetched via `psx.get_segment_tables_for_uris()`.
+    out_dir: directory that receives the HTML files.
+    partition_by_segment: if true, write each page to
+      `<out_dir>/<partition path>/rich_viz.html`; otherwise write
+      `<out_dir>/<dataset>.<split>.<segment_id>.html`, which requires every
+      table to cover exactly one segment.
+
+  Raises:
+    AssertionError: if neither `seg_uris` nor `sdtables` is given, or (when
+      not partitioning) a table covers more than one segment.
+  """
+
+  from pathlib import Path
+
+  from oarphpy import util as oputil
+  from tqdm import tqdm
+  import six
+
+  from psegs import table
+  from psegs import datum
+  from psegs import util
+
+  out_dir = Path(out_dir)
+
+  segs = seg_uris or sdtables
+  assert segs, "Need either Segment URIs or StampedDatumTables"
+
+  util.log.info(
+    f"Saving to {out_dir}. Have {len(segs)} segments to HTMLize ...")
+  pbar = tqdm(segs)
+  total_bytes = 0
+  for seg in pbar:
+    ## Fetch segment data
+    if isinstance(seg, table.StampedDatumTable):
+      sdt = seg
+    else:
+      if isinstance(seg, six.string_types):
+        seg_uri = datum.URI.from_str(seg)
+      else:
+        seg_uri = seg
+      sdt = psx.get_segment_tables_for_uris([seg_uri])[0]
+
+    ## Decide where to output
+    sdt_seg_uris = sdt.get_all_segment_uris()
+    if partition_by_segment:
+      partition_path = psx.get_partition_path(sdt_seg_uris)
+      dest = out_dir / partition_path / "rich_viz.html"
+    else:
+      assert len(sdt_seg_uris) == 1, \
+        f"Table {sdt} has data for more than one segment: {sdt_seg_uris}"
+      seg_uri = sdt_seg_uris[0]
+      fname = '.'.join((
+                    seg_uri.dataset or "anon_dataset",
+                    seg_uri.split or "anon_split",
+                    seg_uri.segment_id or "anon_segment_id"))
+      dest = out_dir / (fname + '.html')
+
+    ## Render!
+    html = sdt.to_rich_html()
+    total_bytes += len(html)
+
+    oputil.mkdir(dest.parent)
+    # Fix the encoding so the output does not depend on the platform locale
+    with open(dest, 'w', encoding='utf-8') as f:
+      f.write(html)
+
+    pbar.set_description(f"Wrote total {(1e-6*total_bytes):.2f} MBytes")
+
+
+def create_arg_parser():
+  """Build and return the `argparse` parser for the seg2html script.
+
+  The parser shows `DESC` verbatim as its description, defines `--out-dir`
+  and `--partition-by-segment`, and then lets `psx.configure_arg_parser()`
+  register its own options.
+  """
+  from argparse import ArgumentParser
+  from argparse import RawDescriptionHelpFormatter
+
+  arg_parser = ArgumentParser(
+    formatter_class=RawDescriptionHelpFormatter,
+    description=DESC)
+
+  default_out_dir = os.path.abspath('./my_html_viz')
+  arg_parser.add_argument(
+    '--out-dir',
+    default=default_out_dir,
+    help='Place all computed assets in this directory [default %(default)s].')
+
+  partition_help = (
+    'Save rendered assets in a directory tree partitioned by dataset, '
+    'split, and segment_id (as is done for PSegs StampedDatumTables).')
+  arg_parser.add_argument(
+    '--partition-by-segment',
+    action='store_true',
+    default=False,
+    help=partition_help)
+
+  psx.configure_arg_parser(arg_parser)
+  return arg_parser
+
+
+def main(args=None):
+  """Script entry point: render the segments selected by `args` to HTML.
+
+  Args:
+    args: a parsed-arguments namespace; when `None`, the process command
+      line is parsed with `create_arg_parser()`.
+  """
+  if args is None:
+    args = create_arg_parser().parse_args()
+
+  save_htmls(
+    seg_uris=psx.get_matching_seg_uris(args),
+    out_dir=args.out_dir,
+    partition_by_segment=args.partition_by_segment)
+
+if __name__ == '__main__':
+  main()
diff --git a/psegs/spark.py b/psegs/spark.py
new file mode 100644
index 0000000..7e50b96
--- /dev/null
+++ b/psegs/spark.py
@@ -0,0 +1,109 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import os
+
+from oarphpy import spark
+from oarphpy import util as oputil
+
+import psegs
+from psegs import util
+
+
+class Spark(spark.SessionFactory):
+  """An oarphpy `SessionFactory` configured for PSegs: it points the factory
+  at the `psegs` source tree and supplies PSegs-specific Spark settings."""
+
+  # Directory that contains the installed `psegs` package
+  SRC_ROOT = os.path.dirname(psegs.__file__)
+
+  # Names of the modules under `SRC_ROOT` that belong to this project
+  SRC_ROOT_MODULES = ['psegs']
+
+  CONF_KV = {
+    'spark.driver.maxResultSize': '20g',
+    'spark.driver.memory': '32g',
+    'spark.executor.memory': '32g',
+    # 'spark.driver.cores': '6',
+    # 'spark.memory.offHeap.enabled': 'true',
+    # 'spark.memory.offHeap.size': '100g',
+
+    # Needed for notebook-based development; FMI see oarphpy.spark.NBSpark
+    'spark.files.overwrite': 'true',
+
+    # Helps reduce memory leaks related to matplotlib / tensorflow / etc
+    'spark.python.worker.reuse': False,
+    # 'spark.blockManager.port': '5555',
+    # 'spark.driver.extraJavaOptions': '-Dlog4j.logger.org.apache.spark.api.python.PythonGatewayServer=DEBUG',
+    # 'spark.driver.extraJavaOptions': '-Dlog4jspark.root.logger=DEBUG,console',
+
+    # Partitions need to be big enough to potentially fit
+    # point clouds / images
+    'spark.sql.files.maxPartitionBytes': int(8 * 1e6),
+
+    # Allow lots of Spark drivers on a single machine (e.g. a dev machine)
+    'spark.port.maxRetries': '256',
+
+  }
+
+def save_sd_tables(
+      sdts,
+      spark=None,
+      compute_df_sizes=True,
+      spark_save_opts=None):
+  """Save every table in `sdts` through `save_df_thunks()`.
+
+  Each table is wrapped in a zero-argument callable, so its Spark DataFrame
+  is only built (via `to_spark_df(spark=spark)`) at the moment
+  `save_df_thunks()` invokes that callable.
+
+  Args:
+    sdts: iterable of tables exposing `to_spark_df(spark=...)`.
+    spark: Spark session forwarded to `to_spark_df()`.
+    compute_df_sizes: forwarded to `save_df_thunks()`.
+    spark_save_opts: forwarded to `save_df_thunks()`.
+
+  Returns:
+    Whatever `save_df_thunks()` returns.
+  """
+
+  def make_thunk(sd_table):
+    def build_df():
+      return sd_table.to_spark_df(spark=spark)
+    return build_df
+
+  return save_df_thunks(
+    [make_thunk(sdt) for sdt in sdts],
+    spark_save_opts=spark_save_opts,
+    compute_df_sizes=compute_df_sizes)
+
+
+def save_df_thunks(df_thunks, compute_df_sizes=True, spark_save_opts=None):
+  """Build and append-save a sequence of Spark DataFrames, one at a time.
+
+  Args:
+    df_thunks: list of zero-argument callables, each returning a Spark
+      DataFrame.  NOTE: the list is consumed in place (thunks are popped
+      from the front as they are processed) -- presumably so each thunk can
+      be released once written; pass a copy to keep the original list.
+    compute_df_sizes: if true, persist each DataFrame and sum
+      `oputil.get_size_of_deep` over its rows for throughput reporting.
+    spark_save_opts: keyword options for `DataFrame.write.save()`; a `path`
+      entry is converted to `str`.  The caller's dict is not modified.
+  """
+  # Work on a copy so that normalizing 'path' never mutates the caller's dict
+  spark_save_opts = dict(spark_save_opts or {})
+  if 'path' in spark_save_opts:
+    # JRDD bridge below requires string
+    spark_save_opts['path'] = str(spark_save_opts['path'])
+
+  t = oputil.ThruputObserver(name='save_df_thunks', n_total=len(df_thunks))
+  util.log.info("Going to write in %s chunks ..." % len(df_thunks))
+  while df_thunks:
+    df_thunk = df_thunks.pop(0)
+    t.start_block()
+    df = df_thunk()
+    num_bytes = 0
+    try:
+      if compute_df_sizes:
+        # Persist so that sizing and saving do not compute the data twice
+        df = df.persist()
+        num_bytes = df.rdd.map(oputil.get_size_of_deep).sum()
+      df.write.save(mode='append', **spark_save_opts)
+    finally:
+      # Release any cached data even when sizing or the write fails
+      df.unpersist()
+
+    t.stop_block(n=1, num_bytes=num_bytes)
+    t.maybe_log_progress(every_n=1)
+
+
+# Expose an NBSpark subclass configured for PSegs.  NB: `copy.deepcopy()` of a
+# class returns the very same class object, so "copying" `spark.NBSpark` and
+# then assigning to the result would reconfigure oarphpy's own `NBSpark` for
+# every user of that library.  A real subclass with its own `CONF_KV` dict
+# keeps the PSegs settings local to this module.
+class NBSpark(spark.NBSpark):
+  SRC_ROOT = Spark.SRC_ROOT
+  SRC_ROOT_MODULES = Spark.SRC_ROOT_MODULES
+  CONF_KV = dict(spark.NBSpark.CONF_KV)
+  CONF_KV.update(Spark.CONF_KV)
diff --git a/psegs/table/__init__.py b/psegs/table/__init__.py
new file mode 100644
index 0000000..80d12dd
--- /dev/null
+++ b/psegs/table/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from psegs.table.sd_table import StampedDatumTable
+from psegs.table.sd_table_factory import StampedDatumTableFactory
diff --git a/psegs/table/cached_factory.py b/psegs/table/cached_factory.py
new file mode 100644
index 0000000..d4e2117
--- /dev/null
+++ b/psegs/table/cached_factory.py
@@ -0,0 +1,53 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from psegs import datum
+from psegs import util
+from psegs.table.sd_table_factory import StampedDatumTableFactory
+
+
+class SampleCachedFactory(StampedDatumTableFactory):
+  """A factory that wraps `CACHED_FACTORY` and keeps every table it creates
+  in process memory, converted to a `Sample`.  Handy when plenty of OS
+  memory / swap space is available."""
+
+  # The wrapped factory; must be set before tables are requested
+  CACHED_FACTORY = None
+
+  @classmethod
+  def get_all_segment_uris(cls):
+    """Return the wrapped factory's segment URIs, or `[]` if none is set."""
+    factory = cls.CACHED_FACTORY
+    return factory.get_all_segment_uris() if factory else []
+
+  @classmethod
+  def get_segment_sd_table(cls, segment_uri, spark=None):
+    """Return the table for `segment_uri`, creating and caching it on the
+    first request.  Cache keys are the normalized segment URI strings."""
+    from psegs.table.sd_table import StampedDatumTable
+
+    assert cls.CACHED_FACTORY is not None
+
+    # Lazily create the cache dict on first use
+    if not hasattr(cls, '_seg_uri_to_sdt'):
+      cls._seg_uri_to_sdt = {}
+    cache = cls._seg_uri_to_sdt
+
+    segment_uri = str(datum.URI.from_str(segment_uri).to_segment_uri())
+    if segment_uri in cache:
+      util.log.info(f'SampleCachedFactory: cache HIT {segment_uri}')
+      return cache[segment_uri]
+
+    # Miss: build the table, then round-trip it through a Sample
+    fresh_sdt = cls.CACHED_FACTORY.get_segment_sd_table(
+      segment_uri, spark=spark)
+    cache[segment_uri] = StampedDatumTable.from_sample(fresh_sdt.to_sample())
+    util.log.info(f'SampleCachedFactory: cache SAVE {segment_uri}')
+    return cache[segment_uri]
+  
\ No newline at end of file
diff --git a/psegs/table/canonical_factory.py b/psegs/table/canonical_factory.py
new file mode 100644
index 0000000..d8f1a13
--- /dev/null
+++ b/psegs/table/canonical_factory.py
@@ -0,0 +1,323 @@
+# Copyright 2022 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from psegs import util
+# from psegs.datasets.kitti import KITTISDTable
+# from psegs.datasets.kitti_360 import KITTI360SDTable
+from psegs.datasets.ios_lidar import IOSLidarSDTFactory
+# from psegs.datasets.nuscenes import NuscStampedDatumTableLabelsAllFrames
+from psegs.table.union_factory import UnionFactory
+
+
+class CanonicalFactory(UnionFactory):
+  """A `UnionFactory` whose member factories are configured at runtime,
+  e.g. from environment variables via `init_from_environ()`."""
+  
+  # The canonical factory is empty unless configured and init-ed!
+  SDT_FACTORIES = []
+
+  @classmethod
+  def init_from_environ(cls):
+    """Append factories to `SDT_FACTORIES` based on environment variables.
+
+    Recognized variables:
+     * `PSEGS_CANON_FACTORY_PQ_SD_SUBDIRS`: colon-separated directories; one
+       `ParquetSDTFactory` is added per directory.
+     * `PSEGS_CANON_FACTORY_IOS_LIDAR_DATA_ROOTS`: colon-separated data
+       roots; one `IOSLidarSDTFactory` subclass is added per root.  The
+       dataset and split names come from
+       `PSEGS_CANON_FACTORY_IOS_LIDAR_DATASET` and
+       `PSEGS_CANON_FACTORY_IOS_LIDAR_SPLIT`, with anonymous defaults.
+    """
+
+    if 'PSEGS_CANON_FACTORY_PQ_SD_SUBDIRS' in os.environ:
+      sd_subdirs = os.environ['PSEGS_CANON_FACTORY_PQ_SD_SUBDIRS']
+      sd_subdirs = sd_subdirs.split(':')
+      util.log.info(f"PSegs Global Parquet Stamped Datum Roots: {sd_subdirs}")
+      
+      from psegs.table.sd_table_factory import ParquetSDTFactory
+      cls.SDT_FACTORIES += [
+        ParquetSDTFactory.factory_for_sd_subdirs(d)
+        for d in sd_subdirs
+      ]
+
+
+    if 'PSEGS_CANON_FACTORY_IOS_LIDAR_DATA_ROOTS' in os.environ:
+      dir_roots = os.environ['PSEGS_CANON_FACTORY_IOS_LIDAR_DATA_ROOTS']
+      dir_roots = dir_roots.split(':')
+      util.log.info(f"PSegs Global IOS Lidar Data Roots: {dir_roots}")
+      
+      dataset = os.environ.get(
+        'PSEGS_CANON_FACTORY_IOS_LIDAR_DATASET',
+        'anon_canon_factory_ios_lidar_dataset')
+      split = os.environ.get(
+        'PSEGS_CANON_FACTORY_IOS_LIDAR_SPLIT',
+        'anon_canon_factory_ios_lidar_split')
+
+      from pathlib import Path
+      from psegs.datasets.ios_lidar import Fixtures as IOS_Fixtures
+      for dir_root in dir_roots:
+        class MyFixtures(IOS_Fixtures):
+          DATASET = dataset
+          SPLIT = split
+          # Capture this iteration's root while the class body executes.
+          # Reading the loop variable `dir_root` from inside the method
+          # would be late-bound, so every generated class would report the
+          # *last* root in the list.
+          _CANON_FACTORY_DATA_ROOT = Path(dir_root)
+          @classmethod
+          def threeDScannerApp_data_root(cls):
+            return cls._CANON_FACTORY_DATA_ROOT
+
+        class MyIOSLidarSDTableFactory(IOSLidarSDTFactory):
+          FIXTURES = MyFixtures
+
+        CanonicalFactory.SDT_FACTORIES += [MyIOSLidarSDTableFactory]
+
+
+
+if False:
+  # NOTE: This block is disabled (`if False`); it is kept only as an example
+  # of wiring our own private data into `CanonicalFactory`.  User libraries
+  # and/or notebooks should do this kind of setup in their own code, after
+  # importing psegs but before using any psegs code that might list
+  # canonical segments.
+
+  from psegs.datasets.ios_lidar import Fixtures as IOS_Fixtures
+  class MyFixtures(IOS_Fixtures):
+    """Fixtures for one private collection of 3D Scanner App recordings."""
+
+    DATASET = 'pwais'
+    SPLIT = 'private'
+
+    @classmethod
+    def threeDScannerApp_data_root(cls):
+      """Return the directory holding the recordings."""
+      from pathlib import Path
+      return Path('/outer_root/media/magdaraid0/lidarphone_lidar_scans/')
+
+    @classmethod
+    def get_all_seg_uris(cls):
+      """Return every known segment URI except those listed as broken."""
+      # Room for other recording sources ...
+      candidates = list(cls.get_threeDScannerApp_segment_uris())
+
+      BROKEN_SEGMENTS = (
+        'Untitled Scan', 'amiot-crow-bar', 'headlands-downhill-2',
+        'headlands-long-descent', 'san-anselmo-rock-fort-broken',
+        'amiot-catcher-short', 'amiot-hot-dog-broken',
+      )
+
+      usable = []
+      for suri in candidates:
+        if suri.segment_id in BROKEN_SEGMENTS:
+          continue
+        usable.append(suri)
+      return usable
+
+
+  # Bind the private fixtures above to an iOS lidar table factory ...
+  class MyIOSLidarSDTableFactory(IOSLidarSDTFactory):
+    FIXTURES = MyFixtures
+
+  # ... and register that factory with the canonical union.
+  CanonicalFactory.SDT_FACTORIES += [MyIOSLidarSDTableFactory]
+  
+  # from psegs.datasets.adhoc_pixels import AdhocVideosSDTFactory
+  # CanonicalFactory.SDT_FACTORIES += [
+  #   AdhocVideosSDTFactory.create_factory_for_video(p)
+  #   for p in (
+  #     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/vids_to_sfm/san-bruno-ridge-sunset-lidar-comparison-IMG_5652.MOV',
+  #     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/vids_to_sfm/san-bruno-ridge-sunset-lidar-comparison-IMG_5654.MOV',
+  #     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/vids_to_sfm/san-bruno-ridge-sunset-lidar-comparisonIMG_5653.MOV',
+  #     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/vids_to_sfm/dubs-gym-bluetiful-subie-lidar-comparison.MOV',
+  #   )]
+
+  from pathlib import Path
+  from psegs.datasets.adhoc_pixels import AdhocImagePathsSDTFactory
+  # Register one image-directory factory per entry of the dict below, which
+  # maps segment_id -> directory passed as `images_dir`.
+  CanonicalFactory.SDT_FACTORIES += [
+    AdhocImagePathsSDTFactory.create_factory_for_images(
+        images_dir=p,
+        dataset='pwais',
+        split='private',
+        segment_id=seg_name)
+    for seg_name, p in {
+      'dubs-gym-bluetiful-subie-lidar-comparison.MOV':
+        '/outer_root/media/magdaraid0/vids_to_hloc_cache/dubs-gym-bluetiful-subie-lidar-comparison.MOV/',
+      'lidar_hero10_winter_stinsin_GX010018.MP4_cache':
+        '/outer_root/media/magdaraid0/iphone_vids_to_sfm/vids_to_sfm/lidar_hero10_winter_stinsin_GX010018.MP4_cache/frames',
+      'hero10_calib3':
+        '/outer_root/media/magdaraid0/iphone_vids_to_sfm/hero10_1/calib3_frames_short/keeps',
+      'hero10_calib5':
+        '/outer_root/media/magdaraid0/iphone_vids_to_sfm/hero10_1/calib5_frames/keeps',
+      'winter-stinsin-just-the-nappie':
+        '/outer_root/media/magdaraid0/iphone_vids_to_sfm/winter-stinsin-just-the-nappie/',
+      'winter-stinsin-just-the-nappie-oneside':
+        '/outer_root/media/magdaraid0/iphone_vids_to_sfm/winter-stinsin-just-the-nappie-oneside/',
+    }.items()
+  ]
+
+  def _list_adhoc_images(images_dir, name_tokens, every_nth=1):
+    """Return the sorted paths in `images_dir` whose file name contains every
+    string in `name_tokens`, keeping only entries whose index in the raw
+    (unsorted) directory listing is a multiple of `every_nth`."""
+    return sorted(
+      p for i, p in enumerate(Path(images_dir).iterdir())
+      if all(tok in p.name for tok in name_tokens) and (i % every_nth) == 0)
+
+  # One row per segment:
+  #   (segment_id, images_dir, required file name substrings, every_nth)
+  _ADHOC_IMAGE_SEGMENTS = [
+    ('phoenix-dry-long-broken',
+     '/outer_root/media/magdaraid0/lidarphone_lidar_scans/2021_10_12_13_53_29/',
+     ('frame_',), 1),
+    ('winter-stinsin-jpegged-30th',
+     '/outer_root/home/pwais/ORB_SLAM3/mav0/cam0/data',
+     ('jpg',), 30),
+    ('winter-stinsin-jpegged-25th',
+     '/outer_root/home/pwais/ORB_SLAM3/mav0/cam0/data',
+     ('jpg',), 25),
+    ('tl-buttercup-gpm-tasty',
+     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/hero10_1/tl-buttercup-gpm-tasty',
+     ('jpg',), 1),
+    ('king-theo-heart-tasty',
+     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/hero10_1/king-theo-heart-tasty',
+     ('jpg',), 1),
+    ('gabi-library-tasty',
+     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/hero10_1/gabi-library-tasty',
+     ('jpg',), 1),
+    ('jane-monkey-adhoc1',
+     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/jane-monkey-adhoc1',
+     ('jpg', 'frame_'), 1),
+    ('jane-monkey-adhoc2',
+     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/jane-monkey-adhoc2',
+     ('jpg', 'frame_'), 1),
+    ('bruno-bumble',
+     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/bruno-bumble',
+     ('jpg', 'output_'), 1),
+    ('dipsea-banana-slug',
+     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/dipsea-banana-slug',
+     ('jpg', 'output_'), 1),
+    ('dipsea-iris-family-gpm-1',
+     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/dipsea-iris-family-gpm-1',
+     ('jpg', 'output_'), 1),
+    ('mimi-frogs-test-1',
+     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/mimi-frogs-test-1',
+     ('jpg', 'output_'), 1),
+    ('gabigarden-rose-spider-test',
+     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/gabigarden-rose-spider-test',
+     ('jpg', 'output_'), 1),
+    ('gabigarden-baby-strawberry-test-1',
+     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/gabigarden-baby-strawberry-test-1',
+     ('jpg', 'output_'), 1),
+    ('dipsea-sleepie-bumble-pink-test-1',
+     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/dipsea-sleepie-bumble-pink-test-1',
+     ('jpg', 'output_'), 1),
+    ('san-bruno-ridge-spring-moth-wings-closed-iphone',
+     '/outer_root/media/magdaraid0/iphone_vids_to_sfm/san-bruno-ridge-spring-moth-wings-closed-iphone',
+     ('jpg', 'output_'), 1),
+  ]
+
+  # Register one explicit-image-list factory per row, in table order.
+  CanonicalFactory.SDT_FACTORIES += [
+    AdhocImagePathsSDTFactory.create_factory_for_images(
+        image_paths=_list_adhoc_images(seg_dir, seg_tokens, every_nth=seg_nth),
+        dataset='pwais',
+        split='private',
+        segment_id=seg_id)
+    for seg_id, seg_dir, seg_tokens, seg_nth in _ADHOC_IMAGE_SEGMENTS
+  ]
+
+  # All `mission_bay_streatfood_bee{s}` captures (s = 1..6) share a single
+  # directory; each segment takes the jpgs whose file name contains its own
+  # `mission_bay_streatfood_bee{s}` tag.
+  CanonicalFactory.SDT_FACTORIES += [
+
+    AdhocImagePathsSDTFactory.create_factory_for_images(
+        image_paths=sorted(
+          p for i, p in 
+          enumerate(Path('/outer_root/media/magdaraid0/iphone_vids_to_sfm/mission_bay_streatfood_bee').iterdir())
+          if 'jpg' in p.name and f'mission_bay_streatfood_bee{s}' in p.name),
+        dataset='pwais',
+        split='private',
+        segment_id=f'mission_bay_streatfood_bee{s}')
+
+      for s in range(1, 7)
+
+  ]
+
+  # from psegs.table.sd_table_factory import ParquetSDTFactory
+  # CanonicalFactory.SDT_FACTORIES += [
+  #   ParquetSDTFactory.factory_for_sd_subdirs(
+  #     '/outer_root/media/magdaraid0/hloc_out/')
+  # ]
+
+
+  # F = CanonicalFactory.SDT_FACTORIES[-1]
+  # print(F.get_all_segment_uris()[-1])
+  # t = F.get_segment_sd_table(F.get_all_segment_uris()[-1])
+  # datum_rdd = t.to_datum_rdd()
+  # print(datum_rdd.take(1))
+  # df = t.to_spark_df()
+  # df.show()
+  # breakpoint()
+  # print()
diff --git a/psegs/table/sd_db.py b/psegs/table/sd_db.py
new file mode 100644
index 0000000..153754f
--- /dev/null
+++ b/psegs/table/sd_db.py
@@ -0,0 +1,353 @@
+# Copyright 2022 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import itertools
+
+import attr
+
+from psegs import util
+from psegs.datum.uri import URI
+from psegs.table.sd_table_factory import StampedDatumTableFactory
+from psegs.datum.stamped_datum import Sample
+from psegs.datum.stamped_datum import StampedDatum
+
+
+URI_ATTRNAMES = set(a.name for a in attr.fields(URI))
+
+def to_seg_uri_str(obj):
+  """Return the segment URI string for `obj`.
+
+  `obj` may be a `URI`, a string (parsed with `URI.from_str()`), or any
+  object with an `asDict()` method, of which only the keys that are `URI`
+  attribute names are used.  Raises `ValueError` for anything else.
+  """
+  import six
+
+  if isinstance(obj, URI):
+    return str(obj.to_segment_uri())
+
+  if isinstance(obj, six.string_types):
+    return str(URI.from_str(obj).to_segment_uri())
+
+  if hasattr(obj, 'asDict'):
+    uri_kwargs = {
+      k: v for k, v in obj.asDict().items() if k in URI_ATTRNAMES
+    }
+    return str(URI(**uri_kwargs).to_segment_uri())
+
+  raise ValueError("Can't convert %s" % (obj,))
+
+class NoKnownTable(Exception):
+  """Raised when no table is known for the requested segment(s)."""
+
+class StampedDatumDB(object):
+  """
+
+  TODO rename this UnionTable or something and put with sd_table
+
+  """
+
+  # @classmethod
+  # def all_tables(cls):
+  #   if not hasattr(cls, '_all_tables'):
+  #     from psegs.datasets import kitti
+  #     cls._all_tables = (
+  #       kitti.KITTISDTable,
+  #     )
+  #   return cls._all_tables
+  
+  # @classmethod
+  # def show_all_segment_uris(cls):
+  #   """FIXME Interface ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"""
+  #   for table in cls.all_tables():
+  #     print(table)
+  #     for seg_uri in table.get_all_segment_uris():
+  #       print(seg_uri)
+
+  # @classmethod
+  # def get_segment_datum_rdd(cls, spark, segment_uri, time_ordered=True):
+  #   """FIXME Interface ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"""
+
+  #   def has_segment(table_segs):
+  #     if segment_uri.dataset and segment_uri.split:
+  #       return segment_uri in table_segs
+  #     else:
+  #       table_seg_ids = set(uri.segment_id for uri in table_segs)
+  #       return segment_uri.segment_id in table_seg_ids
+
+  #   for table in cls.all_tables():
+  #     if has_segment(table.get_all_segment_uris()):
+  #       return table.get_segment_datum_rdd(
+  #         spark, segment_uri, time_ordered=time_ordered)
+  #   return spark.sparkContext.parallelize([])
+
+  def __init__(self, tables=None, spark=None, cache_dfs=True):
+    """Create a DB over the given tables.
+
+    Args:
+      tables: tables to search for segments, in priority order; defaults to
+        no tables.
+      spark: optional Spark session; when set it takes precedence over any
+        session passed to individual methods.
+      cache_dfs: if true, segments are built via the table's `build()` and
+        read back from disk; otherwise DataFrames are requested directly.
+    """
+    # `None` rather than a `[]` default: a mutable default is one list
+    # object shared by every instance constructed without `tables`.
+    self._tables = [] if tables is None else tables
+    # Maps segment URI string -> datum DataFrame for segments seen so far
+    self._segment_to_df = {}
+    self._spark = spark
+    self._cache_dfs = cache_dfs
+  
+  def _build_datum_df(self, uri, spark=None):
+    """Find the table that has the segment of `uri` and return that
+    segment's datum DataFrame.
+
+    The first table in `self._tables` listing a segment URI that
+    soft-matches wins.  With `cache_dfs` set, the table is asked to
+    `build()` the segment and the DataFrame is read back from disk;
+    otherwise the DataFrame comes from `get_segment_datum_df()`.
+
+    Raises:
+      NoKnownTable: if no table lists a matching segment.
+    """
+    spark = self._spark or spark
+    suri = URI.from_str(uri).to_segment_uri()
+    
+    T = None
+    for table in self._tables:
+      t_seg_uris = table.get_all_segment_uris()
+      if any(suri.soft_matches_segment_of(tu) for tu in t_seg_uris):
+        T = table
+        break
+        
+    if T is None:
+      raise NoKnownTable("No known table for %s in %s" % (uri, self._tables))
+    
+    util.log.info("Building DF for %s" % uri)
+    if self._cache_dfs:
+      # Pass the resolved handle: `self._spark` alone is `None` whenever the
+      # caller supplied the session through the `spark` argument.
+      T.build(spark=spark, only_segments=[suri])
+      # TODO the below call is expensive for each table if disk table has lots of files !!!!!!! 5 minutes per table!!
+      # at least when merge schema was used !!!!
+      datum_df = T._get_segment_datum_df_from_disk(spark, suri)
+    else:
+      datum_df = T.get_segment_datum_df(spark, suri)
+    return datum_df
+
+  def _get_df_for_segment(self, suri, spark=None):
+    """Return the datum DataFrame for the segment of `suri`, building and
+    memoizing it in `self._segment_to_df` when it has not been seen yet."""
+    spark = self._spark or spark
+    suri = URI.from_str(suri).to_segment_uri()
+
+    # Reuse an already-loaded segment if one soft-matches the request
+    known_seg = next(
+      (
+        candidate
+        for candidate in map(URI.from_str, self._segment_to_df)
+        if suri.soft_matches_segment_of(candidate)
+      ),
+      None)
+    if known_seg is not None:
+      return self._segment_to_df[str(known_seg)]
+
+    datum_df = self._build_datum_df(suri, spark=spark)
+
+    # Read one row so the cache key is the fully-qualified segment URI
+    first_rows = datum_df.select('uri').take(1)
+    assert first_rows, "Dataset for %s is empty" % suri
+    full_uri = StampedDatumTableFactory.from_row(first_rows[0].uri)
+    seg_uri = full_uri.to_segment_uri()
+    seg_key = str(seg_uri)
+    self._segment_to_df[seg_key] = datum_df
+    util.log.info("Added DF for %s" % seg_uri)
+
+    return self._segment_to_df[seg_key]
+
+  def _get_union_df_for_segments(
+            self,
+            suris,
+            ignore_unknown_tables=False,
+            spark=None):
+    """Return the union of the datum DataFrames of all segments in `suris`.
+
+    With `ignore_unknown_tables`, segments that no table knows are skipped,
+    and an empty DataFrame with the table schema is returned when nothing is
+    left.  Otherwise `NoKnownTable` is raised for an unknown segment or when
+    there are no DataFrames at all.
+    """
+
+    def df_or_none(seg):
+      try:
+        return self._get_df_for_segment(seg, spark=spark)
+      except NoKnownTable:
+        if not ignore_unknown_tables:
+          raise
+        return None
+
+    n_segments = len(suris)
+    util.log.info("Building union DF for %s segments ..." % n_segments)
+    from oarphpy import util as oputil
+    progress = oputil.ThruputObserver(
+                    name='get_or_build_datum_dfs',
+                    n_total=n_segments,
+                    log_freq=1,
+                    log_on_del=True)
+    maybe_dfs = []
+    for seg in suris:
+      with progress.observe(n=1):
+        maybe_dfs.append(df_or_none(seg))
+      progress.maybe_log_progress()
+    util.log.info("... done building union DF for %s segments" % n_segments)
+
+    found = [df for df in maybe_dfs if df is not None]
+    if found:
+      from oarphpy.spark import union_dfs
+      return union_dfs(*found)
+
+    if not ignore_unknown_tables:
+      raise NoKnownTable("No tables for segments: %s" % suris)
+
+    # Nothing matched, and that is acceptable: hand back an empty table
+    spark = spark or self._spark
+    return spark.createDataFrame(
+                  [], schema=StampedDatumTableFactory.table_schema())
+
+
+  # def _add_datum_df(self, datum_df, seg_uri=None):
+  #   if not seg_uri:
+  #     row = datum_df.select('uri').take(1)
+  #     row = StampedDatumTableFactory.from_row(row)
+  #     uri = row.uri
+  #     seg_uri = uri.to_segment_uri()
+    
+  #   if self._df is None:
+  #     self._df = datum_df
+  #   else:
+  #     from oarphpy.spark import union_dfs
+  #     self._df = union_dfs(self._df, datum_df)
+      
+  #   self._segments.append(seg_uri)
+    
+      
+  
+
+  # def _maybe_add_segment(self, uri, spark=None):
+  #   spark = self._spark or spark
+  #   suri = URI.from_str(uri).to_segment_uri()
+  #   if not any(suri.soft_matches_segment_of(u) for u in self._segments):
+  #     datum_df = self._build_datum_df(uri, spark=spark)
+  #     self._add_datum_df(datum_df, seg_uri=suri)
+  
+  # def _ensure_have_data(self, seg_uris, spark=None, ignore_unknown=False):
+  #   for u in seg_uris:
+  #     try:
+  #       self._maybe_add_segment(u, spark=spark)
+  #     except NoKnownTable as e:
+  #       if not ignore_unknown:
+  #         raise e
+
+  @staticmethod
+  def select_datum_df_from_uris(uris, datum_df):
+    """Return the rows of `datum_df` matching at least one URI in `uris`.
+
+    Each URI is matched on the fields it populates: `dataset`, `split` and
+    `segment_id` individually, and `topic` together with `timestamp`.  A URI
+    with none of these fields set matches every row.  An empty `uris`
+    selects no rows.
+
+    Args:
+      uris: iterable of `URI` objects.
+      datum_df: DataFrame with a `uri` struct column.
+    """
+    # Compile `uris` into the query itself for maximum speed
+    
+    import pyspark.sql.functions as F
+    from functools import reduce
+
+    def _to_match(uri):
+      toks = []
+      if uri.dataset:
+        toks += [F.col('uri.dataset') == uri.dataset]
+      if uri.split:
+        toks += [F.col('uri.split') == uri.split]
+      if uri.segment_id:
+        toks += [F.col('uri.segment_id') == uri.segment_id]
+      if uri.topic:
+        # Topic implies both topic and timestamp are valid
+        toks += [
+          F.col('uri.topic') == uri.topic,
+          F.col('uri.timestamp') == uri.timestamp
+        ]
+
+      if not toks:
+        # No populated fields means no constraints (and `reduce()` would
+        # raise on the empty list)
+        return F.lit(True)
+      return reduce(lambda a, b: a & b, toks)
+
+    # Materialize so emptiness can be tested even for one-shot iterators
+    uris = list(uris)
+    if not uris:
+      # `reduce()` over nothing would raise; selecting by no URIs selects
+      # no rows
+      return datum_df.where(F.lit(False))
+
+    # Construct SELECT * FROM T WHERE uri = uri1 OR uri = uri2 OR ...
+    df = datum_df.where(
+          reduce(
+            lambda a, b: a | b,
+            (_to_match(uri) for uri in uris)
+          ))
+    return df
+
+  @staticmethod
+  def select_datum_df_from_uri_df(uri_df, datum_df):
+    """Join `datum_df` with `uri_df` (Spark's default join type) on equality
+    of dataset, split, segment_id, topic and timestamp, and return the
+    joined DataFrame."""
+    datum_uri = datum_df.uri
+    same_datum = (
+      (datum_uri.dataset == uri_df.dataset) &
+      (datum_uri.split == uri_df.split) &
+      (datum_uri.segment_id == uri_df.segment_id) &
+      (datum_uri.topic == uri_df.topic) &
+      (datum_uri.timestamp == uri_df.timestamp))
+    return datum_df.join(uri_df, same_datum)
+
+  @staticmethod
+  def select_datum_df_from_uri_rdd(spark, uri_rdd, datum_df):
+    """Convert `uri_rdd` to a DataFrame of URI rows and select from
+    `datum_df` with `select_datum_df_from_uri_df()`."""
+    from oarphpy.spark import RowAdapter
+
+    uri_schema = RowAdapter.to_schema(URI())
+    uri_rows = uri_rdd.map(RowAdapter.to_row)
+    return StampedDatumDB.select_datum_df_from_uri_df(
+              spark.createDataFrame(uri_rows, schema=uri_schema),
+              datum_df)
+
+  def get_sample(self, uri, spark=None):
+    """Collect the datums selected by `uri` into a `Sample`.
+
+    The datum URIs of `uri` are looked up when it has any; otherwise `uri`
+    itself is used for the lookup.
+    """
+    uri = URI.from_str(uri)
+    lookup_uris = uri.get_datum_uris()
+    if not lookup_uris:
+      lookup_uris = [uri]
+    found_df = self.get_datum_df(uris=lookup_uris, spark=spark)
+    datums = StampedDatumTableFactory.sd_df_to_rdd(found_df).collect()
+    return Sample(uri=uri, datums=datums)
+
+  def get_datum_df(self, uris=None, spark=None):
+    """Return a DataFrame of the datums selected by `uris`.
+
+    Args:
+      uris: one of
+         * an RDD of URIs (detected via a `_jrdd` attribute),
+         * a DataFrame of URI rows (detected via an `rdd` attribute), or
+         * any other iterable of URIs / URI strings.
+        For the RDD and DataFrame forms, segments no table knows are
+        ignored; for a plain iterable they raise `NoKnownTable`.
+      spark: Spark session, used unless the DB was created with its own.
+
+    Raises:
+      TypeError: if `uris` is `None`.
+    """
+    spark = self._spark or spark
+
+    if uris is None:
+      # Fail early with a clear message instead of an opaque iteration error
+      raise TypeError(
+        "`uris` must be an RDD, DataFrame, or iterable of URIs; got None")
+
+    if hasattr(uris, '_jrdd'):
+      uri_rdd = uris
+      seg_uris = uri_rdd.map(to_seg_uri_str).distinct().collect()
+      datum_df = self._get_union_df_for_segments(
+                        seg_uris, ignore_unknown_tables=True, spark=spark)
+      return StampedDatumDB.select_datum_df_from_uri_rdd(
+                spark or self._spark,
+                uri_rdd,
+                datum_df)
+    elif hasattr(uris, 'rdd'):
+      uri_df = uris
+      suri_df = uri_df.select('dataset', 'split', 'segment_id').distinct()
+      seg_uris = suri_df.rdd.map(to_seg_uri_str).distinct().collect()
+      datum_df = self._get_union_df_for_segments(
+                        seg_uris, ignore_unknown_tables=True, spark=spark)
+      return StampedDatumDB.select_datum_df_from_uri_df(
+                uri_df,
+                datum_df)
+    else:
+      # Materialize once: `uris` is traversed twice below, and a one-shot
+      # iterator (e.g. a generator) would be empty on the second pass.
+      uris = list(uris)
+
+      seg_uris = list(set(to_seg_uri_str(u) for u in uris))
+      datum_df = self._get_union_df_for_segments(
+                        seg_uris, ignore_unknown_tables=False, spark=spark)
+
+      # Expand every URI into its datum URIs, falling back to the URI itself
+      uris = [URI.from_str(u) for u in uris]
+      uris = list(itertools.chain.from_iterable(
+        (u.get_datum_uris() or [u])
+        for u in uris
+      ))
+      
+      return StampedDatumDB.select_datum_df_from_uris(uris, datum_df)
+
+  def get_keyed_sample_df(
+                self,
+                df,
+                key_col='key',
+                uri_col='uri',
+                datum_col='datums',
+                spark=None):
+    """Group the datums referenced by `df` into one list per key.
+
+    `df` needs a key column (`key_col`) and a URI struct column (`uri_col`).
+    Datums are joined to `df` on dataset, split, segment_id, topic and
+    timestamp; the result has one row per `key_col` value, with the matched
+    datum structs collected into `datum_col`.
+    """
+    # fixme it took 23 mins just to get union df of 42 segments ~~~~~~~~~~~~~~~~~~~~~
+    seg_cols = [
+      df[uri_col + '.' + field]
+      for field in ('dataset', 'split', 'segment_id')
+    ]
+    suri_df = df.select(*seg_cols).distinct()
+    seg_uris = suri_df.rdd.map(to_seg_uri_str).distinct().collect()
+    datum_df = self._get_union_df_for_segments(
+                        seg_uris, ignore_unknown_tables=True, spark=spark)
+
+    # Rename the user's URI column so it cannot clash with the datums' `uri`
+    key_uri_df = df.withColumnRenamed(uri_col, 'user_uri')
+    join_on = None
+    for field in ('dataset', 'split', 'segment_id', 'topic', 'timestamp'):
+      same_field = (datum_df.uri[field] == key_uri_df['user_uri.' + field])
+      join_on = same_field if join_on is None else (join_on & same_field)
+    joined = datum_df.join(key_uri_df, join_on)
+
+    datum_colnames = [f.name for f in attr.fields(StampedDatum)]
+    datum_colnames.append('__pyclass__')
+
+    from pyspark.sql import functions as F
+    datums_agg = F.collect_list(F.struct(*datum_colnames)).alias(datum_col)
+    return joined.groupBy(key_col).agg(datums_agg)
+
+  @staticmethod
+  def datum_rows_to_sample(datum_rows):
+    """Decode each row with `RowAdapter.from_row()` and wrap the resulting
+    datums in a `Sample`."""
+    from psegs import datum
+    from oarphpy.spark import RowAdapter
+
+    decoded = []
+    for row in datum_rows:
+      decoded.append(RowAdapter.from_row(row))
+    return datum.Sample(datums=decoded)
+
+
diff --git a/psegs/table/sd_table.py b/psegs/table/sd_table.py
new file mode 100644
index 0000000..b88efc5
--- /dev/null
+++ b/psegs/table/sd_table.py
@@ -0,0 +1,467 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
+from psegs.datum import URI
+from psegs.datum.stamped_datum import Sample
+from psegs.datum.stamped_datum import STAMPED_DATUM_PROTO
+from psegs.spark import Spark
+
+class StampedDatumTable(object):
+
+  PARTITION_KEYS = ('dataset', 'split', 'segment_id')
+
+  ## Datums -> StampedDatumTable
+
+  def __init__(self):
+    """Create an empty table; the `from_*()` constructors fill in one
+    backing store (sample, Spark DataFrame or datum RDD)."""
+    self._sample = self._spark_df = self._datum_rdd = None
+    self._spark = None
+
+  @classmethod
+  def from_spark_df(cls, spark_df, spark=None):
+    """Create a table backed by the Spark DataFrame `spark_df`, optionally
+    remembering the `spark` session to use with it."""
+    table = cls()
+    table._spark_df, table._spark = spark_df, spark
+    return table
+
+  @classmethod
+  def from_sample(cls, sample):
+    """Create a table backed by the in-memory `sample`."""
+    table = cls()
+    table._sample = sample
+    return table
+
+  @classmethod
+  def from_datum_rdd(cls, datum_rdd, spark=None):
+    """Create a table backed by the RDD `datum_rdd`, optionally remembering
+    the `spark` session to use with it."""
+    table = cls()
+    table._datum_rdd, table._spark = datum_rdd, spark
+    return table
+
+  def get_spark(self, spark=None):
+    """Return this table's own Spark handle when it has one, else `spark`."""
+    if self._spark:
+      return self._spark
+    return spark
+
+  ## StampedDatumTable -> Datums
+
+  def to_spark_df(self, spark=None):
+    """Return this table's datums as a Spark DataFrame.
+
+    A sample-backed table is parallelized and converted (the result is also
+    stored on the table); a DataFrame-backed table returns its DataFrame; an
+    RDD-backed table is converted on each call; an empty table yields an
+    empty DataFrame with the table schema.
+    """
+    spark = spark or self._spark
+
+    if self._sample:
+      with Spark.sess(spark) as sess:
+        sample_rdd = sess.sparkContext.parallelize(self._sample.datums)
+        self._spark_df = self._sd_rdd_to_sd_df(sess, sample_rdd)
+        return self._spark_df
+
+    if self._spark_df:
+      return self._spark_df
+
+    if self._datum_rdd:
+      with Spark.sess(spark) as sess:
+        return self._sd_rdd_to_sd_df(sess, self._datum_rdd)
+
+    # Create an empty Spark DF
+    with Spark.sess(spark) as sess:
+      self._spark_df = sess.createDataFrame([], schema=self.table_schema())
+      return self._spark_df
+
+  def to_sample(self):
+    """Return this table's datums as a `Sample`, collecting them to the
+    driver when the table is backed by a DataFrame or an RDD."""
+    if self._sample:
+      return self._sample
+
+    if self._spark_df:
+      source_rdd = self.datum_df_to_datum_rdd(self._spark_df)
+    elif self._datum_rdd:
+      source_rdd = self._datum_rdd
+    else:
+      return Sample()
+
+    return Sample(datums=source_rdd.collect())
+
+  def to_datum_rdd(self, spark=None, cache_to_disk=False):
+    """Return this table's datums as an RDD.
+
+    Args:
+      spark: Spark session to use when an RDD has to be created.
+      cache_to_disk: if true, persist the returned RDD with
+        `StorageLevel.MEMORY_AND_DISK`.
+    """
+    datum_rdd = None
+    if self._datum_rdd:
+      datum_rdd = self._datum_rdd
+    elif self._sample:
+      spark = spark or self._spark
+      datums = self._sample.datums
+      with Spark.sess(spark) as spark:
+        # One datum per slice.  Clamp to at least 1: a sample without datums
+        # would otherwise ask for 0 slices, which makes `parallelize()`
+        # divide by zero.
+        datum_rdd = spark.sparkContext.parallelize(
+                    datums, numSlices=max(1, len(datums)))
+    elif self._spark_df:
+      datum_rdd = self.datum_df_to_datum_rdd(self._spark_df)
+    else:
+      with Spark.sess(spark) as spark:
+        datum_rdd = spark.sparkContext.parallelize([])
+
+    if cache_to_disk:
+      from pyspark import StorageLevel
+      datum_rdd = datum_rdd.persist(StorageLevel.MEMORY_AND_DISK)
+    return datum_rdd
+
+
+  ## Accessors
+
+  def get_all_segment_uris(self):
+    """Return the distinct segment URIs of the datums in this table; `[]`
+    for an empty table."""
+    if self._sample:
+      return [self._sample.uri.to_segment_uri()]
+
+    if self._spark_df:
+      seg_cols = ['dataset', 'split', 'segment_id']
+      seg_rows = self.as_uri_df().select(*seg_cols).distinct().collect()
+      return [
+        URI(dataset=row.dataset, split=row.split, segment_id=row.segment_id)
+        for row in seg_rows
+      ]
+
+    if self._datum_rdd:
+      # Go through strings to de-duplicate, then parse back into URIs
+      seg_strs = self.as_uri_rdd().map(lambda uri: str(uri.to_segment_uri()))
+      return seg_strs.distinct().map(lambda s: URI.from_str(s)).collect()
+
+    return []
+
+  def select_from_uris(self, uris, spark=None):
+    """Not implemented yet: prints 'todo' and returns `None`; both `uris`
+    and `spark` are currently ignored."""
+    print('todo')
+    pass # TODO
+
+
+  # @classmethod
+  # def get_sample(cls, uri, spark=None):
+  #   with Spark.sess(spark) as spark:
+  #     datums = cls._get_segment_datum_rdd_or_df(spark, uri)
+  #     if hasattr(datums, 'rdd'):
+  #       datums = cls.datum_df_to_datum_rdd(datums)
+  #     return Sample(uri=uri, datums=datums.collect())
+
+  def get_datum_rdd_matching(
+          self,
+          only_topics=None,
+          only_types=None,
+          spark=None):
+    """Return an RDD of this table's datums filtered by topic and/or type.
+
+    Args:
+      only_topics: if given (and non-empty), keep only datums whose
+        `uri.topic` is one of these.
+      only_types: if given (and non-empty), keep only datums that have a
+        value for every named datum attribute (e.g. `'cuboids'`).
+      spark: Spark session to use if one is needed.
+    """
+
+    if self._spark_df or self._sample:
+      sdf = self.to_spark_df(spark=spark)
+      if only_types:
+        for sd_type in only_types:
+          if sd_type in ('cuboids',):
+            # Tested by array size, so an empty array counts as "no value"
+            sdf = sdf.where("ARRAY_SIZE(%s) > 0" % sd_type)
+          else:
+            sdf = sdf.where("%s IS NOT NULL" % sd_type)
+      if only_topics:
+        sdf = sdf.where(sdf['uri.topic'].isin(only_topics))
+
+      return self.datum_df_to_datum_rdd(sdf)
+
+    datum_rdd = self.to_datum_rdd(spark=spark)
+
+    # Normalize up front: `only_types` defaults to `None`, and iterating
+    # `None` inside the filter function would raise a `TypeError`.
+    wanted_types = tuple(only_types or ())
+
+    def matches(sd):
+      if only_topics and sd.uri.topic not in only_topics:
+        return False
+      return all(bool(getattr(sd, t, False)) for t in wanted_types)
+
+    return datum_rdd.filter(matches)
+
+
+  def as_uri_df(self, spark=None):
+    """Return a DataFrame holding only the URI columns of this table: the
+    non-partition `uri.*` fields, the partition key columns, and
+    `uri.__pyclass__`."""
+    import attr
+
+    df = self.to_spark_df(spark=spark)
+
+    nested_cols = [
+      'uri.' + field.name
+      for field in attr.fields(URI)
+      if field.name not in self.PARTITION_KEYS
+    ]
+    # Use the partition columns for faster filters
+    partition_cols = list(self.PARTITION_KEYS)
+    return df.select(nested_cols + partition_cols + ['uri.__pyclass__'])
+
+  def as_uri_rdd(self, spark=None):
+    """Return an RDD of the URIs of this table's datums."""
+    if not (self._spark_df or self._sample):
+      # RDD-backed (or empty): read the URI straight off each datum
+      return self.to_datum_rdd(spark=spark).map(lambda sd: sd.uri)
+
+    return self.as_uri_df(spark=spark).rdd.map(self.uri_from_row)
+
+
+  def get_start_end_time_ns(self, spark=None):
+    """Return `(min, max)` of the datum URI timestamps in this table, or
+    `(None, None)` when there are no datums."""
+    if self._sample:
+      stamps = [sd.uri.timestamp for sd in self._sample.datums]
+    elif self._spark_df:
+      from pyspark.sql import functions as F
+      uri_df = self.as_uri_df()
+      bounds = uri_df.select(F.min('timestamp'), F.max('timestamp'))
+      start_time_ns, end_time_ns = bounds.collect()[0]
+      return start_time_ns, end_time_ns
+    elif self._datum_rdd:
+      stamps = self.as_uri_rdd().map(lambda uri: uri.timestamp).collect()
+    else:
+      return None, None
+
+    if not stamps:
+      # No datums: report an undefined range
+      stamps = [None]
+    return min(stamps), max(stamps)
+
+
+  ## Viz
+
+  def to_rich_html(self, spark=None, **html_kwargs):
+    """Create and return an HTML visualization with (small) videos and
+    3D plots.  Uses `spark` if given, else `self._spark`; any `html_kwargs`
+    are forwarded to `sample_to_html()`."""
+    from psegs.spark import Spark
+    from psegs.util.plotting import sample_to_html
+
+    spark = spark or self._spark
+    with Spark.sess(spark) as spark:
+      return sample_to_html(
+                spark,
+                self,
+                **html_kwargs)
+
+
+  ## I/O
+
+  def save_parquet(
+        self,
+        dest_dir,
+        partition=True,
+        mode='overwrite',
+        compression='zstd',
+        spark=None,
+        num_partitions=-1):
+    """Write this table to `dest_dir` with the Spark DataFrame writer; if
+    `partition`, partition the output by `PARTITION_KEYS`, and if
+    `num_partitions > 0`, repartition the data before writing."""
+    save_opts = dict(path=str(dest_dir), compression=compression, mode=mode)
+
+    # TODO use save_df_thunks depending on backing type?
+    cached_df = self.to_spark_df(spark=spark).persist()
+    spark_df = cached_df
+    if partition:
+      for k in self.PARTITION_KEYS:
+        spark_df = spark_df.withColumn(k, spark_df['uri.' + k])
+      save_opts['partitionBy'] = self.PARTITION_KEYS
+    if num_partitions > 0:
+      spark_df = spark_df.repartition(num_partitions)
+    try:
+      spark_df.write.save(**save_opts)
+    finally:
+      # Release the cache even if the write fails.  Unpersist the frame that
+      # was actually persisted: `withColumn()` / `repartition()` return new ones.
+      cached_df.unpersist()
+  
+
+
+  ## StampedDatum <-> Table Rows
+
+  @classmethod
+  def sd_to_row(cls, sd):
+    """Convert `sd` to a Spark `Row`, adding a column per `PARTITION_KEYS`."""
+    from oarphpy.spark import RowAdapter
+    from pyspark.sql import Row
+    row = RowAdapter.to_row(sd)
+    row = row.asDict()
+    # Copy each partition key out of the URI into a top-level column
+    for k in cls.PARTITION_KEYS:
+      row[k] = getattr(sd.uri, k)
+    return Row(**row)
+
+  @classmethod
+  def sd_from_row(cls, row):  # Decode `row` with oarphpy's `RowAdapter`
+    from oarphpy.spark import RowAdapter
+    return RowAdapter.from_row(row)
+  
+  @classmethod
+  def uri_from_row(cls, row):  # Decode a URI from a row with or without a `uri` column
+    from oarphpy.spark import RowAdapter
+    if hasattr(row, 'uri'):
+      return RowAdapter.from_row(row.uri)  # Decode just the nested `uri` value
+    else:
+      return RowAdapter.from_row(row)  # Presumably `row` is itself a URI row -- TODO confirm
+
+  @classmethod
+  def table_schema(cls):
+    """Return the table's Spark schema, built once from `STAMPED_DATUM_PROTO` and cached."""
+    if not hasattr(cls, '_schema'):
+      from oarphpy.spark import RowAdapter
+      to_row = cls.sd_to_row
+      cls._schema = RowAdapter.to_schema(to_row(STAMPED_DATUM_PROTO))
+    return cls._schema
+
+  @classmethod
+  def _sd_rdd_to_sd_df(cls, spark, sd_rdd):
+    """Convert an RDD of `StampedDatum`s to a DataFrame with `table_schema()`."""
+    
+    row_rdd = sd_rdd.map(cls.sd_to_row)
+    df = spark.createDataFrame(row_rdd, schema=cls.table_schema())
+    return df
+
+  @classmethod
+  def datum_df_to_datum_rdd(cls, sd_df):  # Decode each row of `sd_df` via `sd_from_row()`
+    return sd_df.rdd.map(cls.sd_from_row)
+  
+
+
+  ## Diffing
+
+  def diff_with(self, other_sdt, spark=None):  # Returns the `find_diff()` report ('' if equal)
+    this_df = self.to_spark_df(spark=spark)
+    other_df = other_sdt.to_spark_df(spark=spark)
+    return self.find_diff(this_df, other_df)
+
+  @classmethod
+  def find_diff(cls, sd_df1, sd_df2):
+    """Compare all entries of Spark DataFrames of `StampedDatum`s `sd_df1`
+    and `sd_df2` and return a string report on the first major difference.
+    Return the empty string if the tables are equal.  Checks run in order:
+    dataset/split sets, segment sets, URI counts, missing URIs, duplicate
+    URIs, and finally per-datum content."""
+    import pprint
+    from operator import add
+
+    import attr
+    from pyspark.sql import functions as F
+
+    from psegs.util import misc
+    from psegs.datum import datumutils as du
+    from psegs.datum.stamped_datum import StampedDatum
+
+
+    ## First, do we have the same datasets / splits?
+    def get_dataset_splits(df):
+      rows = df.select(df.uri.dataset, df.uri.split).distinct().collect()
+      return sorted(tuple(r) for r in rows)
+    
+    ds1 = get_dataset_splits(sd_df1)
+    ds2 = get_dataset_splits(sd_df2)
+    if ds1 != ds2:
+      return "Dataset/Split Mismatch: \n%s" % misc.diff_of_pprint(ds1, ds2)
+    
+    
+    ## Next, do we have the same segments?
+    def get_seg_uris(df):
+      rows = df.select(
+              df.uri.dataset,
+              df.uri.split,
+              df.uri.segment_id).distinct().collect()
+      return sorted(tuple(r) for r in rows)
+
+    segs1 = get_seg_uris(sd_df1)
+    segs2 = get_seg_uris(sd_df2)
+    if segs1 != segs2:
+      return "Segment Mismatch: \n%s" % misc.diff_of_pprint(segs1, segs2)
+    
+
+    ## Next, let's compare URIs.  
+    def get_uri_rdd(df):
+      uri_rdd = df.select(df.uri).rdd.map(lambda row: cls.sd_from_row(row.uri))
+      return uri_rdd
+    
+    # First in number ... NOTE(review): these cached RDDs are never unpersisted
+    uri_rdd1 = get_uri_rdd(sd_df1).cache()
+    uri_rdd2 = get_uri_rdd(sd_df2).cache()
+    c1 = uri_rdd1.count()
+    c2 = uri_rdd2.count()
+
+    if c1 == 0 and c2 == 0:
+      return '' # Short-circuit: Spark is slow for empty data below
+    elif c1 == 0 and c2 > 0:
+      return "Left table is EMPTY but right table has %s rows" % c2
+    elif c1 > 0 and c2 == 0:
+      return "Right table is EMPTY but left table has %s rows" % c1
+    elif c1 != c2 and (abs(c1 - c2) >= 1000):  # Smaller gaps fall through to the per-URI check
+      return "URI Count Mismatch: left count: %s right count: %s" % (c1, c2)
+
+    # ... then in content ...
+    to_key = lambda uri: (uri.to_str(), 1)
+    kv1 = uri_rdd1.map(to_key).reduceByKey(add)
+    kv2 = uri_rdd2.map(to_key).reduceByKey(add)
+    missing_rdd2 = sorted(kv1.subtractByKey(kv2).keys().collect())
+    missing_rdd1 = sorted(kv2.subtractByKey(kv1).keys().collect())
+
+    if missing_rdd1 or missing_rdd2:
+      return """
+                Missing URIs (first 50):
+                Missing left (%s): %s 
+                Missing right (%s): %s""" % (
+                  len(missing_rdd1),
+                  pprint.pformat(missing_rdd1[:50]),
+                  len(missing_rdd2),
+                  pprint.pformat(missing_rdd2[:50]))
+
+    # ... and check for dupes!!
+    has_dupes = lambda kv: kv[-1] > 1
+    rdd1_dupes = kv1.filter(has_dupes).collect()
+    rdd2_dupes = kv2.filter(has_dupes).collect()
+    if rdd1_dupes or rdd2_dupes:
+      return """
+                Dupe URIs (first 50):
+                Dupes left (%s): %s 
+                Dupes right (%s): %s""" % (
+                  len(rdd1_dupes),
+                  pprint.pformat(rdd1_dupes[:50]),
+                  len(rdd2_dupes),
+                  pprint.pformat(rdd2_dupes[:50]))
+
+    ## Finally, let's compare actual Datums.
+    SD_COLS = [f.name for f in attr.fields(StampedDatum)]
+
+    # Do key-value mapping and join using the Dataframe API
+    # because it's faster
+    def to_key_datum_df(df):
+      URI_KEYS = (
+        # Use partition col if possible, else read from URI
+        'dataset' if 'dataset' in df.columns else 'uri.dataset',       
+        'split' if 'split' in df.columns else 'uri.split',       
+        'segment_id' if 'segment_id' in df.columns else 'uri.segment_id',       
+        # Must read from uri
+        'uri.timestamp',  
+        'uri.topic')
+      kv_df = df.select(
+                F.concat(*URI_KEYS).alias('key'),  # NOTE(review): fields are joined with no separator
+                F.struct(*SD_COLS).alias('datum'))
+      return kv_df
+    
+    kv_df1 = to_key_datum_df(sd_df1)
+    kv_df2 = to_key_datum_df(sd_df2)
+    kv_df1 = kv_df1.withColumn('datum1', kv_df1.datum)
+    kv_df2 = kv_df2.withColumn('datum2', kv_df2.datum)
+    joined = kv_df1.join(kv_df2, on='key', how='inner')
+
+    def get_diff_string(row):
+      return du.get_datum_diff_string(row.datum1, row.datum2)
+
+    diffs = joined.rdd.map(get_diff_string)
+    nonzero_diffs = diffs.filter(lambda s: bool(s))
+    nonzero_diffs_sample = nonzero_diffs.take(10)
+    if nonzero_diffs_sample:
+      return "Datum mismatch (%s datums), first 10: \n%s" % (
+        nonzero_diffs.count(),
+        pprint.pformat(nonzero_diffs_sample))
+    
+    # No diffs!
+    return ''
diff --git a/psegs/table/sd_table_factory.py b/psegs/table/sd_table_factory.py
new file mode 100644
index 0000000..760b7cc
--- /dev/null
+++ b/psegs/table/sd_table_factory.py
@@ -0,0 +1,472 @@
+# Copyright 2022 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pathlib import Path
+
+import attr
+
+from psegs import util
+from psegs.datum import URI
+from psegs.table.sd_table import StampedDatumTable
+
+
+class StampedDatumTableFactory(object):
+
+  ## Public API
+
+  PARTITION_KEYS = ('dataset', 'split', 'segment_id')
+  
+  @classmethod
+  def get_all_segment_uris(cls):  # Parses each `_get_all_segment_uris()` entry into a `URI`
+    return [URI.from_str(u) for u in cls._get_all_segment_uris()]
+
+  @classmethod
+  def get_segment_sd_table(cls, segment_uri=None, segment_uris=None, spark=None):
+    """Return a `StampedDatumTable` over the union of the datum RDDs created
+    for `segment_uri` (which takes precedence, if given) or `segment_uris`."""
+    from psegs.spark import Spark
+    from pyspark import StorageLevel
+    if segment_uri is not None:
+      segment_uris = [segment_uri]
+    segment_uris = [URI.from_str(s) for s in (segment_uris or [])]
+    with Spark.sess(spark) as spark:
+      datum_rdds = cls._create_datum_rdds(spark, only_segments=segment_uris)
+      if datum_rdds:
+        datum_rdd = spark.sparkContext.union(datum_rdds)
+      else:
+        # `SparkContext.union()` needs at least one RDD; use an empty RDD
+        datum_rdd = spark.sparkContext.parallelize([])
+      datum_rdd = datum_rdd.persist(StorageLevel.MEMORY_AND_DISK)
+      return StampedDatumTable.from_datum_rdd(datum_rdd, spark=spark)
+
+  @classmethod
+  def save_parquet(
+        cls,
+        dest_dir,
+        only_segments=None,
+        existing_uri_df=None,
+        auto_resume_incomplete=True,
+        spark=None):
+    """Create datum RDDs and save them as parquet to `dest_dir/stamped_datums`.
+    If `auto_resume_incomplete`, no `existing_uri_df` is given, and `dest_dir`
+    exists, URIs already saved there are read and used as `existing_uri_df`."""
+    from psegs.spark import Spark
+    from psegs.spark import save_sd_tables
+
+    dest_dir = Path(dest_dir)
+
+    with Spark.sess(spark) as spark:
+      if auto_resume_incomplete and existing_uri_df is None and dest_dir.exists():
+        util.log.info(f"Attempting to resume from {dest_dir} ...")
+        F = ParquetSDTFactory.factory_for_sd_subdirs(dest_dir)
+        existing_uri_df = F.read_uri_df(spark=spark)
+        
+        if existing_uri_df is not None:
+          util.log.info(
+            f"... found {existing_uri_df.count()} datums in {dest_dir} ...")
+        else:
+          util.log.info(f"... found no datum data in {dest_dir} ...")
+    
+      if only_segments:
+        only_segments = [URI.from_str(s).to_segment_uri() for s in only_segments]
+    
+      sd_rdds = cls._create_datum_rdds(
+                            spark, 
+                            existing_uri_df=existing_uri_df,
+                            only_segments=only_segments)
+      sd_tables = [
+        StampedDatumTable.from_datum_rdd(sd_rdd) for sd_rdd in sd_rdds
+      ]
+      save_sd_tables(
+        sd_tables,
+        spark=spark,
+        spark_save_opts=dict(
+          # Spark needs a plain string path, not a `pathlib.Path`
+          path=str(dest_dir / 'stamped_datums'),
+          format='parquet',
+          partitionBy=cls.PARTITION_KEYS,
+          compression='zstd'))
+  
+  # @classmethod
+  # def load_parquet(
+  #       cls,
+  #       stamped_datums_root,
+  #       spark=None):
+    
+
+  ## Subclass API - Datasets should provide ETL to lists of RDD[StampedDatum]
+
+  @classmethod
+  def _get_all_segment_uris(cls):  # Subclass hook; `get_all_segment_uris()` parses each entry
+    return []
+
+  @classmethod
+  def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):
+    """Subclasses should create and return a list of `RDD[StampedDatum]`s.
+
+    `only_segments`, if given, must be segment URIs.  `save_parquet()` passes
+    the URIs found in a previous save as `existing_uri_df`.  TODO: more docs."""
+    return []
+
+
+  # @classmethod
+  # def as_df(cls, spark, force_compute=False, only_segments=None):
+  #   # TODO REPLACE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  #   """ comments ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"""
+  #   if force_compute: # hacks
+  #     sd_rdds = cls._create_datum_rdds(spark, only_segments=only_segments)
+  #     sd_dfs = [cls._sd_rdd_to_sd_df(spark, rdd) for rdd in sd_rdds]
+  #     import oarphpy.spark
+  #     return oarphpy.spark.union_dfs(*sd_dfs)
+
+
+  #   util.log.info("Loading %s ..." % cls.table_root())
+  #   # df = spark.read.option("mergeSchema", "true").parquet(str(cls.table_root()))
+  #   df = spark.read.parquet(str(cls.table_root()))
+  #   # df = spark.read.schema(cls.table_schema()).option("mergeSchema", "true").load(path=cls.table_root())
+  #   # read = read.schema(
+  #   # df = spark.read.parquet(cls.table_root(), schema=cls.table_schema())
+  #   return df
+
+  # @classmethod
+  # def as_datum_rdd(cls, spark, df=None):
+  #   df = df or cls.as_df(spark)
+  #   return df.rdd.map(StampedDatumTableFactory.from_row)
+
+  # @classmethod
+  # def get_segment_datum_rdd(cls, spark, segment_uri, time_ordered=True):
+  #   if util.missing_or_empty(cls.table_root()):
+  #     datum_rdds = cls._create_datum_rdds(spark, only_segments=[segment_uri])
+  #     if not datum_rdds:
+  #       return spark.sparkContext.parallelize([])
+  #     datum_rdd = spark.sparkContext.union(datum_rdds)
+  #     from pyspark import StorageLevel
+  #     datum_rdd = datum_rdd.persist(StorageLevel.MEMORY_AND_DISK)
+  #     if time_ordered:
+  #       datum_rdd = datum_rdd.sortBy(lambda sd: sd.uri.timestamp)
+  #     return datum_rdd
+  #   else:
+  #     df = cls.as_df(spark)
+  #     assert segment_uri.segment_id
+  #     seg_df = df.filter(df.segment_id == segment_uri.segment_id)
+  #     if segment_uri.dataset:
+  #       seg_df = seg_df.filter(df.dataset == segment_uri.dataset)
+  #     if segment_uri.split:
+  #       seg_df = seg_df.filter(df.split == segment_uri.split)
+
+  #     seg_df = seg_df.persist()
+  #     if time_ordered:
+  #       seg_df = seg_df.orderBy('uri.timestamp')
+  #     return seg_df.rdd.map(StampedDatumTableFactory.from_row)
+
+  @classmethod
+  def _get_segment_datum_rdd_or_df(cls, spark, segment_uri):  # Currently always returns an RDD
+    segment_uri = URI.from_str(segment_uri)
+    if True:# FIXME: always taken; was `util.missing_or_empty(cls.table_root())`
+      print('fixme')
+      return cls._create_segment_datum_rdd(spark, segment_uri)
+    else:
+      return cls._get_segment_df(spark, segment_uri)  # NOTE(review): unreachable; `_get_segment_df` is not defined in this class
+  
+  @classmethod
+  def _create_segment_datum_rdd(cls, spark, segment_uri):
+    """Return one RDD of the datums of `segment_uri`, restricted to
+    `segment_uri.sel_datums` (matched on topic and timestamp) when set."""
+    seg_rdds = cls._create_datum_rdds(spark, only_segments=[segment_uri])
+    if not seg_rdds:
+      return spark.sparkContext.parallelize([])
+    unioned = spark.sparkContext.union(seg_rdds)
+    if not segment_uri.sel_datums:
+      return unioned
+    wanted = {(sd.topic, sd.timestamp) for sd in segment_uri.sel_datums}
+    return unioned.filter(lambda sd: (sd.uri.topic, sd.uri.timestamp) in wanted)
+
+  @classmethod
+  def _get_segment_datum_df_from_disk(cls, spark, segment_uri):  # Filter `as_df()` to one segment
+    df = cls.as_df(spark)  # NOTE(review): `as_df` exists only as commented-out code in this class
+    assert segment_uri.segment_id, "Bad URI %s" % segment_uri
+    seg_df = df.filter(df.segment_id == segment_uri.segment_id)
+    if segment_uri.dataset:
+      seg_df = seg_df.filter(df.dataset == segment_uri.dataset)
+    if segment_uri.split:
+      seg_df = seg_df.filter(df.split == segment_uri.split)
+    if segment_uri.sel_datums:
+      import pyspark.sql.functions as F
+      from functools import reduce
+      seg_df = seg_df.where(  # Keep rows matching ANY selected (topic, timestamp) pair
+        reduce(
+          lambda a, b: a | b,
+          ((F.col('uri.topic') == sd.topic) & 
+            (F.col('uri.timestamp') == sd.timestamp)
+          for sd in segment_uri.sel_datums)))
+    return seg_df
+
+  @classmethod
+  def get_segment_datum_rdd(cls, spark, segment_uri):  # Always returns an RDD of datums
+    rdd_or_df = cls._get_segment_datum_rdd_or_df(spark, segment_uri)
+    if hasattr(rdd_or_df, 'rdd'):  # Has `.rdd`: treat as a DataFrame and convert
+      return cls.sd_df_to_rdd(rdd_or_df)
+    else:
+      return rdd_or_df
+  
+  @classmethod
+  def get_segment_datum_df(cls, spark, segment_uri):  # Always returns a DataFrame of datums
+    rdd_or_df = cls._get_segment_datum_rdd_or_df(spark, segment_uri)
+    if hasattr(rdd_or_df, 'rdd'):  # Has `.rdd`: treat as already a DataFrame
+      return rdd_or_df
+    else:
+      return cls._sd_rdd_to_sd_df(spark, rdd_or_df)
+
+  @classmethod
+  def get_sample(cls, uri, spark=None):  # Collect the datums for `uri` into a `Sample`
+    with Spark.sess(spark) as spark:  # NOTE(review): `Spark` is not imported in this module
+      datums = cls._get_segment_datum_rdd_or_df(spark, uri)
+      if hasattr(datums, 'rdd'):
+        datums = cls.sd_df_to_rdd(datums)
+      return Sample(uri=uri, datums=datums.collect())  # NOTE(review): `Sample` is not imported either
+      
+
+  # @classmethod
+  # def get_segment_datum_rdd(cls, spark, segment_uri, time_ordered=True):
+  #   if util.missing_or_empty(cls.table_root()):
+  #     datum_rdds = cls._create_datum_rdds(spark, only_segments=[segment_uri])
+  #     if not datum_rdds:
+  #       return spark.sparkContext.parallelize([])
+  #     datum_rdd = spark.sparkContext.union(datum_rdds)
+      
+  #     from pyspark import StorageLevel
+  #     datum_rdd = datum_rdd.persist(StorageLevel.MEMORY_AND_DISK)
+  #     if time_ordered:
+  #       datum_rdd = datum_rdd.sortBy(lambda sd: sd.uri.timestamp)
+  #     return datum_rdd
+  #   else:
+  #     df = cls.as_df(spark)
+  #     assert segment_uri.segment_id
+  #     seg_df = df.filter(df.segment_id == segment_uri.segment_id)
+  #     if segment_uri.dataset:
+  #       seg_df = seg_df.filter(df.dataset == segment_uri.dataset)
+  #     if segment_uri.split:
+  #       seg_df = seg_df.filter(df.split == segment_uri.split)
+
+  #     seg_df = seg_df.persist()
+  #     if time_ordered:
+  #       seg_df = seg_df.orderBy('uri.timestamp')
+  #     return seg_df.rdd.map(StampedDatumTableFactory.from_row)
+
+  @staticmethod
+  def to_row(sd):
+    """Convert `sd` to a Spark `Row` with a column per partition key.  This method is FINAL!"""
+    from oarphpy.spark import RowAdapter
+    from pyspark.sql import Row
+
+    # TODO: do we need this method, or can the partition keys be added in a DataFrame / writer step?
+    row = RowAdapter.to_row(sd).asDict()
+    for k in StampedDatumTableFactory.PARTITION_KEYS:
+      row[k] = getattr(sd.uri, k)
+    return Row(**row)
+
+  @staticmethod
+  def from_row(row):
+    """Decode table row `row` via `StampedDatumTable.sd_from_row()`.  This method is FINAL!"""
+    return StampedDatumTable.sd_from_row(row)  # `RowAdapter` is not imported in this module
+
+  # @classmethod
+  # def as_uri_df(cls, spark):
+  #   if util.missing_or_empty(cls.table_root()):
+  #     return spark.sparkContext.parallelize([])
+  #   df = cls.as_df(spark)
+
+  #   import attr
+  #   colnames = [
+  #     'uri.' + f.name
+  #     for f in attr.fields(URI)
+  #     if f.name not in cls.PARTITION_KEYS
+  #   ]
+  #   colnames += [c for c in cls.PARTITION_KEYS]
+  #     # Use the partition columns for faster filters
+  #   uri_df = df.select(colnames)
+  #   return uri_df
+    # COLS = list(URI.__slots__)
+    # uri_df = df.select(*COLS)
+    # return uri_df
+
+  # @classmethod
+  # def as_stamped_datum_rdd(cls, spark):
+  #   df = cls.as_df(spark)
+  #   sd_rdd = df.rdd.map(RowAdapter.from_row)
+  #   return sd_rdd
+
+
+  
+
+  ## Support
+
+  @classmethod
+  def table_schema(cls):
+    """Return the Spark schema of rows of `StampedDatum`s (with partition columns)."""
+    # Delegate to `StampedDatumTable`: this module imports neither `RowAdapter`
+    # nor `STAMPED_DATUM_PROTO`, so building the schema here raised `NameError`.
+    # NOTE(review): assumes both classes use the same `PARTITION_KEYS` -- confirm.
+    return StampedDatumTable.table_schema()
+
+  @classmethod
+  def _sd_rdd_to_sd_df(cls, spark, sd_rdd):
+    """Convert an RDD of `StampedDatum`s to a DataFrame with `table_schema()`."""
+    
+    row_rdd = sd_rdd.map(StampedDatumTableFactory.to_row)
+    df = spark.createDataFrame(row_rdd, schema=cls.table_schema())
+    return df
+
+  @classmethod
+  def sd_df_to_rdd(cls, sd_df):
+    # Decode each row via `from_row()`.  TODO: refactor this and `_sd_rdd_to_sd_df()` above
+    return sd_df.rdd.map(cls.from_row)
+  
+  
+class ParquetSDTFactory(StampedDatumTableFactory):
+  PQ_DIRS = []
+
+  @classmethod
+  def factory_for_sd_subdirs(cls, pq_base_path, sd_dir_name='stamped_datums'):
+    """Return a subclass of `cls` whose `PQ_DIRS` are the directories named
+    `sd_dir_name` that sit directly under `pq_base_path`."""
+    import os
+    from glob import glob # NB: does follow symlinks, Path is broken
+    pq_dirs = sorted(  # NOTE(review): `recursive=True` below has no effect without `**`
+                p
+                for p in glob(os.path.join(pq_base_path, '*'), recursive=True)
+                if (os.path.isdir(p) and os.path.basename(p) == sd_dir_name))
+    util.log.info(f"Found StampedDatumTable parquet data: {pq_dirs}")
+    class MyPQSDTFactory(cls):
+      PQ_DIRS = pq_dirs
+    return MyPQSDTFactory
+
+  @classmethod
+  def read_spark_df(cls, spark=None):
+    """Return the union of the parquet tables in `PQ_DIRS`, or `None` if empty."""
+    from psegs.spark import Spark
+    with Spark.sess(spark) as spark:
+      unioned = None
+      for pq_df in (spark.read.parquet(str(d)) for d in cls.PQ_DIRS):
+        unioned = pq_df if unioned is None else unioned.union(pq_df)
+      return unioned
+
+  @classmethod
+  def create_as_single_table(cls, spark=None):  # NOTE(review): `read_spark_df()` may return `None`
+    return StampedDatumTable.from_spark_df(cls.read_spark_df(spark=spark))
+
+  @classmethod
+  def read_seg_uri_rdd(cls, spark=None):
+    """Return an RDD of the distinct segment URIs (dataset, split, segment_id)
+    in the parquet data, or `None` if there is no data to read."""
+    print("TODO ?? uri_df = cls.as_uri_df(spark=spark)")
+    df = cls.read_spark_df(spark=spark)
+    if df is None:
+      return None
+    seg_col_df = df.select(
+                    df['uri.dataset'].alias('dataset'),
+                    df['uri.split'].alias('split'),
+                    df['uri.segment_id'].alias('segment_id'))
+    seg_col_df = seg_col_df.distinct()
+
+    def to_uri(row):
+      return URI(
+              dataset=row.dataset,
+              split=row.split,
+              segment_id=row.segment_id)
+    uri_rdd = seg_col_df.rdd.map(to_uri)
+    return uri_rdd
+
+  @classmethod
+  def read_uri_df(cls, spark=None):
+    """Return a DataFrame of only the URI columns of the parquet data, or `None` if no data."""
+    print("TODO move to superclass if possible? tho is generally expensive")
+    df = cls.read_spark_df(spark=spark)
+    if df is None:
+      return None
+    colnames = [
+      'uri.' + f.name
+      for f in attr.fields(URI)
+      if f.name not in cls.PARTITION_KEYS
+    ]
+    colnames += [c for c in cls.PARTITION_KEYS]
+      # Use the partition columns for faster filters
+    colnames += ['uri.__pyclass__']
+    uri_df = df.select(colnames)
+    return uri_df
+
+#     return uri_df
+
+# def as_uri_df(self, spark=None):
+    
+
+#   def as_uri_rdd(self, spark=None):
+#     if self._spark_df or self._sample:
+#       uri_df = self.as_uri_df(spark=spark)
+#       return uri_df.rdd.map(self.uri_from_row)
+#     else:
+#       datum_rdd = self.to_datum_rdd(spark=spark)
+#       return datum_rdd.map(lambda sd: sd.uri)
+
+
+#     df = cls.read_spark_df(spark=spark)
+#     if df.rdd.isEmpty():
+#       return None
+
+#     def uri_from_row(row):
+#       from oarphpy.spark import RowAdapter
+#       return RowAdapter.from_row(row.uri)
+    
+#     uri_rdd = df.select(df['uri']).rdd.map(uri_from_row)
+#     return uri_rdd
+
+  ## StampedDatumTableFactory Impl
+
+  @classmethod
+  def _get_all_segment_uris(cls):
+    seg_uri_rdd = cls.read_seg_uri_rdd()  # `None` when there is no parquet data to read
+    return sorted(seg_uri_rdd.collect()) if seg_uri_rdd is not None else []
+
+  @classmethod
+  def _create_datum_rdds(cls, spark, existing_uri_df=None, only_segments=None):
+    """Return one `RDD[StampedDatum]` per segment found in `PQ_DIRS` (optionally
+    restricted to `only_segments`).  `existing_uri_df` is logged and ignored."""
+    if existing_uri_df is not None:
+      util.log.info(
+        f"Note: resume mode unsupported, got existing_uri_df {existing_uri_df}")
+    
+    seg_uri_rdd = cls.read_seg_uri_rdd(spark=spark)
+    if seg_uri_rdd is None:
+      return []  # No parquet data to read (e.g. `PQ_DIRS` is empty)
+    if only_segments:
+      def has_match(s):
+        return any(
+              URI.from_str(suri).soft_matches_segment_of(s)
+              for suri in only_segments)
+      seg_uri_rdd = seg_uri_rdd.filter(has_match)
+    segs_to_load = seg_uri_rdd.collect()
+
+    util.log.info(f"Creating datum RDDs for {len(segs_to_load)} segments ...")
+
+    union_df = cls.read_spark_df(spark=spark)
+    datum_rdds = []
+    for suri in segs_to_load:
+      seg_df = union_df.filter(
+                (union_df.dataset == suri.dataset) &
+                (union_df.split == suri.split) &
+                (union_df.segment_id == suri.segment_id))
+      datum_rdds.append(StampedDatumTable.datum_df_to_datum_rdd(seg_df))
+
+    util.log.info(f"... created datum RDDs.")
+    return datum_rdds
diff --git a/psegs/table/union_factory.py b/psegs/table/union_factory.py
new file mode 100644
index 0000000..acc773e
--- /dev/null
+++ b/psegs/table/union_factory.py
@@ -0,0 +1,102 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import itertools
+import copy
+
+from psegs import datum
+from psegs.table.sd_table import StampedDatumTable
+from psegs.table.sd_table_factory import StampedDatumTableFactory
+
+
+class NoKnowStampedDatumTableFactory(Exception):
+  """Raised when no factory in a `UnionFactory` has the requested segment URI."""
+
+
+class UnionFactory(StampedDatumTableFactory):
+  """Induce a union over a list of `StampedDatumTableFactory`s, with amortized
+  O(1) lookup of factory by segment URI."""
+
+  SDT_FACTORIES = []
+
+  @classmethod
+  def get_all_segment_uris(cls):
+    """Return the sorted segment URIs of all `SDT_FACTORIES`, merged per segment."""
+    iseg_uris = itertools.chain.from_iterable(
+        F.get_all_segment_uris() for F in cls.SDT_FACTORIES)
+    # Two factories might have data for the same segment. Merge these such
+    # that e.g. the user can see `extra` info that distinguishes the sources.
+    key_to_uri = {}
+    for suri in iseg_uris:
+      key = (suri.dataset, suri.split, suri.segment_id)
+      if key in key_to_uri:
+        ref_suri = key_to_uri[key]
+        ref_suri.extra.update(suri.extra)
+      else:
+        key_to_uri[key] = copy.deepcopy(suri)
+    return sorted(key_to_uri.values())
+
+  @classmethod
+  def get_segment_sd_table(cls, segment_uri, spark=None):
+    """Return one table unioning the datums every matching factory has for `segment_uri`."""
+    from psegs.spark import Spark
+    Fs = cls._get_factories_for_seg_uri(segment_uri)
+    with Spark.sess(spark) as spark:
+      # TODO make this more flexible, for now we assume 
+      # SDTF::get_segment_sd_table() gets a datum_rdd sdt and so it's 
+      # fastest to just union those.
+      union_datum_rdd = None
+      for F in Fs:
+        sdt = F.get_segment_sd_table(segment_uri=segment_uri, spark=spark)
+        datum_rdd = sdt.to_datum_rdd()
+        if union_datum_rdd is None:
+          union_datum_rdd = datum_rdd
+        else:
+          union_datum_rdd = union_datum_rdd.union(datum_rdd)
+      union_sdt = StampedDatumTable.from_datum_rdd(
+        union_datum_rdd, spark=spark)
+    return union_sdt
+
+  # @classmethod
+  # def build_cache(cls, spark=None, only_segments=None, table_root=''):
+  #   raise NotImplementedError # TODO
+
+  @classmethod
+  def _seguri_to_factoryidxes(cls):
+    """Return a cached map of segment URI string -> indices into `SDT_FACTORIES`."""
+    # Check `cls.__dict__` (not `hasattr`) so that a subclass with different
+    # `SDT_FACTORIES` never reuses an index cached on a parent class.
+    if '_seguri_to_factoryidxes_cache' not in cls.__dict__:
+      seguri_to_factoryidxes = {}
+      for F_idx, F in enumerate(cls.SDT_FACTORIES):
+        for seg_uri in F.get_all_segment_uris():
+          key = str(seg_uri.to_segment_uri())
+          seguri_to_factoryidxes.setdefault(key, []).append(F_idx)
+      cls._seguri_to_factoryidxes_cache = seguri_to_factoryidxes
+    return cls._seguri_to_factoryidxes_cache
+  
+  @classmethod
+  def _get_factories_for_seg_uri(cls, seg_uri):
+    """Return the factories in `SDT_FACTORIES` that have the segment of
+    `seg_uri`; raise `NoKnowStampedDatumTableFactory` if there are none."""
+    seg_uri = datum.URI.from_str(seg_uri)
+    lookup_key = str(seg_uri.to_segment_uri())
+    factory_idxes = cls._seguri_to_factoryidxes().get(lookup_key)
+
+    # Guard clause: no factory reported this segment
+    if factory_idxes is None:
+      raise NoKnowStampedDatumTableFactory(str(seg_uri))
+    return [cls.SDT_FACTORIES[i] for i in factory_idxes]
+
diff --git a/psegs/util/__init__.py b/psegs/util/__init__.py
new file mode 100644
index 0000000..3ce21cd
--- /dev/null
+++ b/psegs/util/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from psegs.util.misc import *
diff --git a/psegs/util/misc.py b/psegs/util/misc.py
new file mode 100644
index 0000000..4bc7588
--- /dev/null
+++ b/psegs/util/misc.py
@@ -0,0 +1,261 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import difflib
+import pprint
+from pathlib import Path
+
+import attr
+import numpy as np
+from oarphpy import util as oputil
+
+# A global logger, just for PSegs
+log = oputil.create_log(name='ps')
+
+
+def missing_or_empty(path):
+  # TODO: support S3 and GCS paths; for now this just defers to `oarphpy`.
+  return oputil.missing_or_empty(path)
+
+
+def attrs_eq(o1, o2):
+  """A utility for providing an `__eq__()` method to `attrs`-based classes
+  that contain `numpy`-valued attributes.
+
+  Notes:
+    See also https://github.com/python-attrs/attrs/issues/435
+
+  Args:
+    o1 (object): An `attr.s()`-based object.
+    o2 (object): An object of the same type as `o1` (else `TypeError`).
+
+  Returns:
+    bool: True if every attribute of `o1` equals that of `o2`.
+  """
+
+  if not type(o1) == type(o2):
+    raise TypeError
+
+  o1t = attr.astuple(o1, recurse=False)
+  o2t = attr.astuple(o2, recurse=False)
+
+  def eq(a1, a2):
+    # `==` with an array on *either* side is element-wise; use array_equal()
+    if isinstance(a1, np.ndarray) or isinstance(a2, np.ndarray):
+      return np.array_equal(a1, a2)
+    return a1 == a2
+
+  return all(eq(*ats) for ats in zip(o1t, o2t))
+
+
+def unarchive_entries(archive_path, archive_files, dest):
+  """Un-tar or un-zip a specific subset of files to `dest`.
+
+  Args:
+    archive_path (str or pathlib.Path): Use this archive.
+    archive_files (List[str] or List[pathlib.Path]): Extract only these files
+      (archive entries) from the given archive.
+    dest (str or pathlib.Path): The root directory for extracted files.
+  """
+  log.info("Trying to extract %s entries from %s ..." % (
+                                    len(archive_files), archive_path))
+
+  # Keep only the flyweights whose entry name was requested
+  fws = oputil.ArchiveFileFlyweight.fws_from(str(archive_path))
+  archive_files = set(str(fname) for fname in archive_files)
+  fws = [fw for fw in fws if fw.name in archive_files]
+
+  # Write each entry to `dest / <entry name>`, creating parent dirs as needed
+  oputil.mkdir(dest)
+  dest = Path(dest)
+  for fw in fws:
+    fw_dest = dest / fw.name
+    oputil.mkdir(str(fw_dest.parent))
+    with open(fw_dest, 'wb') as f:
+      f.write(fw.data)
+
+  log.info("... saved %s entries to %s" % (len(fws), dest))
+
+
+def get_png_wh(png_bytes):
+  """Return the dimensions for a PNG image.
+
+  Based upon `get_image_size <https://github.com/scardine/image_size/blob/fb25377f42fc6c90c280462a87a41cf20cc2ac0e/get_image_size.py#L107>`_
+
+  Args:
+    png_bytes (bytearray): Bytes of a PNG file buffer (>= 24 bytes needed)
+
+  Returns:
+    int: width of the image in pixels
+    int: height of the image in pixels
+
+  Raises:
+    ValueError: If the buffer is not a PNG or is shorter than 24 bytes.
+  """
+  import struct
+  from io import BytesIO
+  head = BytesIO(png_bytes).read(24)
+  # A truncated header must not leak out as an opaque `struct.error`
+  if len(head) < 24 or not head.startswith(b'\211PNG\r\n\032\n'):
+    raise ValueError("Not a PNG")
+  w, h = struct.unpack(">LL", head[16:24])
+  return int(w), int(h)
+
+def get_image_wh(path):
+  """Return the `(width, height)` in pixels of the image file at `path`,
+  reading only the first 1024 bytes for JPEG and PNG where possible."""
+  lpath = str(path).lower()
+  w, h = None, None
+  if lpath.endswith('.jpg') or lpath.endswith('.jpeg'):
+    from oarphpy import util as oputil
+    try:
+      with open(path, 'rb') as f:
+        w, h = oputil.get_jpeg_size(f.read(1024))
+    except ValueError:
+      # Read entire image as fallback (slow)
+      import imageio
+      from PIL import Image
+      orig_max_pixels = Image.MAX_IMAGE_PIXELS
+      Image.MAX_IMAGE_PIXELS = 1e12
+      try:
+        h, w = imageio.imread(path).shape[:2] # array is (height, width, ..)
+      finally:
+        Image.MAX_IMAGE_PIXELS = orig_max_pixels # restore even on failure
+
+  elif lpath.endswith('.png'):
+    with open(path, 'rb') as f:
+      w, h = get_png_wh(f.read(1024))
+  # TODO raw image support?
+  else:
+    import imageio
+    h, w = imageio.imread(path).shape[:2] # array is (height, width, ..)
+
+  return w, h
+
+
+def diff_of_pprint(v1, v2):
+  """Pretty-print `v1` and `v2` and return their line-wise `ndiff` as a str"""
+  # Based upon pytest:
+  # https://github.com/pytest-dev/pytest/blob/55debfad1f690d11da3b33022d55c49060460e44/src/_pytest/assertion/util.py#L236
+  # https://docs.python.org/3/library/difflib.html#difflib.ndiff
+
+  rendered = [
+    pprint.pformat(value).splitlines(keepends=True) for value in (v1, v2)
+  ]
+  return ''.join(difflib.ndiff(*rendered))
+
+
+# TODO: make these work for classes... rowadapter will only do objects
+def save_rowized_pkl(obj, path):
+  import pickle
+  from oarphpy.spark import RowAdapter
+  row = RowAdapter.to_row(obj) # pickle the row-ized form, not `obj` itself
+  with open(path, 'wb') as f:
+    pickle.dump(row, f, protocol=3) # Support older python
+
+
+def load_rowized_pkl(path):
+  import pickle
+  from oarphpy.spark import RowAdapter
+  with open(path, 'rb') as f:
+    row = pickle.load(f) # NB: unpickling can run code; only load trusted files
+  return RowAdapter.from_row(row) # undoes the `to_row()` in `save_rowized_pkl()`
+
+
+# Stolen from oarphpy RowAdapter ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# def _get_classname_from_obj(o):
+#   # Based upon https://stackoverflow.com/a/2020083
+#   module = o.__class__.__module__
+#   # NB: __module__ might be null
+#   if module is None or module == str.__class__.__module__:
+#     return o.__class__.__name__  # skip "__builtin__"
+#   else:
+#     return module + '.' + o.__class__.__name__
+# def _get_class_from_path(path):
+#   # Pydoc is a bit safer and more robust than anything we can write
+#   import pydoc
+#   obj_cls, obj_name = pydoc.resolve(path)
+#   assert obj_cls
+#   return obj_cls
+
+# _LAZY_SLOTS = (
+#   # '__pyclass',
+#   '__thunktor_bytes',
+#   # '__pyclass_bytes',
+#   '__value',
+#   '__lock',
+# )
+
+# class LazyThunktor(object):
+#   """
+#   design: thunktor can get invoked:
+#     * once per process when impl is called
+#     * the above, but once after deser
+#     * the big value is never serialized ...
+#   """
+
+#   __slots__ = _LAZY_SLOTS
+
+#   def __init__(self, thunktor):#, embed_cls=True):
+#     self.__value = None
+#     # self.__pyclass = _get_classname_from_obj(thunktor)
+
+#     import cloudpickle
+#     self.__thunktor_bytes = cloudpickle.dumps(thunktor)
+
+#     import threading
+#     self.__lock = threading.Lock()
+  
+#   def __getstate__(self):
+#     d = dict(
+#       (k, getattr(self, k))
+#       for k in self.__slots__
+#       if k not in ('__lock', '__value'))
+#     return d
+  
+#   def __setstate__(self, d):
+#     for k, v in d.items():
+#       if k not in ('__lock', '__value'):
+#         setattr(self, k, v)
+#     import threading
+#     self.__lock = threading.Lock()
+  
+#   @property
+#   def impl(self):
+#     if self.__value is not None:
+#       return self.__value
+    
+#     with self.__lock:
+#       import cloudpickle
+#       thunktor = cloudpickle.loads(self.__thunktor_bytes)
+#       self.__value = thunktor()
+#     return self.__value
+
+#   def __getattribute__(self, name):
+#     if name in _LAZY_SLOTS or name in ('impl', '__slots__'):
+#       return object.__getattribute__(self, name)
+#     else:
+#       return object.__getattribute__(self.impl, name)
+           
+#   def __setattr__(self, name, value):
+#     if name in _LAZY_SLOTS or name in ('impl', '__slots__'):
+#       return object.__setattr__(self, name)
+#     else:
+#       return object.__setattr__(self.impl, name)
+  
+#   def __delattr__(self, name, value):
+#     if name in _LAZY_SLOTS or name in ('impl', '__slots__'):
+#       return object.__delattr__(self, name)
+#     else:
+#       return object.__delattr__(self.impl, name)
diff --git a/psegs/util/plotting.py b/psegs/util/plotting.py
new file mode 100644
index 0000000..065114d
--- /dev/null
+++ b/psegs/util/plotting.py
@@ -0,0 +1,1392 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import numpy as np
+
+
+PLOTLY_INIT_HTML = """
+    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
+    <script src='https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js'></script>
+    <script>requirejs.config({
+        paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});
+        if(!window.Plotly) {{require(['plotly'],function(plotly) {window.Plotly=plotly;});}}</script>
+    """
+
+
+def color_to_opencv(color):
+  r, g, b = np.clip(color, 0, 255).astype(int).tolist() # ints in [0, 255]
+  return r, g, b # NB: channel order is unchanged (no RGB -> BGR swap)
+
+
+def contrasting_color(color):
+  """Return an int (r, g, b) tuple contrasting with the 0-255 `color`: in HSV
+  space, hue and value are each mapped x -> |1 - x|."""
+  import colorsys
+
+  unit_rgb = (np.array(color) / 255.).tolist()
+  r, g, b = unit_rgb
+  hue, sat, val = colorsys.rgb_to_hsv(r, g, b)
+  # Flip hue and value; saturation is kept as-is
+  flipped = colorsys.hsv_to_rgb(abs(1. - hue), sat, abs(1. - val))
+  out = 255 * np.array(flipped)
+  return tuple(out.astype(int).tolist())
+
+
+def draw_bbox_in_image(np_image, bbox, color=None, label_txt='', thickness=2):
+  """Draw a bounding box (and a filled label tag) in `np_image`, in-place.
+
+  Args:
+    np_image (numpy.ndarray): Draw in this image.
+    bbox: A (x1, y1, x2, y2) tuple or a `BBox2D` instance.
+    color (tuple): An (r, g, b) tuple specifying the border color; by
+        default use a color hashed from the label text.
+    label_txt (str): Override for the label text drawn for this box; when
+        empty, fall back to `bbox.category_name`.  The label is drawn above
+        the box, or below it when it would run off the top of the image.
+    thickness (int): thickness of the box border in pixels (the label text
+        itself is always drawn with thickness 1).
+  """
+  import cv2
+  from psegs.datum.bbox2d import BBox2D
+  
+  if not isinstance(bbox, BBox2D):
+    bbox = BBox2D.from_x1_y1_x2_y2(*bbox)
+
+  label_txt = label_txt or bbox.category_name
+  if not color:
+    from oarphpy.plotting import hash_to_rbg
+    color = hash_to_rbg(label_txt)
+
+  x1, y1, x2, y2 = bbox.get_x1_y1_x2_y2()
+
+  ### Draw Box
+  cv2.rectangle(
+    np_image,
+    (x1, y1),
+    (x2, y2),
+    color_to_opencv(color),
+    thickness=thickness)
+
+  ### Draw Text
+  FONT_SCALE = 0.8
+  FONT = cv2.FONT_HERSHEY_PLAIN
+  PADDING = 2 # In pixels
+
+  ret = cv2.getTextSize(label_txt, FONT, fontScale=FONT_SCALE, thickness=1)
+  ((text_width, text_height), _) = ret
+
+  # Draw the label above the bbox by default ...
+  tx1, ty1 = bbox.x, bbox.y - PADDING
+
+  # ... unless the text would draw off the edge of the image ...
+  if ty1 - text_height - PADDING <= 0:
+    ty1 += bbox.height + text_height + 2 * PADDING
+  ty2 = ty1 - text_height - PADDING
+
+  # ... and also shift left if necessary.
+  if tx1 + text_width > np_image.shape[1]:
+    tx1 -= (tx1 + text_width + PADDING - np_image.shape[1])
+  tx2 = tx1 + text_width
+  
+  cv2.rectangle(
+    np_image,
+    (tx1, ty1 + PADDING),
+    (tx2, ty2 - PADDING),
+    color_to_opencv(color),
+    cv2.FILLED)
+
+  text_color = contrasting_color(color)
+  cv2.putText(
+    np_image,
+    label_txt,
+    (tx1, ty1),
+    FONT,
+    FONT_SCALE,
+    color_to_opencv(text_color),
+    1) # thickness
+
+
+def draw_cuboid_xy_in_image(img, pts, base_color_rgb, alpha=0.3, thickness=2):
+  """Draw a cuboid in the given image.  Color the front face lighter than
+  the rest of the cuboid edges to indicate orientation.
+
+  Args:
+    img (np.array): Draw in this image.
+    pts (np.array): An array of 8 by 2 representing pixel locations (x, y)
+      of the corners of the cuboid.  The first four coordinates are the front
+      face and the last four are the rear face.  The front and back faces
+      can wind in either CW or CCW order (but both must wind in the same order)
+    base_color_rgb (tuple): An (r, g, b) sequence specifying the color of
+      the cuboid, with components in [0, 255]
+    alpha (float): Blend cuboid color into the image using weight [0, 1].
+    thickness (int): line thickness of cuboid edges.
+  """
+
+  # Drawing is expensive! Skip if completely offscreen.
+  h, w = img.shape[:2]
+  onscreen = (
+    (pts[:, 0] >= 0) & (pts[:, 0] <= w) &
+    (pts[:, 1] >= 0) & (pts[:, 1] <= h))
+  if not onscreen.any(): # NB: test the mask, not index values (0 is falsy)
+    return
+
+  base_color = np.array(base_color_rgb)
+  front_color = color_to_opencv(base_color + 0.6 * 255)
+  back_color = color_to_opencv(base_color - 0.6 * 255)
+  center_color = color_to_opencv(base_color)
+
+  import cv2
+  # OpenCV can't draw transparent colors, so we use the 'overlay image' trick
+  overlay = img.copy()
+
+  def to_opencv_px(arr):
+    return np.rint(arr).astype(int)
+
+  front = to_opencv_px(pts[:4])
+  assert front.shape == (4, 2), "OpenCV requires nx2, have %s" % (front,)
+  cv2.polylines(
+    overlay,
+    [front],
+    True, # is_closed
+    front_color,
+    thickness)
+
+  back = to_opencv_px(pts[4:])
+  assert back.shape == (4, 2), "OpenCV requires nx2, have %s" % (back,)
+  cv2.polylines(
+    overlay,
+    [back],
+    True, # is_closed
+    back_color,
+    thickness)
+
+  for start, end in zip(front.tolist(), back.tolist()):
+    cv2.line(overlay, tuple(start), tuple(end), center_color, thickness)
+
+  # Add transparent fill
+  CUBOID_FILL_ALPHA = max(0, alpha - 0.1)
+  from scipy.spatial import ConvexHull
+  hull = ConvexHull(pts)
+  corners_uv = to_opencv_px(
+    np.array([
+      (pts[v, 0], pts[v, 1]) for v in hull.vertices]))
+  coverlay = overlay.copy()
+  cv2.fillPoly(coverlay, [corners_uv], center_color)
+  overlay[:] = cv2.addWeighted(
+    coverlay, CUBOID_FILL_ALPHA, overlay, 1 - CUBOID_FILL_ALPHA, 0)
+
+  # Now blend with input image!
+  img[:] = cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0)
+
+
+def rgb_for_distance(d_meters, period_meters=10.):
+  """Given a distance `d_meters` or an array of distances, return an
+  `np.array([r, g, b])` color array for the given distance (or a 2D array
+  of colors if the input is an array of 2+ distances).  We choose a distinct
+  hue every `period_meters` and interpolate between hues for `d_meters`.
+  """
+  import colorsys
+  from oarphpy.plotting import hash_to_rbg
+
+  if not isinstance(d_meters, np.ndarray):
+    d_meters = np.array([d_meters])
+  
+  SEED = 1337 # Colors for the first 10 buckets verified to be very distinct
+  base_rgb = hash_to_rbg(SEED)
+  base_h, base_s, base_v = colorsys.rgb_to_hsv(*base_rgb)
+  
+  # colorsys takes Hues in [0, 1] and the colors spaced ~0.5 apart are
+  # complementary.  We pick a value != 0.5 to create a coloring that is
+  # out-of-phase with the HSV color wheel (ensures distinct colors across
+  # depths)
+  COLOR_STEP = 0.5 + 1. / 12
+  if len(d_meters):
+    max_bucket = int(np.ceil(d_meters.max() / period_meters))
+  else:
+    max_bucket = 1
+  bucket_to_hsv = [
+    (base_h + (bucket * COLOR_STEP % 1.0), base_s, base_v)
+    for bucket in range(max_bucket + 2)
+  ]
+  bucket_to_rgb = [colorsys.hsv_to_rgb(*hsv) for hsv in bucket_to_hsv]
+  bucket_to_color = np.array(bucket_to_rgb)
+
+  # Use numpy's indexing for fast "table lookup" of bucket ids (bids) in
+  # the "table" bucket_to_color
+  bucket_below = np.floor(d_meters / period_meters)
+  bucket_above = bucket_below + 1
+
+  color_below = bucket_to_color[bucket_below.astype(int)]
+  color_above = bucket_to_color[bucket_above.astype(int)]
+
+  # For each distance, blend the two bracketing bucket colors; each weight is
+  # one minus the L1 distance to that bucket (the two weights sum to 1)
+  d_relative = d_meters / period_meters
+  l1_dist_below = np.abs(d_relative - bucket_below)
+  l1_dist_above = np.abs(d_relative - bucket_above)
+
+  colors = (
+    (1. - l1_dist_below) * color_below.T + 
+    (1. - l1_dist_above) * color_above.T)
+
+  colors = colors.T
+  if len(d_meters) == 1:
+    return colors[0]
+  else:
+    return colors
+
+
+def draw_xy_depth_in_image(
+      img,
+      pts,
+      marker_radius=-1,
+      alpha=.4,
+      period_meters=10.,
+      user_colors=None):
+  """Draw a point cloud `pts` in `img`; *modifies* `img` in-place (so you can 
+  compose this draw call with others). Point color interpolates between
+  standard colors for each `period_meters` tick.  Optionally override this
+  behavior using `user_colors`.
+
+  Args:
+    img (np.array): Draw in this image.
+    pts (np.array): An array of N by 3 points in form
+      (pixel x, pixel y, depth meters).
+    marker_radius (int): Draw a marker with this size (or a negative
+      number to auto-choose based upon number of points).
+    alpha (float): Blend point color using weight [0, 1].
+    period_meters (float): Choose a distinct hue every `period_meters` and
+      interpolate between hues.
+    user_colors (np.array): Instead of coloring by distance, use this array
+      of nx3 colors.
+  """
+
+  # OpenCV can't draw transparent colors, so we use the 'overlay image' trick:
+  # First draw dots on an overlay...
+  overlay = img.copy()
+  h, w = overlay.shape[:2]
+
+  if user_colors is not None:
+    # Add color columns
+    pts = np.hstack([pts, user_colors])
+  else:
+    pts = pts.copy()
+  
+  # Map points to pixels and filter off-screen points
+  pts_xy = np.rint(pts[:, :2])
+  pts[:, :2] = pts_xy[:, :2]
+  pts = pts[np.where(
+    (pts[:, 0] >= 0) & (pts[:, 0] < w) &
+    (pts[:, 1] >= 0) & (pts[:, 1] < h) &
+    (pts[:, 2] >= 0))]
+  if not pts.shape[0]: # NB: count rows; `.any()` is also False for all-zeros
+    return
+
+  # Sort by distance descending (negate the *depths*, not the sort indices);
+  # let nearer points draw over farther points
+  pts = pts[(-pts[:, 2]).argsort()]
+  
+  if user_colors is not None:
+    colors = pts[:, 3:]
+  else:
+    colors = rgb_for_distance(pts[:, 2], period_meters=period_meters)
+  colors = np.clip(colors, 0, 255).astype(int)
+  
+  # Draw the markers! NB: numpy assignment is very fast, even for 1M+ pts
+  yy = pts[:, 1].astype('int32')
+  xx = pts[:, 0].astype('int32')
+  overlay[yy, xx] = colors
+
+  if marker_radius < 0:
+    # Draw larger markers for fewer points (or user_colors) to make points
+    # more conspicuous
+    if user_colors is not None:
+      marker_radius = 3
+    elif pts.shape[0] <= 1e5:
+      marker_radius = 2
+
+  if marker_radius >= 1:
+    # Draw a crosshairs marker
+    for r in range(-marker_radius, marker_radius + 1):
+      overlay[(yy + r) % h, xx] = colors
+      overlay[yy, (xx + r) % w] = colors
+        # NB: toroidal boundary conditions plot hack for speed ...
+
+  # Now blend!
+  import cv2
+  img[:] = cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0)
+
+
+def draw_depth_in_image(
+      img,
+      depth_channel,
+      alpha=.4,
+      period_meters=10.):
+  """Draw a `depth_channel` in `img`; *modifies* `img` in-place (so you can
+  compose this draw call with others). Pixel color interpolates between
+  standard colors for each `period_meters` tick.  Pixels with non-positive
+  or non-finite depth keep their original color.
+
+  Args:
+    img (np.array): Draw in this image.
+    depth_channel (np.array): A depth channel of shape (h, w, 1) [or
+      just (h, w)] that matches the size of `img`.  Each value is a depth value
+      in meters.  Invalid values are ignored (drawn with 0 alpha) in the output.
+    alpha (float): Blend point color using weight [0, 1].
+    period_meters (float): Choose a distinct hue every `period_meters` and
+      interpolate between hues.
+  """
+
+  # OpenCV can't draw transparent colors, so we use the 'overlay image' trick:
+  # First draw dots on an overlay...
+  overlay = img.copy()
+  h, w = overlay.shape[:2]
+
+  depth_channel = depth_channel.squeeze().copy()
+  assert depth_channel.shape[:2] == (h, w)
+
+  valid = (depth_channel > 0) & np.isfinite(depth_channel)
+  # NB: test the mask itself; index arrays from `np.where` contain falsy 0s
+  if not valid.any():
+    return
+  
+  color_d = np.zeros_like(depth_channel)
+  color_d[valid] = depth_channel[valid]
+  color_d = np.reshape(color_d, [-1])
+  colors = rgb_for_distance(color_d, period_meters=period_meters)
+  colors = np.clip(colors, 0, 255).astype(int)
+  colors = np.reshape(colors, [h, w, 3])
+
+  # Retain original color for invalid points
+  overlay[valid] = colors[valid]
+
+  # Now blend!
+  import cv2
+  img[:] = cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0)
+
+
+def get_ortho_debug_image(
+      uvd,
+      min_u=0.,  min_v=0.,
+      max_u=10., max_v=10.,
+      pixels_per_meter=100,
+      marker_radius=-1,
+      period_meters=10.,
+      user_colors=None):
+  """Create and return an orthographic debug image for the given cloud of
+  `(u, v, d)` points (in meters) rasterized at `pixels_per_meter`.
+  Useful for visualizing a raw point cloud (or a half-space of one) as a
+  2D image. The parameters (min_u, min_v) and (max_u, max_v) define the
+  bounding box of points to plot; provide `None` values to auto-fit to
+  `uvd` extents.
+
+  Args:
+    uvd (np.array): An nx3 array of points (in units of meters) where
+      the first axis (u) is the +u (left-to-right) axis of the debug image,
+      the second axis (v) is the +v (bottom-to-top) axis of the debug image,
+      and the third axis (d) is the depth of the point in the half-space
+      (and determines color).
+    min_u (float): The left image boundary (in meters).
+    min_v (float): The bottom image boundary (in meters).
+    max_u (float): The right image boundary (in meters).
+    max_v (float): The top image boundary (in meters).
+    pixels_per_meter (int): Rasterize points at this resolution.
+    marker_radius (int): Draw a marker with this size (in pixels), or a
+      negative number to auto-choose based upon the number of points.
+    period_meters (float) : Choose a distinct hue every `period_meters` and
+      interpolate between hues.
+    user_colors (np.array): (Optional) instead of coloring by distance, use
+      this array of nx3 colors.
+
+  Returns:
+    np.array: A HWC RGB debug image.
+  """
+
+  if uvd.size == 0: # NB: `.any()` would also reject clouds of all zeros
+    if all(b is not None for b in (min_u, min_v, max_u, max_v)):
+      w = int(pixels_per_meter * (max_u - min_u) + 1)
+      h = int(pixels_per_meter * (max_v - min_v) + 1)
+      return np.zeros((h, w, 3), dtype=np.uint8)
+    else:
+      return np.zeros((0, 0, 3), dtype=np.uint8)
+  
+  if min_u is None:
+    min_u = uvd[:, 0].min()
+  if max_u is None:
+    max_u = uvd[:, 0].max()
+  if min_v is None:
+    min_v = uvd[:, 1].min()
+  if max_v is None:
+    max_v = uvd[:, 1].max()
+
+  assert min_u <= max_u
+  assert min_v <= max_v
+
+  # Move points to the viewport frame
+  uvd = uvd - np.array([min_u, min_v, 0])
+  
+  # (u, v) meters -> pixels
+  uvd[:, (0, 1)] *= pixels_per_meter
+  
+  w = int(pixels_per_meter * (max_u - min_u) + 1)
+  h = int(pixels_per_meter * (max_v - min_v) + 1)
+  img = np.zeros((h, w, 3), dtype=np.uint8)
+  
+  draw_xy_depth_in_image(
+    img,
+    uvd,
+    marker_radius=marker_radius,
+    period_meters=period_meters,
+    alpha=1.0,
+    user_colors=user_colors)
+  
+  # image vertical axis is flipped
+  img = np.flipud(img)
+
+  return img
+
+
+def create_matches_debug_line_image(
+      img1,
+      img2,
+      xyxy,
+      max_matches=10_000,
+      radius=4,
+      thickness=1,
+      circle_alpha=0.7,
+      line_alpha=0.2):
+  """Return `img1` and `img2` side-by-side with `xyxy` matches drawn on top."""
+  import cv2
+  from oarphpy.plotting import hash_to_rbg
+
+  debug = np.concatenate([img1, img2], axis=1) # TODO support different sizes
+
+  xyxy = np.array(xyxy).copy()
+  assert len(xyxy.shape) >= 2, xyxy.shape
+  assert xyxy.shape[1] >= 4, xyxy.shape
+  if xyxy.shape[0] > max_matches and max_matches >= 0:
+    # Too many matches: keep a random subset (a negative cap disables this)
+    idx = np.random.choice(xyxy.shape[0], max_matches, replace=False)
+    xyxy = xyxy[idx]
+  line_overlay = debug.copy()
+  circle_overlay = debug.copy()
+  for i, xyxy_i in enumerate(xyxy):
+    x1, y1, x2, y2 = xyxy_i[:4].astype(int)
+    x2 += img1.shape[1] # `img2` sits to the right of `img1` in `debug`
+    r, g, b = hash_to_rbg(i)
+    cv2.circle(circle_overlay, (x1, y1), radius, (b, g, r), cv2.FILLED)
+    cv2.circle(circle_overlay, (x2, y2), radius, (b, g, r), cv2.FILLED)
+    cv2.line(line_overlay, (x1, y1), (x2, y2), (b, g, r), thickness=thickness)
+  
+  debug = cv2.addWeighted(line_overlay, line_alpha, debug, 1 - line_alpha, 0)
+  debug = cv2.addWeighted(
+    circle_overlay, circle_alpha, debug, 1 - circle_alpha, 0)
+  return debug
+
+
+def draw_colored_2dpts_in_image(
+      img,
+      pts,
+      marker_radius=-1,
+      alpha=.7,
+      user_colors=None):
+  """Similar to `draw_xy_depth_in_image()` but does not use a depth-based
+  color scale.
+
+  Args:
+    img (np.array): Draw in this image.
+    pts (np.array): An array of N points whose first two columns are
+      (pixel x, pixel y); any further columns are ignored.
+    marker_radius (int): Draw a marker with this size (or a negative
+      number to auto-choose based upon number of points).
+    alpha (float): Blend point color using weight [0, 1].
+    user_colors (np.array): Instead of hash-derived per-point colors, use
+      this array of nx3 colors (one row per row of `pts`).
+  """
+  # OpenCV can't draw transparent colors, so we use the 'overlay image' trick:
+  # First draw dots on an overlay...
+  overlay = img.copy()
+  h, w = overlay.shape[:2]
+  pts = pts[:, :2].copy()
+
+  # Map points to pixels and filter off-screen points
+  pts = np.rint(pts)
+  onscreen = (
+    (pts[:, 0] >= 0) & (pts[:, 0] < w) &
+    (pts[:, 1] >= 0) & (pts[:, 1] < h))
+  pts = pts[onscreen]
+  if not pts.shape[0]:
+    return
+
+  if user_colors is not None:
+    # Drop the colors of filtered-out points so rows stay paired with `pts`
+    colors = np.asarray(user_colors)
+    if colors.ndim == 2 and colors.shape[0] == onscreen.shape[0]:
+      colors = colors[onscreen]
+  else:
+    from oarphpy.plotting import hash_to_rbg
+    colors = np.array([
+      hash_to_rbg(p) for p in pts
+    ])
+  colors = np.clip(colors, 0, 255).astype(int)
+  
+  # Draw the markers! NB: numpy assignment is very fast, even for 1M+ pts
+  yy = pts[:, 1].astype('int32')
+  xx = pts[:, 0].astype('int32')
+  overlay[yy, xx] = colors
+
+  if marker_radius < 0:
+    # Draw larger markers for fewer points (or user_colors) to make points
+    # more conspicuous
+    if user_colors is not None:
+      marker_radius = 3
+    elif pts.shape[0] <= 1e5:
+      marker_radius = 2
+
+  if marker_radius >= 1:
+    # Draw a crosshairs marker
+    for r in range(-marker_radius, marker_radius + 1):
+      overlay[(yy + r) % h, xx] = colors
+      overlay[yy, (xx + r) % w] = colors
+        # NB: toroidal boundary conditions plot hack for speed ...
+
+  # Now blend!
+  import cv2
+  img[:] = cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0)
+
+
+def images_to_html_video(images=(), fps=4, play_on_hover=True):
+  """Given a sequence of HWC numpy images, create and return an HTML video
+
+  Args:
+    images (List of np.ndarray): A sequence of HWC images (prefer RGB)
+    fps (float): Render video to this frames per second
+    play_on_hover (bool): Make the video autoplay on hover
+
+  Returns:
+    str: An HTML string with the video embedded as a base64 data URI
+  """
+  if not images:
+    return "<i>(No images for video)</i>"
+
+  import base64
+  import os
+  import tempfile
+  import urllib.parse
+  from tqdm.auto import tqdm
+  import imageio
+
+  h, w = images[0].shape[:2]
+  # We tried BytesIO but imageio seems to struggle with that and ffmpeg
+  with tempfile.NamedTemporaryFile(suffix='psegs_html_video.mp4', delete=False) as f:
+    video_path = f.name
+  try:
+    # Use pyav because ffmpeg sometimes hangs / blocks forever on frame write
+    with imageio.imopen(video_path, "w", plugin="pyav") as iiow:
+      iiow.init_video_stream(
+        "libx264", fps=fps, max_keyframe_interval=1, force_keyframes=True)
+      for img in tqdm(images, desc=f"Saving debug video to {video_path}"):
+        iiow.write_frame(img)
+    with open(video_path, 'rb') as fin:
+      video_bytes = fin.read()
+  finally:
+    os.remove(video_path) # delete=False above: we must clean up ourselves
+
+  encoded = base64.b64encode(video_bytes)
+  html_data = urllib.parse.quote(encoded.decode('ascii'))
+
+  attrs = ''
+  if play_on_hover:
+    attrs = """
+        onmouseover="this.play()" onmouseout="this.pause();this.currentTime=0;"
+      """.strip()
+  html = """
+    <video width="{w}" height="{h}" controls {attrs}>
+    <source type="video/mp4" src="data:video/mp4;base64,{html_data}" />
+    </video>
+  """.format(
+        h=h, w=w,
+        attrs=attrs,
+        html_data=html_data)
+  return html
+
+
+def sample_to_html(
+        spark,
+        sample,
+        include_videos=True,
+        videos_n_frames=50,
+        video_fps=4,
+        video_target_height=400,
+        rgbd_depth_to_clouds=True,
+        include_cloud_viz=True,
+        clouds_n_clouds=50,
+        clouds_n_pts_per_plot=50000,
+        cloud_include_cam_poses=True,
+        cloud_n_cam_poses=50,
+        matches_n_examples=10,
+        points2d_n_examples=10):
+  
+  # ..... todo rename sdt to html ?
+
+  from psegs import datum
+  from psegs import table
+  from psegs import util
+
+  # Ensure we have a Spark Dataframe
+  if isinstance(sample, datum.Sample):
+    sdt = table.StampedDatumTable.from_sample(sample)
+    sd_df = sdt.to_spark_df(spark=spark)
+  elif isinstance(sample, table.StampedDatumTable):
+    sd_df = sample.to_spark_df(spark=spark)
+  elif hasattr(sample, '_jrdd'):
+    sdt = table.StampedDatumTable.from_datum_rdd(sample, spark=spark)
+    sd_df = sdt.to_spark_df(spark=spark)
+  elif hasattr(sample, 'rdd'):
+    # Probably is already a Spark Dataframe
+    sd_df = sample
+  else:
+    raise ValueError("Don't know what to do with %s" % (sample,))
+
+  sd_df = sd_df.repartition('uri.timestamp')
+  sd_df = sd_df.persist()
+  sd_df.createOrReplaceTempView('sd_df')
+  reports = []
+
+  util.log.info("Rendering summaries for %s datums ..." % sd_df.count())
+
+  def _get_table_html(sql):
+    res = spark.sql(sql)
+    pdf = res.toPandas()
+    util.log.info('Table: \n%s\n' % str(pdf))
+    try:
+      # Pandas < 2
+      try:
+        pdf.style.set_precision(2)
+        pdf.style.hide_index()
+      except Exception:
+        pass
+      
+      # Pandas >=2
+      try:
+        pdf.style.format(precision=2)
+        pdf.style.hide()
+      except Exception:
+        pass
+    except Exception:
+      pass
+
+    css = """
+        <style type="text/css" >
+          table {
+            border: none;
+            border-collapse: collapse;
+            border-spacing: 0;
+            color: black;
+            font-size: 14px;
+            font-family: "Monaco", monospace;
+            table-layout: fixed;
+          }
+          thead {
+            border-bottom: 1px solid black;
+            vertical-align: bottom;
+          }
+          tr, th, td {
+            text-align: right;
+            vertical-align: middle;
+            padding: 0.5em 0.5em;
+            line-height: normal;
+            white-space: normal;
+            max-width: none;
+            border: none;
+          }
+          th {
+            font-weight: bold;
+          }
+          tbody tr:nth-child(odd) {
+            background: #f5f5f5;
+          }
+          tbody tr:hover {
+            background: rgba(66, 165, 245, 0.2);
+          }
+        </style>
+    """
+    return css + pdf.to_html()
+
+  ## Summary ###############################################################
+  html = _get_table_html("""
+          SELECT 
+            segment_id,
+            dataset,
+            split,
+            n AS total_datums,
+            1e-9 * duration_ns AS duration_sec,
+            FROM_UNIXTIME(start * 1e-9) AS start,
+            FROM_UNIXTIME(end * 1e-9) AS end
+          FROM 
+              (
+                  SELECT
+                      FIRST(uri.dataset) AS dataset,
+                      FIRST(uri.split) AS split,
+                      FIRST(uri.segment_id) AS segment_id,
+                      MIN(uri.timestamp) AS start,
+                      MAX(uri.timestamp) AS end,
+                      MAX(uri.timestamp) - MIN(uri.timestamp) AS duration_ns,
+                      COUNT(*) AS n
+                  FROM sd_df
+                  GROUP BY (uri.dataset, uri.split, uri.segment_id)
+              )
+      """)
+  reports.append({'section': 'Sample', 'html': html})
+
+  # Per-segment datum counts; each output column gets its own distinct name.
+  html = _get_table_html("""
+          SELECT 
+            FIRST(uri.dataset) AS dataset,
+            FIRST(uri.split) AS split,
+            FIRST(uri.segment_id) AS segment_id,
+            COUNT(*) n
+          FROM sd_df
+          GROUP BY uri.dataset, uri.split, uri.segment_id
+          ORDER BY uri.dataset, uri.split, uri.segment_id
+      """)
+  reports.append({'section': 'Dataset Details', 'html': html})
+
+  ## CameraImages ##########################################################
+  html = _get_table_html("""
+          SELECT 
+            topic,
+            n,
+            (n / (1e-9 * duration_ns)) AS Hz,
+            1e-9 * duration_ns AS duration_sec,
+            width,
+            height,
+            channel_names,
+            uncompressed_MBytes,
+            uncompressed_MBytes / (1e-9 * duration_ns) AS uncompressed_MBps,
+            FROM_UNIXTIME(start * 1e-9) AS start,
+            FROM_UNIXTIME(end * 1e-9) AS end
+
+          FROM 
+              (
+                  SELECT
+                      FIRST(uri.topic) AS topic,
+                      MIN(uri.timestamp) AS start,
+                      MAX(uri.timestamp) AS end,
+                      MAX(uri.timestamp) - MIN(uri.timestamp) AS duration_ns,
+                      FIRST(camera_image.width) AS width,
+                      FIRST(camera_image.height) AS height,
+                      FIRST(camera_image.channel_names) AS channel_names,
+                      1e-6 * FIRST(camera_image.width * camera_image.height * 3) AS uncompressed_MBytes,
+                      COUNT(*) AS n
+                  FROM sd_df
+                  WHERE camera_image IS NOT NULL
+                  GROUP BY uri.topic
+              )
+          ORDER BY topic
+      """)
+  reports.append({'section': 'CameraImages', 'html': html})
+
+
+  ## PointClouds ###########################################################
+  html = _get_table_html("""
+          SELECT 
+            topic,
+            n,
+            (n / (1e-9 * duration_ns)) AS Hz,
+            1e-9 * duration_ns AS duration_sec,
+            cloud_colnames,
+            FROM_UNIXTIME(start * 1e-9) AS start,
+            FROM_UNIXTIME(end * 1e-9) AS end
+
+          FROM 
+              (
+                  SELECT
+                      FIRST(uri.topic) AS topic,
+                      MIN(uri.timestamp) AS start,
+                      MAX(uri.timestamp) AS end,
+                      MAX(uri.timestamp) - MIN(uri.timestamp) AS duration_ns,
+                      FIRST(point_cloud.cloud_colnames) AS cloud_colnames,
+                      COUNT(*) AS n
+                  FROM sd_df
+                  WHERE point_cloud IS NOT NULL
+                  GROUP BY uri.topic
+              )
+          ORDER BY topic
+      """)
+  reports.append({'section': 'PointClouds', 'html': html})
+  
+
+  ## Transforms ############################################################
+  html = _get_table_html("""
+          SELECT 
+            topic,
+            n,
+            (n / (1e-9 * duration_ns)) AS Hz,
+            1e-9 * duration_ns AS duration_sec,
+            xform,
+            FROM_UNIXTIME(start * 1e-9) AS start,
+            FROM_UNIXTIME(end * 1e-9) AS end
+
+          FROM 
+              (
+                  SELECT
+                      FIRST(uri.topic) AS topic,
+                      MIN(uri.timestamp) AS start,
+                      MAX(uri.timestamp) AS end,
+                      MAX(uri.timestamp) - MIN(uri.timestamp) AS duration_ns,
+                      FIRST(CONCAT(transform.src_frame, '->', transform.dest_frame)) AS xform,
+                      COUNT(*) AS n
+                  FROM sd_df
+                  WHERE transform IS NOT NULL
+                  GROUP BY uri.topic
+              )
+          ORDER BY topic
+      """)
+  reports.append({'section': 'Transforms', 'html': html})
+  
+
+  ## Cuboids ###############################################################
+  html = _get_table_html("""
+          SELECT 
+            topic,
+            n,
+            (n / (1e-9 * duration_ns)) AS Hz,
+            1e-9 * duration_ns AS duration_sec,
+            FROM_UNIXTIME(start * 1e-9) AS start,
+            FROM_UNIXTIME(end * 1e-9) AS end
+
+          FROM 
+              (
+                  SELECT
+                      FIRST(uri.topic) AS topic,
+                      MIN(uri.timestamp) AS start,
+                      MAX(uri.timestamp) AS end,
+                      MAX(uri.timestamp) - MIN(uri.timestamp) AS duration_ns,
+                      COUNT(*) AS n
+                  FROM sd_df
+                  WHERE SIZE(cuboids) > 0
+                  GROUP BY uri.topic
+              )
+          ORDER BY topic
+      """)
+  reports.append({'section': 'Cuboids', 'html': html})
+
+  ## Matches ###############################################################
+  html = _get_table_html("""
+          SELECT 
+            topic,
+            n,
+            (n / (1e-9 * duration_ns)) AS Hz,
+            1e-9 * duration_ns AS duration_sec,
+            matcher_name,
+            matches_colnames,
+            img1_width,
+            img1_height,
+            img2_width,
+            img2_height,
+            uncompressed_MBytes,
+            uncompressed_MBytes / (1e-9 * duration_ns) AS uncompressed_MBps,
+            FROM_UNIXTIME(start * 1e-9) AS start,
+            FROM_UNIXTIME(end * 1e-9) AS end
+
+          FROM 
+              (
+                  SELECT
+                      FIRST(uri.topic) AS topic,
+                      MIN(uri.timestamp) AS start,
+                      MAX(uri.timestamp) AS end,
+                      MAX(uri.timestamp) - MIN(uri.timestamp) AS duration_ns,
+                      FIRST(matched_pair.matcher_name) AS matcher_name,
+                      FIRST(matched_pair.matches_colnames) AS matches_colnames,
+                      FIRST(matched_pair.img1.width) AS img1_width,
+                      FIRST(matched_pair.img1.height) AS img1_height,
+                      FIRST(matched_pair.img2.width) AS img2_width,
+                      FIRST(matched_pair.img2.height) AS img2_height,
+                      1e-6 * (
+                        FIRST(
+                          matched_pair.img1.width * matched_pair.img1.height * 3)
+                          +
+                        FIRST(
+                          matched_pair.img2.width * matched_pair.img2.height * 3)
+                      )
+                             AS uncompressed_MBytes,
+                      COUNT(*) AS n
+                  FROM sd_df
+                  WHERE matched_pair IS NOT NULL
+                  GROUP BY uri.topic
+              )
+          ORDER BY topic
+      """)
+  reports.append({'section': 'MatchedPairs', 'html': html})
+
+  ## Points2D ##############################################################
+  html = _get_table_html("""
+          SELECT 
+            topic,
+            n,
+            (n / (1e-9 * duration_ns)) AS Hz,
+            1e-9 * duration_ns AS duration_sec,
+            annotator_name,
+            points_colnames,
+            img_width,
+            img_height,
+            FROM_UNIXTIME(start * 1e-9) AS start,
+            FROM_UNIXTIME(end * 1e-9) AS end
+
+          FROM 
+              (
+                  SELECT
+                      FIRST(uri.topic) AS topic,
+                      MIN(uri.timestamp) AS start,
+                      MAX(uri.timestamp) AS end,
+                      MAX(uri.timestamp) - MIN(uri.timestamp) AS duration_ns,
+                      FIRST(points_2d.annotator_name) AS annotator_name,
+                      FIRST(points_2d.points_colnames) AS points_colnames,
+                      FIRST(points_2d.img.width) AS img_width,
+                      FIRST(points_2d.img.height) AS img_height,
+                      COUNT(*) AS n
+                  FROM sd_df
+                  WHERE points_2d IS NOT NULL
+                  GROUP BY uri.topic
+              )
+          ORDER BY topic
+      """)
+  reports.append({'section': 'Points2D', 'html': html})
+
+  # Find depth topics for later
+  rows = spark.sql("""
+              SELECT
+                FIRST(uri.topic) AS topic,
+                FIRST(camera_image.channel_names) AS channel_names
+              FROM sd_df
+              WHERE camera_image IS NOT NULL
+              GROUP BY uri.topic
+            """).collect()
+  depth_camera_topics = set([
+    r.topic
+    for r in rows
+    if 'depth' in r.channel_names
+  ])
+
+  ## Videos ################################################################
+  if include_videos:
+    topic_htmls = []
+
+    camera_topics = spark.sql(
+      "SELECT DISTINCT uri.topic FROM sd_df WHERE camera_image IS NOT NULL")
+    camera_topics = sorted(r.topic for r in camera_topics.collect())
+    for topic in camera_topics:
+      util.log.info("... rendering video for %s ..." % topic)
+      sample_sd_df = spark.sql("""
+                        SELECT *
+                        FROM sd_df
+                        WHERE uri.topic == '{topic}'
+                        ORDER BY RAND(1337)
+                        LIMIT {videos_n_frames}
+                    """.format(topic=topic, videos_n_frames=videos_n_frames))
+      sample_sd_df = sample_sd_df.repartition('uri.timestamp')
+
+      ##################################################
+      ## We want to adapt period_meters for depth topics
+      period_meters = 10.
+      if topic in depth_camera_topics:
+        def _get_depth_90th(row):
+          # 90th percentile of the valid (positive) depths; 0 if there are none.
+          ci = table.StampedDatumTable.sd_from_row(row.camera_image)
+          depth = ci.get_depth()
+          valid = depth[depth > 0] if depth is not None else np.zeros(0)
+          # NB: np.percentile() takes q in [0, 100], so the 90th is q=90 (not 0.9)
+          return float(np.percentile(valid, 90)) if valid.size else 0
+        depth_top_90th = sample_sd_df.rdd.map(_get_depth_90th).max()
+        if depth_top_90th <= 0.1:
+            period_meters = 0.005
+        elif depth_top_90th <= 1.0:
+            period_meters = 0.05
+        elif depth_top_90th <= 10.0:
+            period_meters = 0.5
+        else:
+            period_meters = 10.
+      
+      ##################################################
+      ## Now render video
+      def _to_t_debug_image(row, video_target_height=400):
+        import cv2
+        
+        # Pad the height a little to make ffmpeg most efficient
+        # (and complain less)
+        if video_target_height % 16 != 0:
+          video_target_height += 16 - (video_target_height % 16)
+        
+        ci = table.StampedDatumTable.sd_from_row(row.camera_image)
+        image = ci.get_debug_image(period_meters=period_meters)
+        aspect = float(ci.width) / float(ci.height)
+        target_width = int(aspect * video_target_height)
+        
+        # Pad the width a little to make ffmpeg most efficient
+        # (and complain less)
+        if target_width % 16 != 0:
+            target_width += 16 - (target_width % 16)
+        image = cv2.resize(image, (target_width, video_target_height))
+
+        return row.uri.timestamp, image
+
+      if sample_sd_df.rdd.isEmpty():
+        html = "<i>No images to viz</i>"
+      else:
+        iter_t_image = sample_sd_df.rdd.map(
+          lambda r: _to_t_debug_image(
+            r, video_target_height=video_target_height)).collect()
+        images = [
+          image
+          for t, image in sorted(iter_t_image, key=lambda ti: ti[0])
+        ]
+
+        # *Global* re-scale for depth debug coloring
+        if topic in depth_camera_topics:
+          im_min = float(min(i.min() for i in images))
+          # Global *max* so no frame overflows uint8; `or 1.` avoids div by 0
+          im_range = (float(max(i.max() for i in images)) - im_min) or 1.
+          images = [
+            (255 * (i.astype('float') - im_min) / im_range).astype('uint8')
+            for i in images
+          ]
+        
+        html = images_to_html_video(images, fps=video_fps)
+      if topic in depth_camera_topics:
+        footer = """
+          <br/><i>Depth coloring with {period_meters}-meter hue period</i><br/>
+          """.format(period_meters=period_meters)
+        html = html + footer
+      
+      topic_htmls.append((topic, html))
+    
+    def _to_section_html(info):
+      topic, vhtml = info
+      html = """
+        <div id="video-{topic}" style="padding: 10px; background-color: #ccc">
+          <h3>{topic}</h3><br/>
+          {vhtml}
+        </div>
+        <br/><br/>
+      """.format(topic=topic, vhtml=vhtml)
+      return html
+
+    section_html = ''.join(_to_section_html(i) for i in topic_htmls)
+    reports.append({'section': 'Videos', 'html': section_html})
+
+  
+  ## Clouds ################################################################
+  need_plotly_init = False
+  if include_cloud_viz:
+    import pandas as pd
+    import plotly.graph_objects as go
+    
+    need_plotly_init = True
+
+    topic_htmls = []
+
+    pc_topics = spark.sql(
+      "SELECT DISTINCT uri.topic FROM sd_df WHERE point_cloud IS NOT NULL")
+    pc_topics = sorted(r.topic for r in pc_topics.collect())
+    if rgbd_depth_to_clouds:
+      pc_topics += sorted(depth_camera_topics)
+    
+    for topic in pc_topics:
+      util.log.info("... rendering point cloud viz for %s ..." % topic)
+
+      ##################################################
+      ## Get the clouds to viz
+      orig_sample_sd_df = spark.sql("""
+                                SELECT *
+                                FROM sd_df
+                                WHERE uri.topic == '{topic}'
+                                ORDER BY RAND(1337)
+                                LIMIT {clouds_n_clouds}
+                            """.format(
+                              topic=topic, clouds_n_clouds=clouds_n_clouds))
+      orig_sample_sd_df = orig_sample_sd_df.repartition('uri.timestamp')
+
+      if topic in depth_camera_topics:
+        def _make_pc(row):
+          sd = table.StampedDatumTable.sd_from_row(row)
+          pc = sd.camera_image.depth_image_to_point_cloud()
+          sd.camera_image = None
+          sd.point_cloud = pc
+          return sd
+        sd_rdd = orig_sample_sd_df.rdd.map(_make_pc)
+        sdt = table.StampedDatumTable.from_datum_rdd(sd_rdd, spark=spark)
+        sample_sd_df = sdt.to_spark_df(spark=spark)
+        sample_sd_df = sample_sd_df.persist()
+      else:
+        sample_sd_df = orig_sample_sd_df
+      
+      def _get_t_cloud_world(row):
+        pc = table.StampedDatumTable.sd_from_row(row.point_cloud)
+        cloud = pc.get_cloud()
+        cloud = cloud[:, :3]
+        T_world_from_ego = pc.ego_pose['ego', 'world']
+        cloud_world = T_world_from_ego.apply(cloud).T
+        return row.uri.timestamp, cloud_world
+      
+      ##################################################
+      ## Package clouds for plotly
+      total_n_world = 0
+      cloud_t_worlds = sample_sd_df.rdd.map(_get_t_cloud_world).collect()
+      cloud_worlds = [
+        c for t, c in sorted(cloud_t_worlds, key=lambda tc: tc[0])
+      ]
+      cloud_df = None
+      pts_per_cloud = int(clouds_n_pts_per_plot / len(cloud_worlds))
+      for i, cloud in enumerate(cloud_worlds):
+        total_n_world  += cloud.shape[0]
+        color = int(255 * (float(i) / len(cloud_worlds)))
+        if cloud.shape[0] > pts_per_cloud:
+          idx = np.random.choice(
+                  np.arange(cloud.shape[0]), pts_per_cloud)
+          cloud = cloud[idx, :]
+        cur_df = pd.DataFrame(cloud, columns=['x', 'y', 'z'])
+        cur_df['color'] = 'rgb(%s, %s, %s)' % (color, color, color)
+        if cloud_df is None:
+          cloud_df = cur_df
+        else:
+          cloud_df = pd.concat([cloud_df, cur_df])
+      
+      ##################################################
+      ## Create plots
+      plots = []
+      scatter = go.Scatter3d(
+                x=cloud_df['x'], y=cloud_df['y'], z=cloud_df['z'],
+                mode='markers',
+                marker=dict(size=2, color=cloud_df['color'], opacity=0.5),)
+      plots.append(scatter)
+
+      ##################################################
+      ## Add pose plots if needed
+      if cloud_include_cam_poses:
+        util.log.info("... adding camera poses for %s ..." % topic)
+        if topic in depth_camera_topics:
+          ci_sd_df = orig_sample_sd_df
+        else:
+          ci_sd_df = spark.sql("""
+                          SELECT *
+                          FROM sd_df
+                          WHERE camera_image IS NOT NULL
+                          ORDER BY RAND(1337)
+                          LIMIT {limit}
+                      """.format(limit=cloud_n_cam_poses))
+        def _get_t_ci(row):
+          ci = table.StampedDatumTable.sd_from_row(row.camera_image)
+          return row.uri.timestamp, ci
+        t_cis = ci_sd_df.rdd.map(_get_t_ci).collect()
+        cis = [ci for t, ci in sorted(t_cis, key=lambda tc: tc[0])]
+
+        plots += [ci.to_plotly_world_frame_3d() for ci in cis]
+
+      ##################################################
+      ## Render to HTML
+      fig = go.Figure(data=plots)
+      fig.update_layout(
+        width=1000, height=700,
+        scene_aspectmode='data')
+      footer = """
+            <i>Showing {sample} of {total} points from {n} clouds</i>
+            """.format(
+                  sample=len(cloud_df),
+                  total=total_n_world,
+                  n=len(cloud_worlds))
+      html = (
+        fig.to_html(include_plotlyjs=True, full_html=False) + '<br/><br/>' + 
+        footer)     
+      topic_htmls.append((topic, html))
+    
+    def _to_section_html(info):
+      topic, pchtml = info
+      html = """
+        <h3>{topic}</h3><br/><br/>
+        {pchtml}
+        <br/><br/>
+      """.format(topic=topic, pchtml=pchtml)
+      return html
+
+    section_html = ''.join(_to_section_html(i) for i in topic_htmls)
+    reports.append({'section': 'Point Clouds', 'html': section_html})
+
+  ## Matches ###############################################################
+  if matches_n_examples > 0:
+    sample_sd_df = spark.sql("""
+                        SELECT *
+                        FROM sd_df
+                        WHERE matched_pair IS NOT NULL
+                        ORDER BY RAND(1337)
+                        LIMIT {matches_n_examples}
+                    """.format(matches_n_examples=matches_n_examples))
+    sample_sd_df = sample_sd_df.repartition('uri.timestamp')
+
+    util.log.info(
+      "... rendering MatchedPair viz for %s pairs ..." % sample_sd_df.count())
+
+    def _row_to_mp_debug_image(row):
+      sd = table.StampedDatumTable.sd_from_row(row)
+      mp = sd.matched_pair
+      debug = mp.get_debug_line_image()
+      
+      from oarphpy.plotting import img_to_img_tag
+      debug_img_html = img_to_img_tag(debug, format='jpg')
+
+      html = "<b>%s</b></br>%s<br/><br/>" % (sd.uri, debug_img_html)
+      return html
+    mp_htmls = sample_sd_df.rdd.map(_row_to_mp_debug_image).collect()
+
+    section_html = ''.join(mp_htmls)
+    reports.append({'section': 'MatchedPair Viz', 'html': section_html})
+
+  ## Points2D ##############################################################
+  if points2d_n_examples > 0:
+    sample_sd_df = spark.sql("""
+                        SELECT *
+                        FROM sd_df
+                        WHERE points_2d IS NOT NULL
+                        ORDER BY RAND(1337)
+                        LIMIT {points2d_n_examples}
+                    """.format(points2d_n_examples=points2d_n_examples))
+    sample_sd_df = sample_sd_df.repartition('uri.timestamp')
+
+    def _row_to_p2d_debug_image(row):
+      # Render one Points2D datum (a Spark row) as a titled HTML <div>.
+      from oarphpy.plotting import img_to_img_tag
+      sd = table.StampedDatumTable.sd_from_row(row)
+      debug = sd.points_2d.get_debug_points_image()
+      debug_img_html = img_to_img_tag(debug, format='jpg')
+
+      # %-formatting does not expand `{...}`, so the id takes its own `%s` slot
+      html = """
+      <div id="p2d-%s" style="padding: 10px; background-color: #eee">
+        <b>%s</b></br>
+        %s
+        <br/><br/>
+      </div>
+      """ % (sd.uri, sd.uri, debug_img_html)
+      return html
+    p2d_htmls = sample_sd_df.rdd.map(_row_to_p2d_debug_image).collect()
+
+    section_html = ''.join(p2d_htmls)
+    reports.append({'section': 'Points2D Viz', 'html': section_html})
+
+
+  ## Generate full report!
+  util.log.info(
+    "... have reports %s ..." % ([i['section'] for i in reports],))
+
+  def report_info_to_html(info):
+    section = info['section']
+    content = info['html']
+
+    html = """
+      <hr/>
+      <h2>{section}</h2><br/><br/>
+
+      {content}
+
+      <br/><br/>
+    """.format(section=section, content=content)
+    return html
+
+  full_html = "".join(report_info_to_html(info) for info in reports)
+  if need_plotly_init:
+    full_html = PLOTLY_INIT_HTML + '<br />' + full_html
+
+  util.log.info(
+    "... completed report is %.2f MBytes ..." % (1e-6 * len(full_html)))
+  return full_html
+
+
+if __name__ == '__main__':
+  import sys
+  sys.path.append('/opt/psegs')
+
+  import os
+  lidar_root = '/outer_root/media/970-evo-plus-raid0/lidarphone_lidar_scans/'
+  # scan_dirs = [
+  #   os.path.join(lidar_root, f)
+  #   for f in os.listdir(lidar_root)
+  #   if os.path.isdir(os.path.join(lidar_root, f))
+  # ]
+  # scan_dirs = sorted(scan_dirs)
+
+  from psegs.datasets import ios_lidar
+  from psegs.spark import Spark
+
+  class F(ios_lidar.Fixtures):
+    @classmethod
+    def threeDScannerApp_data_root(cls):
+      return lidar_root
+
+  T = ios_lidar.IOSLidarSDTable
+  T.FIXTURES = F
+
+  seg_uris = T.get_all_segment_uris()
+  import pprint
+  # pprint.pprint(seg_uris)
+  with Spark.sess() as spark:
+    for suri in seg_uris:
+
+      if suri.segment_id == 'Untitled Scan':
+        print('fixme', suri)
+        continue
+      if suri.segment_id in ('amiot-crow-bar', 'headlands-downhill-2'):
+        print("fixme", suri)  # report the segment actually being skipped
+        continue
+
+
+      pprint.pprint('working on')
+      pprint.pprint(suri)
+      sd_df = T.as_df(spark, force_compute=True, only_segments=[suri])
+      if not sd_df:
+        print('no df!', suri)
+        continue
+    
+      outpath = os.path.join(lidar_root, suri.segment_id + '.html')
+
+      # import plotly.graph_objects as go
+      # import pandas as pd
+      # cloud_df = pd.DataFrame(np.ones((100, 3)), columns=['x', 'y', 'z'])
+      # color = 128
+      # cloud_df['color'] = 'rgb(%s, %s, %s)' % (color, color, color)
+      
+      # plots = []
+      # scatter = go.Scatter3d(
+      #           x=cloud_df['x'], y=cloud_df['y'], z=cloud_df['z'],
+      #           mode='markers',
+      #           marker=dict(size=2, color=cloud_df['color'], opacity=0.5),)
+      # plots.append(scatter)
+
+      # fig = go.Figure(data=plots)
+      # fig.update_layout(
+      #   width=1000, height=700,
+      #   scene_aspectmode='data')
+      # footer = """
+      #       <i>asdgasgs</i>
+      #       """
+      # html = (
+      #   fig.to_html(include_plotlyjs=True, full_html=False) + '<br/><br/>' + 
+      #   footer)     
+
+
+
+      html = sample_to_html(spark, sd_df)
+      with open(outpath, 'w') as f:
+        f.write(html)
+
+      print('saved', outpath)
+      print(suri)
diff --git a/psegs/util/video.py b/psegs/util/video.py
new file mode 100644
index 0000000..e587ae6
--- /dev/null
+++ b/psegs/util/video.py
@@ -0,0 +1,220 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import attr
+from pathlib import Path
+
+@attr.s(slots=True, eq=True, weakref_slot=False)
+class VideoMeta(object):
+  video_uri = attr.ib(default='')
+
+  start_time_nanostamp = attr.ib(default=0)
+
+  # Note: A video can have dropped frames and/or invalid metadata, so these
+  # could disagree with the actual extracted frames (especially `n_frames`)
+  n_frames = attr.ib(default=0)
+  frames_per_second = attr.ib(default=0.0)
+  end_time_nanostamp = attr.ib(default=0)
+  height = attr.ib(default=0)
+  width = attr.ib(default=0)
+
+  is_10bit_hdr = attr.ib(default=False)
+  # `lstat()` time of the file in nanoseconds, and which stat field it came from
+  lstat_nanostamp = attr.ib(default=0)
+  lstat_attr = attr.ib(default='st_mtime')
+
+  @classmethod
+  def create_for_video(cls, video_uri, lstat_attr='st_mtime', prefer_ffmpeg=True):
+    """Probe `video_uri` (a local file path) and build its `VideoMeta`.
+
+    The start time defaults to the file's `lstat()` field `lstat_attr`; frame
+    count, fps, duration and frame size come from `imageio`.  When ffprobe
+    metadata is available and `prefer_ffmpeg` is set, the first video
+    stream's frame count / fps / duration and the container's creation date
+    take precedence.  Raises `ValueError` for unbounded streams.
+    """
+    import imageio
+    r = imageio.get_reader(video_uri)
+    try:
+      meta = r.get_meta_data()
+      n_frames = meta['nframes']
+      if n_frames == float('inf'):
+        # For some python / imageio versions, you have to use this API:
+        n_frames = r.count_frames()
+        if n_frames == float('inf'):
+          # TODO: use ffmpeg directly?
+          raise ValueError(
+            "Don't currently support infinite streams: %s %s" % (
+              meta, video_uri))
+      fps = meta['fps']
+      h, w = r.get_data(0).shape[:2]
+      duration_sec = meta.get('duration', 0.0)
+    finally:
+      r.close()  # always release the reader, even when we raise
+
+    lstat_res = Path(video_uri).lstat()
+    lstat_nanostamp = int(1e9 * getattr(lstat_res, lstat_attr))
+    start_time_nanostamp = lstat_nanostamp
+
+    is_10bit_hdr = False
+
+    vid_dict = maybe_get_ffmpeg_meta(video_uri)
+    if vid_dict:
+      if prefer_ffmpeg:
+        def _to_float(value):
+          # Parse "30000/1001" or "29.97"; None if missing, malformed or x/0
+          try:
+            num, sep, den = str(value).partition('/')
+            return float(num) / float(den) if sep else float(num)
+          except (ValueError, ZeroDivisionError):
+            return None
+
+        # imageio and ffmpeg can sometimes disagree on e.g. frame count :(
+        for stream in vid_dict.get('streams', []):
+          if stream.get('codec_type') == 'video':
+            n_frames = int(stream.get('nb_frames', n_frames))
+            fps = _to_float(stream.get('avg_frame_rate')) or fps
+            try:
+              duration_sec = float(stream.get('duration') or duration_sec)
+            except (TypeError, ValueError):
+              pass  # unparseable stream duration: keep imageio's value
+            break
+
+        # The helper returns POSIX *seconds* (or None); we store nanoseconds
+        ffmpeg_start_time_sec = ffmpeg_meta_maybe_get_start_time(vid_dict)
+        if ffmpeg_start_time_sec is not None:
+          start_time_nanostamp = int(1e9 * ffmpeg_start_time_sec)
+
+      is_10bit_hdr = (
+        ffmpeg_meta_maybe_get_is_10bit_hdr(vid_dict) or is_10bit_hdr)
+
+      # TODO try to read:
+      # * com.apple.quicktime.location.ISO6709
+      # * Core Media Metadata
+
+    # Derive the end from the *final* start so both stamps share one clock
+    end_time_nanostamp = start_time_nanostamp + int(1e9 * duration_sec)
+
+    return cls(
+            video_uri=video_uri,
+            start_time_nanostamp=start_time_nanostamp,
+            frames_per_second=fps,
+            n_frames=n_frames,
+            end_time_nanostamp=end_time_nanostamp,
+            height=h,
+            width=w,
+            is_10bit_hdr=is_10bit_hdr,
+            lstat_nanostamp=lstat_nanostamp,
+            lstat_attr=lstat_attr)
+
+
+def maybe_get_ffmpeg_meta(video_uri):
+  """Best-effort `ffmpeg.probe()` of `video_uri`: returns the probe result,
+  or `None` if probing raises for any reason."""
+  import ffmpeg
+  assert hasattr(ffmpeg, 'probe'), \
+    f"probe() function missing, do you have a bad install of `ffmpeg-python`? {ffmpeg.__file__}"
+
+  try:
+    probe_result = ffmpeg.probe(video_uri)
+  except Exception:
+    # TODO maybe just log as warning?
+    probe_result = None
+  return probe_result
+
+def ffmpeg_meta_maybe_get_start_time(vid_dict):
+  """Return the container's creation date as a POSIX timestamp in *seconds*
+  (float), or `None` if there is no such tag or it cannot be parsed."""
+  import dateparser
+
+  # Probe output need not contain `format` or `tags`, so don't index blindly
+  tags = (vid_dict.get('format') or {}).get('tags') or {}
+  date_raw_str = (
+    tags.get('com.apple.quicktime.creationdate') or tags.get('creation_time'))
+  if not date_raw_str:
+    return None
+  try:
+    return dateparser.parse(date_raw_str).timestamp()
+  except Exception:  # unparseable (parse() gave None) or bad input
+    return None
+  
+def ffmpeg_meta_maybe_get_is_10bit_hdr(vid_dict):
+  """Return True iff any stream in the probe dict `vid_dict` has a
+  `pix_fmt` ending in `p10le`; streams without `pix_fmt` never match."""
+  return any(
+    stream.get('pix_fmt', '').endswith('p10le')
+    for stream in vid_dict.get('streams', [])
+  )
+
+
+@attr.s(slots=True, eq=True, weakref_slot=False)
+class VideoExplodeParams(object):
+  # Cap (pixels) on the frame's longer side when exploding; negative = no rescale
+  max_hw = attr.ib(default=-1)
+  # File extension of the extracted frames; 'jpg' enables the quality setting
+  image_file_extension = attr.ib(default='png')
+  # Only used for 'jpg': 100 maps to ffmpeg's best qscale (2), 0 to its worst (31)
+  jpeg_quality_percent = attr.ib(default=100)
+
+  # Extract only the first `n_frames`; a negative value extracts every frame
+  n_frames = attr.ib(default=-1)
+
+
+def ffmpeg_explode(params, video_uri, dest_root):
+  """Explode `video_uri` into image frames under `dest_root` with the system
+  `ffmpeg`, as configured by `params`; return the sorted frame file paths.
+  Raises `ValueError` if `ffmpeg` cannot be run."""
+  import shlex
+  from oarphpy import util as oputil
+
+  try:
+    oputil.run_cmd("ffmpeg -h")
+  except Exception as e:
+    raise ValueError(f"This functionality requires system ffmpeg, got {e}") from e
+
+  # Shell-quote the path: it is spliced into a shell command and may have spaces
+  video_path = shlex.quote(str(Path(video_uri).resolve()))
+  rescale_arg = ''
+  if params.max_hw >= 0:
+    rescale_arg = (  # raw string: the `\,` escapes are for ffmpeg, not Python
+      rf"-vf 'scale=if(gte(iw\,ih)\,min({params.max_hw}\,iw)\,-2):if(lt(iw\,ih)\,min({params.max_hw}\,ih)\,-2)' "
+    )
+  qscale_arg = ''
+  if params.image_file_extension == 'jpg':
+    # ffmpeg jpeg quality is from 2 to 31 with 2 highest
+    jpeg_quality = 2 + (31 - 2) * (1. - (params.jpeg_quality_percent / 100.))
+    qscale_arg = f" -qscale {jpeg_quality} "
+
+  vframes_arg = ''
+  zfill = 6
+  if params.n_frames >= 0:
+    zfill = len(str(max(params.n_frames, 1)))  # digits; safe for n_frames == 0
+    vframes_arg = f" -vframes {params.n_frames} "
+
+  FFMPEG_CMD = f"""
+    cd "{dest_root}" && \
+    ffmpeg \
+      -y -v quiet -stats \
+      -noautorotate \
+      -i {video_path} \
+      {vframes_arg} \
+      {rescale_arg} \
+      -vsync 0 \
+      {qscale_arg} \
+        ffmpeg_explode_frame_%0{zfill}d.{params.image_file_extension}
+  """
+  oputil.run_cmd(FFMPEG_CMD)
+
+  return sorted(
+    oputil.all_files_recursive(dest_root, pattern='ffmpeg_explode_frame_*'))
diff --git a/psegs/xform/__init__.py b/psegs/xform/__init__.py
new file mode 100644
index 0000000..d50fe34
--- /dev/null
+++ b/psegs/xform/__init__.py
@@ -0,0 +1,20 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from psegs.xform.jobutil import configure_arg_parser
+from psegs.xform.jobutil import get_matching_seg_uris
+from psegs.xform.jobutil import get_partition_path
+from psegs.xform.jobutil import get_partition_paths
+from psegs.xform.jobutil import get_segment_tables_for_uris
+from psegs.xform.jobutil import run_standard_actions
diff --git a/psegs/xform/charuco.py b/psegs/xform/charuco.py
new file mode 100644
index 0000000..0e455c6
--- /dev/null
+++ b/psegs/xform/charuco.py
@@ -0,0 +1,1049 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+## For Charuco board pattern generation see:
+##  * https://github.com/opencv/opencv_contrib/tree/a26f71313009c93d105151094436eecd4a0990ed/modules/aruco/misc/pattern_generator 
+##  * https://calib.io/pages/camera-calibration-pattern-generator 
+
+from typing import List, Dict
+
+from cmath import isfinite
+import attr
+import numpy as np
+
+from psegs import util
+from psegs import datum
+
+# def from_cv2_import_aruco():
+#   try:
+#     from cv2 import aruco
+#     return aruco
+#   except ImportError as e:
+#     raise ValueError(
+#       f"This feature requires opencv-contrib-python>=4.5.5.62, error: {e}")
+
+@attr.s()
+class CharucoBoardParams(object):
+  dict_key = attr.ib(default='DICT_6X6_1000', type='str')  # name of a `cv2.aruco` `DICT_*` attribute
+  cols = attr.ib(default=11, type='int')  # first element of the size given to `cv2.aruco.CharucoBoard`
+  rows = attr.ib(default=8, type='int')  # second element of that size
+  square_length_meters = attr.ib(default=0.022, type='float')
+  marker_length_meters = attr.ib(default=0.017, type='float')
+  
+  # Important!! This is forwarded to `CharucoBoard.setLegacyPattern()`; see
+  # https://github.com/opencv/opencv/issues/23873#issuecomment-1620504453
+  is_legacy_pattern = attr.ib(default=True, type='bool')
+
+  # dict_key = attr.ib(default='DICT_5X5_1000', type='str')
+  # cols = attr.ib(default=4, type='int')
+  # rows = attr.ib(default=4, type='int')
+  # square_length_meters = attr.ib(default=0.040, type='float')
+  # marker_length_meters = attr.ib(default=0.031, type='float')
+
+
+
+
+  # def create_aruco_dict(self):
+  #   # aruco = from_cv2_import_aruco()
+  #   from cv2 import aruco
+
+  #   # TODO perhaps support custom_dictionary()
+  #   flag = getattr(aruco, self.dict_key, None)
+  #   if flag is None:
+  #     valid_flags = sorted(k for k in dir(aruco) if k.startswith('DICT_'))
+  #     raise ValueError(
+  #       f"Requested {self.dict_key} but only support {valid_flags}")
+
+  #   try:
+  #     aruco_dict = aruco.Dictionary_get(flag)
+  #   except AttributeError:
+  #     aruco_dict = aruco.getPredefinedDictionary(flag)
+  #   return aruco_dict
+
+  # def create_board_and_dict(self):
+  #   # aruco = from_cv2_import_aruco()
+  #   from cv2 import aruco
+    
+  #   aruco_dict = self.create_aruco_dict()
+
+  #   if hasattr(aruco, 'CharucoBoard_create'):
+  #     board = aruco.CharucoBoard_create(
+  #             squaresX=self.cols,
+  #             squaresY=self.rows,
+  #             squareLength=self.square_length_meters,
+  #             markerLength=self.marker_length_meters,
+  #             dictionary=aruco_dict)
+  #   else:
+  #     board = aruco.CharucoBoard(
+  #       (self.cols, self.rows),
+  #       squareLength=self.square_length_meters,
+  #       markerLength=self.marker_length_meters,
+  #       dictionary=aruco_dict)
+  #   return board, aruco_dict
+
+  # def create_board_image(self, height_pixels=2000, width_pixels=1000):
+  #   board, _ = self.create_board_and_dict()
+  #   img = board.draw((width_pixels, height_pixels))
+  #   return img
+
+  # def detect_board_legacy(self, img_gray, K=None, dist_coeffs=None, refine=False):
+  #   """
+  #   TODO
+  #   refine - Tutorials says do not use when detecting Charuco boards because
+  #     it can confuse marker corners with checkerboard corners; use this
+  #     feature for detecting markers in the wild.
+  #     https://docs.opencv.org/3.4/df/d4a/tutorial_charuco_detection.html
+  #   """
+  #   # aruco = from_cv2_import_aruco()
+  #   # import cv2
+  #   from cv2 import aruco
+
+  #   # from packaging import version
+  #   # assert version.parse(cv2.__version__) >= version.parse('4.8.1'), (
+  #   #   "Required cv2 version >= 4.8.1 b/c the aruco impl has changed "
+  #   #   "dramatically")
+     
+    
+  #   board, aruco_dict = self.create_board_and_dict()
+
+  #   if hasattr(aruco, 'CharucoDetector'):
+  #     detector = aruco.CharucoDetector(board)
+  #     idk1, idk2, arucoCorners, arucoIds = detector.detectBoard(img_gray)
+  #   else:
+  #     aruco_params = aruco.DetectorParameters_create()
+  #     arucoCorners, arucoIds, rejectedImgPoints = aruco.detectMarkers(
+  #                                         img_gray,
+  #                                         aruco_dict,
+  #                                         parameters=aruco_params)
+
+      
+  #   # if refine:
+  #   #   ret = aruco.refineDetectedMarkers(
+  #   #             img_gray,
+  #   #             board,
+  #   #             arucoCorners,
+  #   #             arucoIds,
+  #   #             rejectedImgPoints)
+  #   #   arucoCorners, arucoIds, rejectedCorners, recoveredIdxs = ret
+
+  #   if arucoIds is None or len(arucoIds) == 0:
+  #     print('FIXME no detections')
+  #     return None
+
+  #   """
+  #   cv::VideoCapture inputVideo;
+  #   inputVideo.open(0);
+  #   cv::Mat cameraMatrix, distCoeffs;
+  #   // You can read camera parameters from tutorial_camera_params.yml
+  #   readCameraParameters(filename, cameraMatrix, distCoeffs);  // This function is implemented in aruco_samples_utility.hpp
+  #   cv::aruco::Dictionary dictionary = cv::aruco::getPredefinedDictionary(cv::aruco::DICT_6X6_250);
+  #   // To use tutorial sample, you need read custom dictionaty from tutorial_dict.yml
+  #   readDictionary(filename, dictionary); // This function is implemented in opencv/modules/objdetect/src/aruco/aruco_dictionary.cpp
+  #   cv::Ptr<cv::aruco::GridBoard> board = cv::aruco::GridBoard::create(5, 7, 0.04, 0.01, dictionary);
+  #   cv::aruco::DetectorParameters detectorParams = cv::aruco::DetectorParameters();
+  #   cv::aruco::ArucoDetector detector(dictionary, detectorParams);
+    
+
+
+  #   import cv2
+  #   aruco_dict = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_6X6_1000)
+  #   aruco_board = cv2.aruco.CharucoBoard((11, 8), 0.022, 0.017, dictionary=aruco_dict)
+  #   detector_params = cv2.aruco.DetectorParameters()
+  #   charuco_params = cv2.aruco.CharucoParameters()
+  #   charuco_params.tryRefineMarkers = True
+  #   refine_params = cv2.aruco.RefineParameters()
+  #   detector = cv2.aruco.CharucoDetector(board=aruco_board, charucoParams=charuco_params, detectorParams=detector_params, refineParams=refine_params)
+    
+  #   ret = detector.detectBoard(img)
+
+  #   """
+
+  #   breakpoint()
+  #   ret = aruco.interpolateCornersCharuco(
+  #           arucoCorners,
+  #           arucoIds,
+  #           img_gray,
+  #           board,
+  #           cameraMatrix=K,
+  #           distCoeffs=dist_coeffs)
+
+  #   retval, charucoCorners, charucoIds = ret
+  #   return (arucoCorners, arucoIds), (charucoCorners, charucoIds)
+
+  # def calibrate_from_images(
+  #       self,
+  #       all_corners=[],
+  #       all_ids=[],
+  #       images=[],
+  #       img_hw=(-1, -1),
+  #       n_dist_coeffs=4):
+    
+  #   import cv2
+  #   from cv2 import aruco
+
+
+  #   debugs = []
+
+  #   if not (all_corners and all_ids):
+  #     assert len(images), "Need input images OR marker coordinates"
+    
+  #     util.log.info(f"Running Charuco detection on {len(images)} images ...")
+
+  #     all_corners = []
+  #     all_ids = []
+  #     from tqdm import tqdm
+  #     for img_gray in tqdm(images):
+  #       if img_hw == (-1, -1):
+  #         img_hw = img_gray.shape[:2]
+
+  #       ret = self.detect_board(img_gray)
+  #       if ret is None:
+  #         continue
+
+  #       (arucoCorners, arucoIds), (charucoCorners, charucoIds) = ret
+  #       if charucoCorners is None or charucoIds is None:
+  #         continue
+        
+  #       if len(charucoIds) < 6:
+  #         continue
+
+  #       if not all(np.isfinite(c).all() for c in charucoCorners):
+  #         continue
+
+  #       all_corners.append(charucoCorners)
+  #       all_ids.append(charucoIds)
+
+  #       if charucoCorners is not None and charucoIds is not None:
+  #         debug = img_gray.copy()
+  #         debug = aruco.drawDetectedMarkers(debug, arucoCorners, arucoIds)
+  #         debug = aruco.drawDetectedCornersCharuco(debug, charucoCorners, charucoIds)
+  #         debugs.append(debug)
+
+  #   from tqdm import tqdm
+  #   import imageio
+  #   for i, debug in enumerate(tqdm(debugs)):
+  #     imageio.imwrite(f"/opt/psegs/psegs_test/det_charuco_{i}.jpg", debug)
+
+
+  #   assert img_hw != (-1, -1), "Need image size"
+
+  #   board, _ = self.create_board_and_dict()
+  #   h, w = img_hw
+  #   K_init = np.eye(3, 3, dtype='float')
+  #   K_init[0, 0] = float(w)
+  #   K_init[1, 1] = float(h)
+    
+  #   import math
+  #   dist_init = np.zeros((n_dist_coeffs, 1))
+  #   # dist_init[0] =  -70. * math.pi / 180
+  #   flags = cv2.CALIB_FIX_ASPECT_RATIO#cv2.CALIB_USE_QR#cv2.CALIB_RATIONAL_MODEL#cv2.CALIB_FIX_ASPECT_RATIO
+    
+  #   util.log.info(f"Running Charuco calibration ...")
+  #   try:
+  #     # breakpoint()
+
+  #     # ret = cv2.calibrateCameraExtended(
+  #     #               all_obj_points,
+  #     #               all_corners,
+  #     #               imageSize=(w, h),
+  #     #               cameraMatrix=K_init,
+  #     #               distCoeffs=dist_init,
+  #     #               flags=0)
+
+  #     # all_obj_points = []
+  #     # for ids in all_ids:
+  #     #   obj_points = np.array([board.chessboardCorners[i] for i in ids])
+  #     #   all_obj_points.append(obj_points)
+
+  #     # ret = cv2.fisheye.calibrate(
+  #     #               all_obj_points,
+  #     #               all_corners,
+  #     #               image_size=(w, h),
+  #     #               K=K_init,
+  #     #               D=dist_init,
+  #     #               flags=(
+  #     #                 cv2.fisheye.CALIB_FIX_PRINCIPAL_POINT | 
+  #     #                 cv2.fisheye.CALIB_FIX_SKEW | 
+  #     #                 cv2.fisheye.CALIB_CHECK_COND),
+  #     #               criteria=(cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10000, 1e-9))
+  #     # ( retval,
+  #     # cameraMatrix,
+  #     # distCoeffs,
+  #     # rvecs,
+  #     # tvecs) = ret
+
+
+  #     ret = aruco.calibrateCameraCharucoExtended(
+  #                   charucoCorners=all_corners,
+  #                   charucoIds=all_ids,
+  #                   board=board,
+  #                   imageSize=(w, h),
+  #                   cameraMatrix=K_init,
+  #                   distCoeffs=dist_init,
+  #                   flags=flags,
+  #                   criteria=(cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10000, 1e-9))
+  #     ( retval,
+  #       cameraMatrix,
+  #       distCoeffs,
+  #       cal_rvecs,
+  #       cal_tvecs,
+  #       stdDeviationsIntrinsics,
+  #       stdDeviationsExtrinsics,
+  #       perViewErrors) = ret
+  #   except Exception as e:
+  #     print(e)
+  #     #breakpoint()
+  #     print()
+  #   util.log.info(f"... done")
+
+    
+    
+  #   extra = {
+  #     'rms_reproj_error': retval,
+  #   }
+    
+
+  #   # ncameraMatrix, roi = cv2.getOptimalNewCameraMatrix(cameraMatrix, distCoeffs, (w,h), 1, (w,h))
+
+  #   from tqdm import tqdm
+  #   rvecss = []
+  #   tvecss = []
+  #   debugs_est = []
+  #   for ii in tqdm(range(len(images))):
+  #     img = images[ii]
+
+      
+
+  #     ncameraMatrix = None
+  #     img = cv2.undistort(src=img, cameraMatrix=cameraMatrix, distCoeffs=distCoeffs, newCameraMatrix=ncameraMatrix)
+  #     # img = cv2.fisheye.undistortImage(img, cameraMatrix, distCoeffs, Knew=ncameraMatrix)
+
+  #     ret = self.detect_board(img)
+  #     if ret is None:
+  #       import imageio
+  #       imageio.imwrite(f"/opt/psegs/psegs_test/charuco_{ii}.jpg", img)
+  #       continue
+  #     (arucoCorners, arucoIds), (charucoCorners, charucoIds) = ret
+  #     img = aruco.drawDetectedMarkers(img, arucoCorners, arucoIds)
+  #     img = aruco.drawDetectedCornersCharuco(img, charucoCorners, charucoIds)
+  #     rvecs, tvecs, objPts = aruco.estimatePoseSingleMarkers(
+  #                             arucoCorners,
+  #                             self.marker_length_meters,
+  #                             cameraMatrix,
+  #                             distCoeffs)
+      
+  #     if tvecs is not None:
+  #       for i in range(len(tvecs)):
+  #           img = aruco.drawAxis(
+  #                       img,
+  #                       cameraMatrix,
+  #                       distCoeffs,
+  #                       rvecs[i],
+  #                       tvecs[i],
+  #                       self.marker_length_meters)
+
+  #     isValid, rvec, tvec = aruco.estimatePoseCharucoBoard(
+  #                             charucoCorners,
+  #                             charucoIds,
+  #                             board=board,
+  #                             cameraMatrix=cameraMatrix,
+  #                             distCoeffs=distCoeffs,
+  #                             rvec=np.array([]),
+  #                             tvec=np.array([]))
+  #     rvecss.append(rvec)
+  #     tvecss.append(tvec)
+  #     try:
+  #       img = cv2.drawFrameAxes(img, cameraMatrix, distCoeffs, rvec, tvec, 0.1)
+  #       debugs_est.append(img)
+  #     except Exception as e:
+  #       print('draw', e)
+  #       debugs_est.append(None)
+      
+  #     import imageio
+  #     imageio.imwrite(f"/opt/psegs/psegs_test/charuco_{ii}.jpg", img)
+
+  #   # print(ret)
+  #   # print(extra)
+  #   # # breakpoint()
+  #   # print()
+  #   #return rvecss, tvecss, debugs_est, cameraMatrix
+  #   return cal_rvecs, cal_tvecs, debugs_est, cameraMatrix
+
+
+
+
+
+def check_opencv_version_for_aruco():
+  """Assert that the installed `cv2` reports a version of at least 4.8.1."""
+  import cv2
+  from packaging.version import parse as parse_version
+  assert parse_version('4.8.1') <= parse_version(cv2.__version__), (
+    "Required cv2 version >= 4.8.1 b/c the aruco impl has changed "
+    "dramatically between versions; aruco was moved from opencv-contrib "
+    "to mainly opencv objdetect and board patterns changed. See e.g. "
+    "https://github.com/opencv/opencv/blob/9b97c97bd1a4726f84679618a586e7a6cc8b0909/modules/objdetect/misc/python/test/test_objdetect_aruco.py#L189 "
+    "and "
+    "https://github.com/opencv/opencv/issues/23873#issuecomment-1620504453")
+
+
+@attr.s(slots=True)
+class CharucoCalibrationResults(object):
+  """Result (or failure message) of a camera calibration from Charuco data."""
+  # Core Results.  NB: mutable defaults use `attr.Factory` so that instances
+  # never share (and cross-contaminate) one default list / dict / array.
+  opencv_calib_model = attr.ib(default='')
+  rms_error = attr.ib(default=0., type=float)
+  K = attr.ib(type=np.ndarray, default=attr.Factory(lambda: np.eye(3, 3)))
+  distortion_kv = attr.ib(default=attr.Factory(dict), type=Dict[str, float])
+  board_poses = attr.ib(default=attr.Factory(list), type=List[datum.Transform])
+  # Inputs to calibration in same order as charuco detections used
+  all_board_corner_points = attr.ib(default=attr.Factory(list), type=List[np.ndarray]) # nx3
+  all_image_points = attr.ib(default=attr.Factory(list), type=List[np.ndarray]) # nx2
+  all_board_point_ids = attr.ib(default=attr.Factory(list), type=List[np.ndarray]) # nx1
+  # Well actually, calibration failed
+  error_msg = attr.ib(default='', type=str)
+
+  def is_error(self):
+    return bool(self.error_msg)  # a non-empty message marks a failed calibration
+
+def charuco_get_all_obj_img_points(dets):
+  """Match detected charuco corners to board-frame points, per detection.
+
+  Every item of `dets` must carry the same `board_params`; detections with no
+  charuco corners or ids are skipped.  Returns a tuple of three parallel lists
+  `(object_points, image_points, charuco_ids)`, one entry per kept detection.
+  """
+  obj_pts_per_det, img_pts_per_det, ids_per_det = [], [], []
+  board = None
+  expected_params = None
+  for det in dets:
+    if board is not None:
+      assert expected_params == det.board_params, (
+        f"""
+          Programming error, not all detections are for same board; 
+            board_params={expected_params} 
+            det={det.board_params}
+        """)
+    else:
+      # Lazily build the board from the first detection's parameters
+      expected_params = det.board_params
+      board, _ = charuco_create_board(expected_params)
+
+    corners = det.charuco_corners
+    ids = det.charuco_ids
+    if corners is None or ids is None:
+      # Careful, prevent a segfault below in `board.matchImagePoints()`
+      continue
+
+    # `matchImagePoints()` yields (object points, image points)
+    obj_pts, img_pts = board.matchImagePoints(corners, ids)
+    obj_pts_per_det.append(obj_pts)
+    img_pts_per_det.append(img_pts)
+    ids_per_det.append(ids)
+
+  return (obj_pts_per_det, img_pts_per_det, ids_per_det)
+
+def charuco_calibrate_from_detections(camera_hw, dets):
+  """Calibrate a camera via `cv2.calibrateCamera()` from charuco detections.
+
+  Args:
+    camera_hw: `(height, width)` pair for the camera image.
+    dets: detections to feed to `charuco_get_all_obj_img_points()`.
+
+  Returns:
+    A `CharucoCalibrationResults`.  If OpenCV raises `cv2.error`, the result
+    carries only `error_msg`.  Otherwise it has the RMS error, camera matrix,
+    eight named distortion coefficients, one board->camera `datum.Transform`
+    per pose returned by OpenCV, and the matched points used as input.
+  """
+  import cv2
+
+  img_h, img_w = camera_hw
+
+  # NB: the image size is handed to OpenCV as (width, height)
+  try:
+    obj_pts, img_pts, corner_ids = charuco_get_all_obj_img_points(dets)
+    calib_result = cv2.calibrateCamera(
+        obj_pts,
+        img_pts,
+        (img_w, img_h),
+        None,
+        None,
+        flags=cv2.CALIB_RATIONAL_MODEL)
+  except cv2.error as e:
+    return CharucoCalibrationResults(error_msg=str(e))
+
+  rms, camera_matrix, dist_coefs, rvecs, tvecs = calib_result
+  assert len(rvecs) == len(tvecs)
+
+  # FMI https://docs.opencv.org/4.x/d9/d0c/group__calib3d.html#ga3207604e4b1a1758aa66acb6ed5aa65d
+  # "brings the calibration pattern from the object coordinate space (in which object points are specified) to the camera coordinate space"
+  board_poses = [
+    datum.Transform(
+      src_frame='board',
+      dest_frame='camera',
+      # `cv2.Rodrigues()` returns a pair; element 0 is the rotation matrix
+      rotation=cv2.Rodrigues(rvec)[0],
+      translation=tvec)
+    for rvec, tvec in zip(rvecs, tvecs)
+  ]
+
+  dist_coefs = dist_coefs.flatten()
+  assert len(dist_coefs) >= 8
+
+  # Names for the first eight coefficients, in array order.  k4, k5, k6 are
+  # only available with `cv2.CALIB_RATIONAL_MODEL`; the higher-order terms are
+  # important for very wide / near-fisheye lenses
+  coef_names = ('k1', 'k2', 'p1', 'p2', 'k3', 'k4', 'k5', 'k6')
+  distortion_kv = {}
+  for idx, coef_name in enumerate(coef_names):
+    distortion_kv[coef_name] = float(dist_coefs[idx])
+
+  return CharucoCalibrationResults(
+    opencv_calib_model='OPENCV_CHARUCO_RATIONAL_MODEL',
+
+    # Core results
+    rms_error=rms,
+    K=camera_matrix,
+    distortion_kv=distortion_kv,
+    board_poses=board_poses,
+
+    # Calibration inputs, in the order they were passed to OpenCV
+    all_board_corner_points=obj_pts,
+    all_image_points=img_pts,
+    all_board_point_ids=corner_ids)
+
+
+@attr.s(slots=True)
+class CharucoDetections(object):
+  board_id = attr.ib(default='anon', type=str)
+  board_params = attr.ib(default=None, type=CharucoBoardParams)
+  # Filled by `charuco_detect_board()` from `ArucoDetector.detectMarkers()`
+  aruco_marker_corners = attr.ib(default=None, type=List[np.ndarray])
+  aruco_marker_ids = attr.ib(default=None, type=np.ndarray)
+  aruco_rejected_image_points = attr.ib(default=None, type=np.ndarray)
+  # Filled by `charuco_detect_board()` from `CharucoDetector.detectBoard()`
+  charuco_corners = attr.ib(default=None, type=List[np.ndarray])
+  charuco_ids = attr.ib(default=None, type=np.ndarray)
+
+  # These are usually identical to `aruco_marker_corners`
+  charuco_marker_corners = attr.ib(default=None, type=List[np.ndarray])
+  charuco_marker_ids = attr.ib(default=None, type=np.ndarray)
+  # Single-image calibration; `charuco_detect_board()` sets it (may be None)
+  cvcalib = attr.ib(default=None, type=CharucoCalibrationResults)
+
+
+def charuco_create_board(board_params):
+  """Build the `cv2.aruco.CharucoBoard` (and its dictionary) described by
+  `board_params`; raise `ValueError` for an unknown `dict_key`."""
+  check_opencv_version_for_aruco()
+
+  import cv2
+  import cv2.aruco
+
+  bp = board_params
+  if not hasattr(cv2.aruco, bp.dict_key):
+    known = sorted(k for k in dir(cv2.aruco) if k.startswith('DICT_'))
+    raise ValueError(
+      f"Requested {bp.dict_key} but only support {known}")
+
+  dictionary = cv2.aruco.getPredefinedDictionary(getattr(cv2.aruco, bp.dict_key))
+  board_size = (bp.cols, bp.rows)
+  board = cv2.aruco.CharucoBoard(
+    board_size, bp.square_length_meters, bp.marker_length_meters,
+    dictionary=dictionary)
+
+  # https://github.com/opencv/opencv/issues/23873#issuecomment-1620504453
+  board.setLegacyPattern(bp.is_legacy_pattern)
+
+  return board, dictionary
+
+def charuco_detect_board(
+      board_params,
+      img_gray,
+      include_calibration=True,
+      try_refine_markers=True):
+  """Detect aruco markers and charuco corners of one board in `img_gray`.
+
+  Runs a `cv2.aruco.ArucoDetector` and a `cv2.aruco.CharucoDetector` on the
+  image and returns both outputs in one `CharucoDetections`.  With
+  `include_calibration`, the result's `cvcalib` holds a calibration computed
+  from this single detection; otherwise `cvcalib` is `None`.
+  `try_refine_markers` is forwarded as `CharucoParameters.tryRefineMarkers`.
+  """
+  check_opencv_version_for_aruco()
+
+  import cv2
+  import cv2.aruco
+
+  board, dictionary = charuco_create_board(board_params)
+
+  # Stand-alone marker detection, default detector and refine parameters
+  marker_finder = cv2.aruco.ArucoDetector(
+    dictionary=dictionary,
+    detectorParams=cv2.aruco.DetectorParameters(),
+    refineParams=cv2.aruco.RefineParameters())
+  raw_corners, raw_ids, raw_rejected = marker_finder.detectMarkers(img_gray)
+
+  # Often does nothing but we include it as an option
+  board_opts = cv2.aruco.CharucoParameters()
+  board_opts.tryRefineMarkers = try_refine_markers
+
+  board_finder = cv2.aruco.CharucoDetector(
+    board=board,
+    charucoParams=board_opts)
+  (board_corners,
+   board_corner_ids,
+   board_marker_corners,
+   board_marker_ids) = board_finder.detectBoard(img_gray)
+
+  result = CharucoDetections(
+    board_params=board_params,
+    aruco_marker_corners=raw_corners,
+    aruco_marker_ids=raw_ids,
+    aruco_rejected_image_points=raw_rejected,
+    charuco_corners=board_corners,
+    charuco_ids=board_corner_ids,
+    charuco_marker_corners=board_marker_corners,
+    charuco_marker_ids=board_marker_ids)
+
+  if include_calibration:
+    # Single-image calibration from just this detection
+    image_hw = img_gray.shape[:2]
+    result.cvcalib = charuco_calibrate_from_detections(image_hw, [result])
+  else:
+    result.cvcalib = None
+  return result
+
+
+@attr.s(slots=True)
+class CharucoDetectionDebugImages(object):  # filled by `charuco_create_debug_images()`
+  debug_marker_detections = attr.ib(default=None, type=np.ndarray)  # `aruco_marker_*` drawn on a copy of the image
+  debug_marker_rejections = attr.ib(default=None, type=np.ndarray)  # `aruco_rejected_image_points` drawn likewise
+  debug_board_image = attr.ib(default=None, type=np.ndarray)  # output of the board's `generateImage()`
+  debug_board_detections = attr.ib(default=None, type=np.ndarray)  # `charuco_corners` drawn on a copy of the image
+  debug_board_marker_detections = attr.ib(default=None, type=np.ndarray)  # `charuco_marker_*` drawn likewise
+
+
+def charuco_detect_many_boards(
+    board_id_to_params,
+    camera_image,
+    try_refine_markers=True,
+    include_single_image_calibration=True,
+  ):
+  """Run `charuco_detect_board()` once per board on `camera_image.image`
+  (converted RGB -> gray); return the detections in sorted board-id order."""
+  import cv2
+
+  gray = cv2.cvtColor(camera_image.image, cv2.COLOR_RGB2GRAY)
+
+  def detect_one(board_id, board_params):
+    det = charuco_detect_board(
+      board_params,
+      gray,
+      include_calibration=include_single_image_calibration,
+      try_refine_markers=try_refine_markers)
+    det.board_id = board_id
+    return det
+  return [detect_one(*kv) for kv in sorted(board_id_to_params.items())]
+
+
+def charuco_should_use_board_marker_corners(det):
+  """Decide between raw aruco detections and the board detector's aruco
+  detections (often identical).  True means prefer the board detector's:
+  it found more markers, or the same marker ids with differing corners.
+  False when either set of corners is empty or missing."""
+  board_corners = det.charuco_marker_corners
+  raw_corners = det.aruco_marker_corners
+  if not (board_corners and raw_corners):
+    return False
+
+  n_board, n_raw = len(board_corners), len(raw_corners)
+  if n_board != n_raw:
+    # More board-detector markers wins; fewer loses
+    return n_board > n_raw
+
+  raw_by_id = dict(zip(det.aruco_marker_ids.flatten(), raw_corners))
+  board_by_id = dict(zip(det.charuco_marker_ids.flatten(), board_corners))
+  shared_ids = sorted(raw_by_id.keys())
+  if shared_ids != sorted(board_by_id.keys()):
+    return False
+
+  # Same ids on both sides: prefer the board's corners only if any differ
+  for mid in shared_ids:
+    delta = np.abs(raw_by_id[mid] - board_by_id[mid]).sum()
+    if delta > 0:
+      return True
+  return False
+
+
+def charuco_get_marker_corner_global_id(board_params, marker_id, corner_num):
+  """Derive a stable integer id for corner `corner_num` (0..3) of aruco marker
+  `marker_id` in dictionary `board_params.dict_key`.  The id is a SHA-1 hash
+  reduced modulo 2654435769, so it is deterministic but collisions between
+  distinct corners are possible (if unlikely)."""
+  import hashlib
+
+  # Corner indices are 0, 1, 2, 3; negative indices are rejected as well
+  assert 0 <= corner_num <= 3, "Square aruco markers only have four corners"
+  corner_str_id = f"{board_params.dict_key}.mid={marker_id}.cid={corner_num}"
+  digest = hashlib.sha1(corner_str_id.encode('utf-8')).hexdigest()
+  return int(digest, 16) % 2654435769
+
+
+def charuco_get_board_corner_global_id(board_params, corner_id):
+  """Derive an integer identifier for one chessboard corner (i.e. where the
+  black and white squares meet) of the charuco board given by `board_params`.
+
+  The id is the SHA-1 of the board parameters plus `corner_id`, reduced
+  modulo 2654435769.
+  """
+  import hashlib
+
+  key_parts = []
+  for attr_name in (
+        'dict_key',
+        'cols',
+        'rows',
+        'is_legacy_pattern',
+        # TODO toggle board scale variance
+        'square_length_meters',
+        'marker_length_meters'):
+    attr_value = getattr(board_params, attr_name)
+    key_parts.append(f"{attr_name}={attr_value!s}")
+  key_parts.append(f"cid={corner_id}")
+
+  # Hashed text has the form "<k>=<v>.<k>=<v>. ... .cid=<corner_id>"
+  corner_str_id = ".".join(key_parts)
+  sha1_hex = hashlib.sha1(corner_str_id.encode('utf-8')).hexdigest()
+  return int(sha1_hex, 16) % 2654435769
+
+
+def charuco_detections_to_point2ds(
+        det,
+        include_aruco_marker_corners=True,
+        include_board_corners=True,
+        try_use_board_marker_corners=True,
+        ignore_empty_detections=True,
+        include_board_xyz=True):
+  """Convert one `CharucoDetections` into a list of `datum.Points2D`.
+
+  Emits up to two `Points2D`: annotator 'aruco_marker_corners' (four corners
+  per detected marker) and annotator 'charuco_corners' (chessboard corners,
+  with board-frame XYZ columns when `include_board_xyz`).  A group with no
+  points is dropped unless `ignore_empty_detections` is False.
+  """
+  from oarphpy.util.misc import np_truthy
+
+  # We will return these
+  all_p2ds = []
+
+  board_params = det.board_params
+  base_extra = {
+    'charuco.board_id': det.board_id,
+    'charuco.dict_key': board_params.dict_key,
+    'charuco.cols': str(board_params.cols),
+    'charuco.rows': str(board_params.rows),
+    'charuco.square_length_meters': str(board_params.square_length_meters),
+    'charuco.marker_length_meters': str(board_params.marker_length_meters),
+    'charuco.is_legacy_pattern': str(board_params.is_legacy_pattern),
+  }
+
+  if include_aruco_marker_corners:
+    aruco_use_board = False
+    if try_use_board_marker_corners:
+      aruco_use_board = charuco_should_use_board_marker_corners(det)
+
+    # Read either the raw aruco or the board detector's marker attributes
+    attrib_prefix = 'charuco_marker_' if aruco_use_board else 'aruco_marker_'
+    det_mids = getattr(det, attrib_prefix + 'ids')
+    det_corners = getattr(det, attrib_prefix + 'corners')
+
+    xyinfos = []
+    if np_truthy(det_mids) and np_truthy(det_corners):
+      for mid, corners in zip(det_mids, det_corners):
+        mid = mid.item()
+        corners = corners.squeeze()
+        for c in (0, 1, 2, 3):
+          x, y = corners[c]
+          gid = charuco_get_marker_corner_global_id(board_params, mid, c)
+          xyinfos.append([x, y, mid, c, gid])
+
+    if xyinfos or (not ignore_empty_detections):
+      # TODO: support board XYZs for aruco markers, today they're not
+      # immediately available
+      points_array = np.array(xyinfos, dtype='float64')
+      points_colnames = [
+        'x', 'y', 'aruco_marker_id', 'corner_num', 'psegs_aruco_marker_corner_gid'
+      ]
+      extra = {
+        'charuco.try_use_board_marker_corners': str(try_use_board_marker_corners),
+        'charuco.is_aruco_use_board': str(aruco_use_board),
+      }
+      extra.update(base_extra)
+
+      p2d = datum.Points2D(
+        annotator_name='aruco_marker_corners',
+        points_array=points_array,
+        points_colnames=points_colnames,
+        extra=extra,
+      )
+      all_p2ds.append(p2d)
+
+  if include_board_corners:
+    det_bcids = det.charuco_ids
+    det_bcorners = det.charuco_corners
+
+    xyinfos = []
+    if np_truthy(det_bcids) and np_truthy(det_bcorners):
+      for bcid, bcorner in zip(det_bcids, det_bcorners):
+        bcid = bcid.item()
+        x, y = bcorner.squeeze()
+        bcgid = charuco_get_board_corner_global_id(board_params, bcid)
+        xyinfos.append([x, y, bcid, bcgid])
+
+    if xyinfos or (not ignore_empty_detections):
+      points_colnames = [
+        'x', 'y', 'charuco_corner_id', 'psegs_charuco_corner_gid'
+      ]
+      if include_board_xyz:
+        points_colnames += [
+          'charuco_board_frame_x', 'charuco_board_frame_y', 'charuco_board_frame_z'
+        ]
+
+      # Only look up board XYZ when there are corners: for an empty detection
+      # (reachable with `ignore_empty_detections=False`) the lookup yields
+      # nothing and the `len(...) == 1` checks below would wrongly fail.
+      if include_board_xyz and xyinfos:
+        all_object_points, _, all_charuco_ids = charuco_get_all_obj_img_points([det])
+        assert len(all_object_points) == 1
+        det_board_xyz = all_object_points[0].reshape([-1, 3]).tolist()
+        assert len(all_charuco_ids) == 1
+        det_charuco_ids = all_charuco_ids[0]
+
+        # Sanity checks to ensure quick unpack & repack below is safe
+        assert len(xyinfos) == len(det_board_xyz), f"{len(xyinfos)} != {len(det_board_xyz)}"
+        xyinfo_cids = [cid for x, y, cid, gid in xyinfos]
+        assert det_charuco_ids.flatten().tolist() == xyinfo_cids, f"{det_charuco_ids.flatten().tolist()} != {xyinfo_cids}"
+
+        xyinfos = [
+          (x, y, cid, gid, bX, bY, bZ) for
+          ((x, y, cid, gid), (bX, bY, bZ)) in zip(xyinfos, det_board_xyz)
+        ]
+
+      points_array = np.array(xyinfos, dtype='float64')
+      p2d = datum.Points2D(
+        annotator_name='charuco_corners',
+        points_array=points_array,
+        points_colnames=points_colnames,
+        extra=dict(base_extra),
+      )
+      all_p2ds.append(p2d)
+
+  return all_p2ds
+
+
+def charuco_create_debug_images(
+      img,
+      detections,
+      create_marker_debug_images=True,
+      create_board_image=True,
+      create_board_detections_debug_images=True,
+      create_board_marker_debug_images=True):
+  """Render debug visualizations of `detections` over copies of `img`.
+
+  Each `create_*` flag enables one group of images; members of the returned
+  `CharucoDetectionDebugImages` whose flag is off stay `None`.  Overlays are
+  always drawn on `img.copy()`, not on `img` itself.  `detections` is one
+  detection result (e.g. as returned by `charuco_detect_board()`).
+  """
+  check_opencv_version_for_aruco()
+
+  import cv2
+  import cv2.aruco
+
+  out = CharucoDetectionDebugImages()
+
+  if create_marker_debug_images:
+    # Accepted markers (drawn with ids) ...
+    out.debug_marker_detections = cv2.aruco.drawDetectedMarkers(
+      img.copy(),
+      corners=detections.aruco_marker_corners,
+      ids=detections.aruco_marker_ids)
+    # ... and rejected marker candidates (no ids)
+    out.debug_marker_rejections = cv2.aruco.drawDetectedMarkers(
+      img.copy(),
+      corners=detections.aruco_rejected_image_points)
+
+  if create_board_image:
+    # Rendering of the board pattern itself
+    params = detections.board_params
+    board, _ = charuco_create_board(params)
+    board_image_size = (params.cols*50, params.rows*50)
+    out.debug_board_image = board.generateImage(
+      board_image_size,
+      marginSize=10)
+
+  if create_board_detections_debug_images:
+    # With no charuco corners, the debug image is a plain copy of `img`
+    canvas = img.copy()
+    if detections.charuco_corners is not None:
+      canvas = cv2.aruco.drawDetectedCornersCharuco(
+        canvas,
+        detections.charuco_corners,
+        charucoIds=detections.charuco_ids)
+    out.debug_board_detections = canvas
+
+  if create_board_marker_debug_images:
+    # Likewise for the markers found by the board detector
+    canvas = img.copy()
+    if detections.charuco_marker_corners is not None:
+      canvas = cv2.aruco.drawDetectedMarkers(
+        canvas,
+        corners=detections.charuco_marker_corners,
+        ids=detections.charuco_marker_ids)
+    out.debug_board_marker_detections = canvas
+
+  return out
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+"""
+w, h 5312 2988
+K
+array([[9.58550221e+02, 0.00000000e+00, 2.88703475e+03],
+       [0.00000000e+00, 9.58550221e+02, 2.84402233e+03],
+       [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
+
+distCoeffs
+array([[ 0.01447736],
+       [-0.00325806],
+       [ 0.00411286],
+       [-0.01069181],
+       [ 0.00011799]])
+
+
+based upon 1920x1080 /outer_root/media/970-evo-plus-raid0/hloc_out/pwais.private.lidar_hero10_winter_stinsin_GX010018.MP4_cache/images/camera_adhoc.1645923337846437120.png
+
+w, h 1920 1080
+
+K
+array([[2.07996977e+03, 0.00000000e+00, 4.58423012e+02],
+       [0.00000000e+00, 2.07996977e+03, 5.82382328e+02],
+       [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
+
+distCoeffs
+array([[-1.88078561],
+       [ 3.09708077],
+       [ 0.01062191],
+       [ 0.20261413],
+       [-3.1799531 ]])
+
+
+TODO:
+ * add a video input ...
+ * enable subsampling for ~200 images b/c seems to be properly N^2
+ * input: images
+ * output:
+     * CameraImage with K, disp and RT for every image
+     * cuboids for board and markers
+     * rectified debug video
+     * 3D scene with camera poses and cuboids
+     * a hook to tutorial / readme for that
+
+Then we can SLAM the gopro
+
+"""
+
+
+
+
+
+
+
+
+
+# def create_calibrated_cameras(images):
+#   pass
+
+# # def charuco_board_image(
+# #       aruco_dict_key='DICT_6X6_250',
+
+# #       squaresX=11, squaresY=8, squareLength=.022, markerLength=.017
+# # ):
+
+# def detect_charuco_board(
+#           img_gray,
+#           aruco_dict_key='DICT_6X6_250'):
+  
+  
+
+
+#     criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.0001)
+#     for corner in corners:
+#         cv2.cornerSubPix(gray, corner, winSize = (3,3), zeroZone = (-1,-1), criteria = criteria)
+
+
+# frame = cv2.imread(path)
+#     img_undist = cv2.undistort(src = frame, cameraMatrix = mtx, distCoeffs = dist)
+    
+#     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+#     aruco_dict = aruco.Dictionary_get(aruco.DICT_6X6_250)
+#     parameters =  aruco.DetectorParameters_create()
+#     corners, ids, rejectedImgPoints = aruco.detectMarkers(gray, aruco_dict,
+#                                                           parameters=parameters)
+#     # SUB PIXEL DETECTION
+#     criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.0001)
+#     for corner in corners:
+#         cv2.cornerSubPix(gray, corner, winSize = (3,3), zeroZone = (-1,-1), criteria = criteria)
+
+#     frame_markers = aruco.drawDetectedMarkers(frame.copy(), corners, ids)
+    
+#     size_of_marker =  0.012 # side length of the marker in meters
+#     rvecs,tvecs,objPts = aruco.estimatePoseSingleMarkers(corners, size_of_marker , mtx, dist)
+    
+#     length_of_axis = 0.012
+#     imaxis = aruco.drawDetectedMarkers(frame.copy(), corners, ids)
+
+#     if tvecs is not None:
+#         for i in range(len(tvecs)):
+#             imaxis = aruco.drawAxis(imaxis, mtx, dist, rvecs[i], tvecs[i], length_of_axis)
+    
+#     imaxis = cv2.resize(imaxis, (frame.shape[1] // 4, frame.shape[0] // 4))
+#     writer.append_data(imaxis)
+#     print('did frame', ii)
+
+# """
+
+# debug images of detected markers. unrect only at first
+
+# then run calib, and plot detected markers on rectified
+
+# """
+
diff --git a/psegs/xform/jobutil.py b/psegs/xform/jobutil.py
new file mode 100644
index 0000000..fb4115a
--- /dev/null
+++ b/psegs/xform/jobutil.py
@@ -0,0 +1,161 @@
+# Copyright 2021 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+SEGXFORM_DESC = """
+segxform - A script to process one or more PSegs segments.
+
+## Example
+
+
+"""  # Description text for segxform; the Example section is still empty
+
+import os
+
+import six
+
+from psegs.datum import URI
+
+
+def configure_arg_parser(parser=None):
+  """Configure the `ArgumentParser` instance `parser` with PSegs-related
+  options and return the parser.  Create an `ArgumentParser` if needed.
+  Adds groups for segment selection, environment, and standard actions.
+  """
+  from psegs.conf import DEFAULT_DATA_ROOT
+
+  if parser is None:
+    import argparse
+    parser = argparse.ArgumentParser(
+                      description="Default PSegs segxform job",
+                      formatter_class=argparse.RawDescriptionHelpFormatter)
+  # NOTE(review): SEGXFORM_DESC (module top) is not used as the description
+  # Pick your data: every selection option defaults to '' (i.e. no filtering)
+  segex_sel_group = parser.add_argument_group(
+                        "PSegs Selection",
+                        "Select the data to process")
+  segex_sel_group.add_argument(
+    '--segment-id', default='',
+    help='Select only this segment. Use --dataset and/or --split if you need '
+         'to distinguish segments with the same name.')
+  segex_sel_group.add_argument(
+    '--segment-ids-with', default='',
+    help='Select only segment IDs that contain this string.')
+  segex_sel_group.add_argument(
+    '--dataset', default='',
+    help='Restrict to only this dataset')
+  segex_sel_group.add_argument(
+    '--split', default='',
+    help='Restrict to only this split')
+
+  # Configure PSegs environment fixtures
+  segex_env_group = parser.add_argument_group(
+                        "PSegs Environment",
+                        "Configure where PSegs looks for assets")
+  segex_env_group.add_argument(
+    '--ps-root', default=DEFAULT_DATA_ROOT,
+    help='Use this as the PSegs root (where PSegs code and data '
+         'fixtures live) [default %(default)s]')
+
+  # Standard Actions: flags that `run_standard_actions()` reads from the args
+  segex_act_group = parser.add_argument_group(
+                        "PSegs Standard Actions",
+                        "Script actions powered by PSegs XForm")
+  segex_act_group.add_argument(
+    '--list-and-exit', default=False, action='store_true',
+    help='Just list available `source` segments to stdout and exit')
+
+  return parser
+
+
+def run_standard_actions(args):
+  """Run any standard action requested in `args`; return True if one ran."""
+  if args.list_and_exit:
+    import pprint
+    from psegs.table.canonical_factory import CanonicalFactory
+    CanonicalFactory.init_from_environ()
+    seg_uris = CanonicalFactory.get_all_segment_uris()
+    seg_uris = sorted(str(suri) for suri in seg_uris)
+    # Use print for the header: pprint would emit the string's quoted repr
+    print(f"Available segments: {len(seg_uris)}")
+    pprint.pprint(seg_uris)
+    return True
+  return False
+
+
+def get_matching_seg_uris(args):
+  """Return the available segment URIs selected by the options in `args`.
+
+  Reads `args.segment_id` (exact match), `args.segment_ids_with` (substring
+  of the segment ID), `args.dataset` and `args.split` (exact matches).  An
+  empty option does not filter; every non-empty option must match.
+  """
+  from psegs.table.canonical_factory import CanonicalFactory
+  CanonicalFactory.init_from_environ()
+  seg_uris = CanonicalFactory.get_all_segment_uris()
+  # Pairs of (option value, predicate); a falsy option value disables its
+  # predicate.  Predicates are applied one after another, so they AND.
+  selectors = (
+    (args.segment_id,
+      lambda suri: suri.segment_id == args.segment_id),
+    (args.segment_ids_with,
+      lambda suri: args.segment_ids_with in suri.segment_id),
+    (args.dataset,
+      lambda suri: suri.dataset == args.dataset),
+    (args.split,
+      lambda suri: suri.split == args.split),
+  )
+  for option, matches in selectors:
+    if option:
+      seg_uris = list(filter(matches, seg_uris))
+  return seg_uris
+
+
+def get_partition_paths(seg_uris):
+  """Return the sorted, distinct segment partition relpaths for `seg_uris`.
+
+  Each URI contributes `uri.to_segment_partition_relpath()`; URIs that share
+  a partition relpath yield a single entry.
+
+  NOTE(review): an earlier inline version built these paths by joining
+  "dataset=...", "split=...", "segment_id=..." components (with EMPTY_*
+  placeholders); presumably `to_segment_partition_relpath()` now does the
+  equivalent -- confirm against the URI implementation.
+  """
+  relpaths = {
+    uri.to_segment_partition_relpath() for uri in seg_uris}
+  return sorted(relpaths)
+
+
+def get_partition_path(v):
+  """Return the segment partition relpath for `v`: a `URI`, a URI string, or
+  an iterable of exactly one `URI` (a string inside an iterable is rejected).
+  """
+  if isinstance(v, six.string_types):
+    v = URI.from_str(v)
+  if not isinstance(v, URI) and hasattr(v, '__iter__'):
+    vs = list(v)
+    assert len(vs) == 1, \
+      "Wanted exactly one value, but have %s" % (v,)
+    v = vs[0]
+  if isinstance(v, URI):
+    return get_partition_paths([v])[0]
+  raise ValueError("Don't know what to do with %s" % (v,))
+
+
+def get_segment_tables_for_uris(seg_uris, spark=None):
+  """Return one segment sd table per URI in `seg_uris`, in the same order."""
+  from psegs.table.canonical_factory import CanonicalFactory
+  make_table = CanonicalFactory.get_segment_sd_table
+  # `spark` is forwarded unchanged to every table lookup
+  return [make_table(seg_uri, spark=spark) for seg_uri in seg_uris]
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..939872f
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,53 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This version of attrs has better cloudpickle support
+attrs>=21.4.0
+
+dateparser
+fasteners
+ffmpeg-python
+imageio
+imageio[ffmpeg]
+
+# fixme why does 1.21.5 break? and who installs it?
+#numpy
+numpy>=1.26.1,<2
+
+oarphpy[spark]==0.1.1
+open3d>=0.11.1
+
+# FIXME both packages below ship `cv2`; upstream says to install only one. Did
+# we need the old pin (opencv-python>=4.5.5.62) due to qt5 headless problems?
+opencv-python>=4.8.1
+opencv-python-headless>=4.8.1
+
+pandas
+plotly>=4.2.1
+plyfile
+psutil
+pyspark>=3.3.2
+pytest
+python-slugify
+rich
+scipy>=1.4.0
+shapely
+six>=1.14.0
+sphinx-autoapi
+tabulate
+threadpoolctl
+tqdm
+trimesh
+xmltodict
+
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..65f8181
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,5 @@
+[aliases]
+test="pytest"
+
+[tool:pytest]
+addopts = -v --durations=0
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..36aefe6
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import re
+
+# setuptools is required: `distutils.core` does not provide `find_packages`
+# (so the old fallback import could never succeed) and distutils itself was
+# removed from the standard library in Python 3.12.
+from setuptools import setup, find_packages
+
+# Function to parse __version__ in `psegs/__init__.py`
+def find_version():
+  """Return the `__version__` string declared in `psegs/__init__.py`."""
+  init_py = os.path.join(
+    os.path.abspath(os.path.dirname(__file__)), 'psegs', '__init__.py')
+  with open(init_py, 'r') as fp:
+    found = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", fp.read(), re.M)
+  if found is None:
+    raise RuntimeError("Unable to find version string.")
+  return found.group(1)
+
+
+NUSC_DEPS = [
+  # TODO try to use v1.1 when they create a formal release ...
+  'nuscenes-devkit==1.1.0'
+]
+
+KITT_360_DEPS = [
+  'xmltodict',
+  'open3d',
+]
+
+# SPARK_DEPS = [
+#   'findspark==1.3.0',
+#   'numpy',
+#   'pandas>=0.19.2'
+# ]
+# HAVE_SYSTEM_SPARK = (
+#   os.environ.get('SPARK_HOME') or
+#   os.path.exists('/opt/spark'))
+# if not HAVE_SYSTEM_SPARK:
+#   SPARK_DEPS += ['pyspark>=2.4.4']
+
+# TF_DEPS = [
+#   'crcmod',
+#   'tensorflow<=1.15.0',
+# ]
+
+# UTILS = [
+#   # For various
+#   'six',
+
+#   # For SystemLock
+#   # 'fasteners==0.14.1', TODO clean up util.SystemLock
+  
+#   # For lots of things
+#   'pandas',
+
+#   # For ThruputObserver
+#   'humanfriendly',
+#   'tabulate',
+#   'tabulatehelper',
+
+#   # For misc image utils
+#   'imageio'
+# ]
+
+# ALL_DEPS = UTILS + SPARK_DEPS + TF_DEPS
+
+dist = setup(
+  name='psegs',
+  version=find_version(),  # Parsed from psegs/__init__.py
+  description='A library for normalized autonomous vehicle datasets',
+  author='Paul Wais',
+  author_email='u@oarph.me',
+  url='https://github.com/pwais/psegs',
+  license='Apache License 2.0',
+  packages=find_packages(exclude=['test*']),
+  long_description=open('README.md').read(),  # NOTE(review): CWD-relative path; handle never closed
+  long_description_content_type="text/markdown",
+  classifiers=[
+    'Development Status :: 4 - Beta',
+    'Intended Audience :: Developers',
+    'License :: OSI Approved :: Apache Software License',
+    'Programming Language :: Python :: 3',
+    'Topic :: Software Development :: Libraries',
+    'Topic :: Scientific/Engineering',
+    'Topic :: System :: Distributed Computing',
+  ],
+  # Testing hooks: pytest, pulled in through pytest-runner
+  test_suite='test',
+  setup_requires=['pytest-runner'],
+  tests_require=['pytest'],
+  # No extras are defined yet (NUSC_DEPS / KITT_360_DEPS above are unused)
+  extras_require={
+    # 'all': ALL_DEPS,
+    # 'utils': UTILS,
+    # 'spark': SPARK_DEPS,
+    # 'tensorflow': TF_DEPS,
+  },
+)
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 0000000..c0ec9ac
--- /dev/null
+++ b/test/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/test/datasets/__init__.py b/test/datasets/__init__.py
new file mode 100644
index 0000000..c0ec9ac
--- /dev/null
+++ b/test/datasets/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/test/datasets/test_adhoc_pixels.py b/test/datasets/test_adhoc_pixels.py
new file mode 100644
index 0000000..20f0fc7
--- /dev/null
+++ b/test/datasets/test_adhoc_pixels.py
@@ -0,0 +1,289 @@
+# Copyright 2022 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import datetime
+
+from psegs import datum
+from psegs.datasets import adhoc_pixels as ap
+
+from test import testutil
+
+def test_AdhocImagePathsSDTFactory_create_factory_for_images():
+  FIXTURE_PQ = (testutil.test_fixtures_dir() / 
+    'test_AdhocImagePathsSDTFactory_create_factory_for_images.parquet')
+
+  # Borrow the COLMAP test images
+  IMAGES_DIR = testutil.test_fixtures_dir() / 'test_colmap' / 'images'
+
+  F = ap.AdhocImagePathsSDTFactory.create_factory_for_images(
+            images_dir=IMAGES_DIR,
+            timestamp_use=None)
+              # Force sequential timestamps for reproducibility
+
+  with testutil.LocalSpark.sess() as spark:
+    sdt = F.create_sd_table(spark=spark)
+    # NOTE(review): the expected paths below hard-code a checkout at /opt/psegs
+    # Let's do a quick URI check, in part for the reader to see what we expect:
+    expected_uris = [
+      'psegs://dataset=anon&split=anon&segment_id=images&timestamp=1&topic=camera_adhoc&extra.AdhocImagePathsSDTFactory.image_path=/opt/psegs/test/fixtures/test_colmap/images/frame_00000.jpg',
+      'psegs://dataset=anon&split=anon&segment_id=images&timestamp=2&topic=camera_adhoc&extra.AdhocImagePathsSDTFactory.image_path=/opt/psegs/test/fixtures/test_colmap/images/frame_00003.jpg',
+      'psegs://dataset=anon&split=anon&segment_id=images&timestamp=3&topic=camera_adhoc&extra.AdhocImagePathsSDTFactory.image_path=/opt/psegs/test/fixtures/test_colmap/images/frame_00006.jpg',
+      'psegs://dataset=anon&split=anon&segment_id=images&timestamp=4&topic=camera_adhoc&extra.AdhocImagePathsSDTFactory.image_path=/opt/psegs/test/fixtures/test_colmap/images/frame_00009.jpg',
+      'psegs://dataset=anon&split=anon&segment_id=images&timestamp=5&topic=camera_adhoc&extra.AdhocImagePathsSDTFactory.image_path=/opt/psegs/test/fixtures/test_colmap/images/frame_00012.jpg',
+      'psegs://dataset=anon&split=anon&segment_id=images&timestamp=6&topic=camera_adhoc&extra.AdhocImagePathsSDTFactory.image_path=/opt/psegs/test/fixtures/test_colmap/images/frame_00015.jpg',
+      'psegs://dataset=anon&split=anon&segment_id=images&timestamp=7&topic=camera_adhoc&extra.AdhocImagePathsSDTFactory.image_path=/opt/psegs/test/fixtures/test_colmap/images/frame_00030.jpg',
+      'psegs://dataset=anon&split=anon&segment_id=images&timestamp=8&topic=camera_adhoc&extra.AdhocImagePathsSDTFactory.image_path=/opt/psegs/test/fixtures/test_colmap/images/frame_00033.jpg'
+    ]
+    actual_uris = sdt.as_uri_rdd().map(lambda x: str(x)).collect()
+    actual_uris.sort()  # Collected order is not relied on; compare sorted
+    assert actual_uris == expected_uris
+
+    sd_df_actual = sdt.to_spark_df()
+    testutil.check_stamped_datum_dfs_equal(
+      spark,
+      sd_df_actual,
+      sd_df_expected_path=FIXTURE_PQ)
+
+
+def test_AdhocVideosSDTFactory_create_factory_for_video():
+  FIXTURE_PQ = (testutil.test_fixtures_dir() / 
+    'test_AdhocVideosSDTFactory_create_factory_for_video.parquet')
+
+  # Create a test video borrowing the COLMAP test images
+  IMAGES_DIR = testutil.test_fixtures_dir() / 'test_colmap' / 'images'
+  VID_DIR = testutil.test_tempdir(
+            'test_AdhocVideosSDTFactory_create_factory_for_video')
+  VID_PATH = VID_DIR / 'my_video.mp4'
+
+  import imageio
+  FPS = 2
+  w = imageio.get_writer(VID_PATH, fps=FPS)
+  for p in sorted(IMAGES_DIR.iterdir()):
+    im = imageio.imread(p)
+    w.append_data(im)
+  w.close()
+
+  F = ap.AdhocVideosSDTFactory.create_factory_for_video(VID_PATH)
+  with testutil.LocalSpark.sess() as spark:
+    sdt = F.create_sd_table(spark=spark)
+    
+    sd_df_actual = sdt.to_spark_df()
+    sd_df_actual.show()
+
+    testutil.check_stamped_datum_dfs_equal(
+      spark,
+      sd_df_actual,
+      sd_df_expected_path=FIXTURE_PQ)
+
+
+def test_DiskCachedFramesVideoSegmentFactory_create_factory_for_video():
+
+  ## Setup
+
+  CLS_CACHE_TEST_DIR = testutil.test_tempdir(
+        'test_DiskCachedFramesVideoSegmentFactory_cache')
+  IMAGE_CACHE_DIR = testutil.test_tempdir(
+        'test_DiskCachedFramesVideoSegmentFactory_images')
+
+  TEST_IMG_CACHE_CLS = testutil.PSegsTestLocalDiskCache.cache_cls_for_testroot(
+                    IMAGE_CACHE_DIR)
+
+  FIXTURE_PQ = (testutil.test_fixtures_dir() / 
+    'test_DiskCachedFramesVideoSegmentFactory_create_factory_for_video.parquet')
+
+  # Create a test video borrowing the COLMAP test images
+  IMAGES_DIR = testutil.test_fixtures_dir() / 'test_colmap' / 'images'
+  VID_DIR = testutil.test_tempdir(
+        'test_DiskCachedFramesVideoSegmentFactory_create_factory_for_video')
+  VID_PATH = VID_DIR / 'my_video.mp4'
+
+  import imageio
+  FPS = 4
+  w = imageio.get_writer(VID_PATH, fps=FPS)
+  for p in sorted(IMAGES_DIR.iterdir()):
+    im = imageio.imread(p)
+    w.append_data(im)
+  w.close()
+  EXPECTED_NUM_FRAMES = len(sorted(IMAGES_DIR.iterdir()))
+
+  # DiskCachedFramesVideoSegmentFactory uses the mtime as the base timestamp of
+  # the video datums, so pin it (NB: naive datetime, so local-timezone dependent)
+  mtime = datetime.datetime(2023, 1, 1, 1, 0, 0)
+  os.utime(
+    str(VID_PATH),
+    (os.stat(str(VID_PATH)).st_atime,
+    mtime.timestamp()))
+
+
+  ## Test!
+  F = ap.DiskCachedFramesVideoSegmentFactory.create_factory_for_video(
+            VID_PATH,
+            cls_cache_dir=CLS_CACHE_TEST_DIR,
+            img_cache_cls=TEST_IMG_CACHE_CLS)
+
+  expected_base_uri = datum.URI(
+                    dataset='anon',
+                    split='anon',
+                    segment_id='my_video.mp4_3e859aae95',
+                    topic='video_camera|max_hw_-1|ext_png')
+  assert F.BASE_URI == expected_base_uri
+
+  assert F.VIDEO_METADATA.video_uri == VID_PATH
+  assert F.VIDEO_METADATA.frames_per_second == float(FPS)
+  assert F.VIDEO_METADATA.n_frames == EXPECTED_NUM_FRAMES
+  assert F.VIDEO_METADATA.height == 240
+  assert F.VIDEO_METADATA.width == 320
+  assert F.VIDEO_METADATA.is_10bit_hdr == False
+
+  # Check the cache was used
+  expected_cache_pkl_path = (
+    CLS_CACHE_TEST_DIR /
+    'anon' / 'anon' / 'my_video.mp4_3e859aae95' /
+    'video_camera|max_hw_-1|ext_png' /
+    'DiskCachedFramesVideoSegmentFactory_cls.cpkl')
+  assert expected_cache_pkl_path.exists()
+
+  # Reload should use cache
+  F = ap.DiskCachedFramesVideoSegmentFactory.create_factory_for_video(
+            VID_PATH,
+            cls_cache_dir=CLS_CACHE_TEST_DIR)
+  
+
+  # Test explode 
+  EF = F.explode_frames()
+
+  assert EF.EXPLODED_FRAME_PATHS is not None
+  assert len(EF.EXPLODED_FRAME_PATHS) == EXPECTED_NUM_FRAMES
+
+  # Re-loading from cache should have the frames
+  F = ap.DiskCachedFramesVideoSegmentFactory.create_factory_for_video(
+            VID_PATH,
+            cls_cache_dir=CLS_CACHE_TEST_DIR)
+  assert F.EXPLODED_FRAME_PATHS is not None
+  assert len(F.EXPLODED_FRAME_PATHS) == EXPECTED_NUM_FRAMES
+
+  # Test SDT
+  with testutil.LocalSpark.sess() as spark:
+    # Use a factory freshly loaded from cache
+    F = ap.DiskCachedFramesVideoSegmentFactory.create_factory_for_video(
+            VID_PATH,
+            cls_cache_dir=CLS_CACHE_TEST_DIR)
+
+    sdt = F.create_sd_table(spark=spark)
+    
+    sd = sdt.to_datum_rdd().first()
+    ci = sd.camera_image
+    image = ci.image
+    h, w = image.shape[:2]
+    assert h == 240
+    assert w == 320
+
+    sd_df_actual = sdt.to_spark_df()
+    sd_df_actual.show()
+
+    # Ensure we got URIs for all of the frames
+    expected_frame_ids = sorted(str(i) for i in range(EXPECTED_NUM_FRAMES))
+    actual_extra_rows = sd_df_actual.select('uri.extra').collect()
+    actual_frame_ids = sorted(
+      r.extra['DiskCachedFramesVideoSegmentFactory.frame_index']
+      for r in actual_extra_rows)
+    assert actual_frame_ids == expected_frame_ids
+
+    # Ensure the camera_images have distinct paths too
+    actual_ci_extra_rows = sd_df_actual.select('camera_image.extra').collect()
+    actual_ci_extra_fpaths = set(
+      r.extra['DiskCachedFramesVideoSegmentFactory.frame_path']
+      for r in actual_ci_extra_rows)
+    assert len(actual_ci_extra_fpaths) == EXPECTED_NUM_FRAMES
+
+    sd_df_actual = sd_df_actual.repartition(1)
+    testutil.check_stamped_datum_dfs_equal(
+      spark,
+      sd_df_actual,
+      sd_df_expected_path=FIXTURE_PQ)
+
+
+def test_DiskCachedFramesVideoSegmentFactory_resized_create_factory_for_video():
+  from psegs.util.video import VideoExplodeParams
+
+  ## Setup
+
+  CLS_CACHE_TEST_DIR = testutil.test_tempdir(
+        'test_DiskCachedFramesVideoSegmentFactory_cache_resized')
+  IMAGE_CACHE_DIR = testutil.test_tempdir(
+        'test_DiskCachedFramesVideoSegmentFactory_images_resized')
+
+  TEST_IMG_CACHE_CLS = testutil.PSegsTestLocalDiskCache.cache_cls_for_testroot(
+                    IMAGE_CACHE_DIR)
+
+  # Create a test video borrowing the COLMAP test images
+  IMAGES_DIR = testutil.test_fixtures_dir() / 'test_colmap' / 'images'
+  VID_DIR = testutil.test_tempdir(
+    'test_DiskCachedFramesVideoSegmentFactory_resized_create_factory_for_video')
+  VID_PATH = VID_DIR / 'my_video.mp4'
+
+  import imageio
+  FPS = 4
+  w = imageio.get_writer(VID_PATH, fps=FPS)
+  for p in sorted(IMAGES_DIR.iterdir()):
+    im = imageio.imread(p)
+    w.append_data(im)
+  w.close()
+  EXPECTED_NUM_FRAMES = len(sorted(IMAGES_DIR.iterdir()))
+
+
+  ## Test!
+  F = ap.DiskCachedFramesVideoSegmentFactory.create_factory_for_video(
+            VID_PATH,
+            explode_params=VideoExplodeParams(
+              max_hw=300,
+              image_file_extension='jpg'),
+            cls_cache_dir=CLS_CACHE_TEST_DIR,
+            img_cache_cls=TEST_IMG_CACHE_CLS)
+
+  expected_base_uri = datum.URI(
+                    dataset='anon',
+                    split='anon',
+                    segment_id='my_video.mp4_86e7a426d9',
+                    topic='video_camera|max_hw_300|ext_jpg')
+  assert F.BASE_URI == expected_base_uri
+
+  assert F.VIDEO_METADATA.video_uri == VID_PATH
+  assert F.VIDEO_METADATA.height == 240
+  assert F.VIDEO_METADATA.width == 320
+
+  # Test explode
+  EF = F.explode_frames()
+
+  assert EF.EXPLODED_FRAME_PATHS is not None
+  assert len(EF.EXPLODED_FRAME_PATHS) == EXPECTED_NUM_FRAMES
+
+  with testutil.LocalSpark.sess() as spark:
+    sdt = EF.create_sd_table(spark=spark)
+    
+    sd_df_actual = sdt.to_spark_df()
+    sd_df_actual.show()
+
+    hw_sdf = sd_df_actual.select(['camera_image.height', 'camera_image.width'])
+    hw_pdf = hw_sdf.toPandas()
+    assert all(hw_pdf['height'] == 226)
+    assert all(hw_pdf['width'] == 300)
+
+    sd = sdt.to_datum_rdd().first()
+    ci = sd.camera_image
+    image = ci.image
+    h, w = image.shape[:2]
+    assert h == 226
+    assert w == 300
diff --git a/test/datasets/test_colmap.py b/test/datasets/test_colmap.py
new file mode 100644
index 0000000..fe2848d
--- /dev/null
+++ b/test/datasets/test_colmap.py
@@ -0,0 +1,235 @@
+# Copyright 2022 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import pytest
+
+import numpy as np
+
+from psegs.datasets import colmap as pscolmap
+
+from test import testutil
+
+
+def test_colmap_create_camera_image():
+  pytest.importorskip("pycolmap")
+
+  FIXTURES_DIR = testutil.test_fixtures_dir() / 'test_colmap'
+  
+  ci = pscolmap.colmap_recon_create_camera_image(
+                  'frame_00012.jpg',
+                  FIXTURES_DIR / 'sparse' / '0',
+                  FIXTURES_DIR / 'images')
+  assert ci.image.shape == (240, 320, 3)
+  assert ci.extra['colmap.image_id'] == '3'
+  assert ci.extra['colmap.image_name'] == 'frame_00012.jpg'
+  assert ci.K[0][0] == 262.71597626202475
+
+
+def test_colmap_create_depth_image():
+  pytest.importorskip("pycolmap")
+
+  FIXTURES_DIR = testutil.test_fixtures_dir() / 'test_colmap'
+
+  dci = pscolmap.colmap_recon_create_camera_image(
+                  'frame_00012.jpg',
+                  FIXTURES_DIR / 'sparse' / '0',
+                  FIXTURES_DIR / 'images',
+                  create_depth_image=True)
+  assert dci.image.shape == (240, 320, 3)
+  assert dci.get_depth().min() == 0
+  np.testing.assert_allclose(dci.get_depth().max(), 54.110733)
+  assert dci.get_chan('colmap_err').min() == 0
+  np.testing.assert_allclose(dci.get_chan('colmap_err').max(), 2.194288)
+  assert dci.get_chan('num_views_visible').min() == 0
+  assert dci.get_chan('num_views_visible').max() == 14
+  assert dci.extra['colmap.image_id'] == '3'
+  np.testing.assert_allclose(dci.K[0][0], 262.71597626202475)
+
+
+def test_colmap_get_image_name_to_covis_names():
+  pytest.importorskip("pycolmap")
+
+  FIXTURES_DIR = testutil.test_fixtures_dir() / 'test_colmap'
+  recon_dir = FIXTURES_DIR / 'sparse' / '0'
+
+  import pycolmap
+  recon = pycolmap.Reconstruction(recon_dir)
+
+  image_name_to_covis_names = (
+    pscolmap.colmap_get_image_name_to_covis_names(recon))
+
+  assert image_name_to_covis_names == EXPECTD_COVIS
+
+
+def test_colmap_create_matched_pair():
+  pytest.importorskip("pycolmap")
+
+  FIXTURES_DIR = testutil.test_fixtures_dir() / 'test_colmap'
+  recon_dir = FIXTURES_DIR / 'sparse' / '0'
+
+  import pycolmap
+  recon = pycolmap.Reconstruction(recon_dir)
+  image_name_to_covis_names = (
+    pscolmap.colmap_get_image_name_to_covis_names(recon))
+
+  EXPECTED_PAIRS_TO_TEST = (
+    ('frame_00033.jpg', 'frame_00003.jpg'),
+    ('frame_00012.jpg', 'frame_00003.jpg'),
+  )
+
+  for image1_name, image2_name in EXPECTED_PAIRS_TO_TEST:
+    assert image1_name in image_name_to_covis_names[image2_name]
+    assert image2_name in image_name_to_covis_names[image1_name]
+
+    mp = pscolmap.colmap_recon_create_matched_pair(
+          image1_name,
+          image2_name,
+          recon_dir,
+          img1='not_null_sentinel',
+          img2='not_null_sentinel')
+
+    matches = mp.get_matches()
+
+    # fmt: off
+    assert mp.matches_colnames == [
+      'x1', 'y1', 'x2', 'y2',
+      'r', 'g', 'b',
+      'world_x', 'world_y', 'world_z',
+      'error', 'track_length', 'colmap_p3id',
+    ] # fmt: on
+
+    assert matches.shape[1] == len(mp.matches_colnames)
+    
+    # Spot check some numbers we pulled manually from 
+    # fmt: off
+    EXPECTED_IM1_TO_MATCHES_ROWS = {
+      'frame_00033.jpg': [
+        (129.14500427246094, 169.54025268554688,
+              109.0311279296875, 176.9521484375,
+         102., 102., 90.,
+         -2.2979449902111684, 6.357728828532021, 28.658129770725225,
+         0.29433191072803566, 4., 1.),
+      ],
+    }
+    # fmt: on
+    
+    expected_match_rows = EXPECTED_IM1_TO_MATCHES_ROWS.get(image1_name, [])
+    actual_match_rows = set(tuple(r) for r in matches)
+    for expected_row in expected_match_rows:
+      assert expected_row in actual_match_rows
+
+
+    assert mp.img1 == 'not_null_sentinel'
+    assert mp.img2 == 'not_null_sentinel'
+    assert mp.extra['colmap.image1_name'] == image1_name
+    assert mp.extra['colmap.image2_name'] == image2_name
+
+    # Now ensure the image parsing works
+    src_images_dir = FIXTURES_DIR / 'images'
+
+    mp = pscolmap.colmap_recon_create_matched_pair(
+          image1_name,
+          image2_name,
+          recon_dir,
+          src_images_dir=src_images_dir)
+
+    assert mp.img1.extra['colmap.image_name'] == image1_name
+    assert mp.img2.extra['colmap.image_name'] == image2_name
+
+  # TODO we don't have any pairs that are NOT covisible in this fixture
+
+
+def test_colmap_create_sd_table_for_reconstruction():
+  pytest.importorskip("pycolmap")
+
+  FIXTURES_DIR = testutil.test_fixtures_dir() / 'test_colmap'
+
+  # Dump numpy cached assets to a temp dir
+  PSEGS_ASSET_DIR = testutil.test_tempdir(
+      'test_colmap_create_sd_table_for_reconstruction')
+
+  with testutil.LocalSpark.sess() as spark:
+    sdt = pscolmap.COLMAP_SDTFactory.create_sd_table_for_reconstruction(
+              FIXTURES_DIR / 'sparse' / '0',
+              FIXTURES_DIR / 'images',
+              PSEGS_ASSET_DIR,
+              spark=spark)
+    
+    sd_df_actual = sdt.to_spark_df()
+    
+    testutil.check_stamped_datum_dfs_equal(
+      spark,
+      sd_df_actual,
+      sd_df_expected_path=FIXTURES_DIR / 'test_colmap_sdt_expected.parquet')
+
+
+
+EXPECTD_COVIS = {'frame_00000.jpg': [
+                     'frame_00003.jpg',
+                     'frame_00006.jpg',
+                     'frame_00009.jpg',
+                     'frame_00012.jpg',
+                     'frame_00015.jpg',
+                     'frame_00030.jpg',
+                     'frame_00033.jpg'],
+ 'frame_00003.jpg': ['frame_00000.jpg',
+                     'frame_00006.jpg',
+                     'frame_00009.jpg',
+                     'frame_00012.jpg',
+                     'frame_00015.jpg',
+                     'frame_00030.jpg',
+                     'frame_00033.jpg'],
+ 'frame_00006.jpg': ['frame_00000.jpg',
+                     'frame_00003.jpg',
+                     'frame_00009.jpg',
+                     'frame_00012.jpg',
+                     'frame_00015.jpg',
+                     'frame_00030.jpg',
+                     'frame_00033.jpg'],
+ 'frame_00009.jpg': ['frame_00000.jpg',
+                     'frame_00003.jpg',
+                     'frame_00006.jpg',
+                     'frame_00012.jpg',
+                     'frame_00015.jpg',
+                     'frame_00030.jpg',
+                     'frame_00033.jpg'],
+ 'frame_00012.jpg': ['frame_00000.jpg',
+                     'frame_00003.jpg',
+                     'frame_00006.jpg',
+                     'frame_00009.jpg',
+                     'frame_00015.jpg',
+                     'frame_00030.jpg',
+                     'frame_00033.jpg'],
+ 'frame_00015.jpg': ['frame_00000.jpg',
+                     'frame_00003.jpg',
+                     'frame_00006.jpg',
+                     'frame_00009.jpg',
+                     'frame_00012.jpg',
+                     'frame_00030.jpg',
+                     'frame_00033.jpg'],
+ 'frame_00030.jpg': ['frame_00000.jpg',
+                     'frame_00003.jpg',
+                     'frame_00006.jpg',
+                     'frame_00009.jpg',
+                     'frame_00012.jpg',
+                     'frame_00015.jpg',
+                     'frame_00033.jpg'],
+ 'frame_00033.jpg': ['frame_00000.jpg',
+                     'frame_00003.jpg',
+                     'frame_00006.jpg',
+                     'frame_00009.jpg',
+                     'frame_00012.jpg',
+                     'frame_00015.jpg',
+                     'frame_00030.jpg']}
\ No newline at end of file
diff --git a/test/datasets/test_ios_lidar.py b/test/datasets/test_ios_lidar.py
new file mode 100644
index 0000000..4280482
--- /dev/null
+++ b/test/datasets/test_ios_lidar.py
@@ -0,0 +1,349 @@
+# Copyright 2021 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+
+import imageio
+
+from psegs import datum
+from psegs.datasets import ios_lidar
+
+from test import testutil
+
+
+def test_threeDScannerApp_json_parsing():
+  testutil.skip_if_fixture_absent(
+    ios_lidar.Fixtures.threeDScannerApp_data_root())
+  # Use the first frame's JSON from the low-res fixture scan.
+  json_data_path = (
+    ios_lidar.Fixtures.threeDScannerApp_data_root() / 
+      'charuco-test-fixture-lowres' / 'frame_00000.json')
+
+  assert json_data_path.exists()
+  with open(json_data_path, 'r') as f:
+    json_data = json.load(f)
+  # The parsed ego pose must be labeled as a transform from 'ego' to 'world'.
+  xform = ios_lidar.threeDScannerApp_get_ego_pose(json_data)
+  assert xform.src_frame == 'ego'
+  assert xform.dest_frame == 'world'
+  # The four entries of K read below must all be non-zero.
+  K = ios_lidar.threeDScannerApp_get_K(json_data)
+  assert K[0][0] != 0
+  assert K[1][1] != 0
+  assert K[0][2] != 0
+  assert K[1][2] != 0
+  # The frame id is the zero-padded number embedded in the file name.
+  frame_id = ios_lidar.threeDScannerApp_frame_id_from_fname(json_data_path)
+  assert frame_id == '00000'
+
+
+def test_threeDScannerApp_timestamps():
+  """Check the per-frame nanosecond timestamps of the low-res fixture scan."""
+  import datetime
+
+  testutil.skip_if_fixture_absent(
+    ios_lidar.Fixtures.threeDScannerApp_data_root())
+  scene_dir = (
+    ios_lidar.Fixtures.threeDScannerApp_data_root() /
+      'charuco-test-fixture-lowres')
+
+  frame_id_to_nanostamp = ios_lidar.threeDScannerApp_create_frame_to_timestamp(
+                            scene_dir)
+  assert len(frame_id_to_nanostamp) == 92
+
+  # These will break if we have wrong image mtimes
+  assert frame_id_to_nanostamp['00000'] == 1637363832000000000
+  assert frame_id_to_nanostamp['00001'] == 1637363832227758083
+  assert frame_id_to_nanostamp['00002'] == 1637363832428882749
+  assert frame_id_to_nanostamp['00003'] == 1637363832527052333
+  assert frame_id_to_nanostamp['00004'] == 1637363832728491833
+  assert frame_id_to_nanostamp['00005'] == 1637363832929434458
+
+  assert frame_id_to_nanostamp['00087'] == 1637363846836126208
+  assert frame_id_to_nanostamp['00088'] == 1637363847036753166
+  assert frame_id_to_nanostamp['00089'] == 1637363847268963208
+  assert frame_id_to_nanostamp['00090'] == 1637363847369206458
+  assert frame_id_to_nanostamp['00091'] == 1637363847570889541
+
+  # In human-readable form.  The expected values are UTC wall-clock times, so
+  # convert in UTC; a local-time conversion only passes when TZ is UTC.
+  EXPECTED_STAMPS = {
+    '00000': datetime.datetime(2021, 11, 19, 23, 17, 12),
+    '00001': datetime.datetime(2021, 11, 19, 23, 17, 12, 227758),
+    '00005': datetime.datetime(2021, 11, 19, 23, 17, 12, 929435),
+    '00087': datetime.datetime(2021, 11, 19, 23, 17, 26, 836126),
+    '00090': datetime.datetime(2021, 11, 19, 23, 17, 27, 369207),
+    '00091': datetime.datetime(2021, 11, 19, 23, 17, 27, 570889),
+  }
+  for frame_id, expected_t in EXPECTED_STAMPS.items():
+    # Drop tzinfo so the aware UTC datetime compares with the naive expected.
+    actual_t = datetime.datetime.fromtimestamp(
+      1e-9 * frame_id_to_nanostamp[frame_id], tz=datetime.timezone.utc)
+    assert expected_t == actual_t.replace(tzinfo=None)
+
+
+### Test PointCloud from Mesh #################################################
+
+def test_threeDScannerApp_create_point_cloud_from_mesh():
+  testutil.skip_if_fixture_absent(
+    ios_lidar.Fixtures.threeDScannerApp_data_root())
+  # Build a point cloud from the exported mesh of the high-res fixture scan.
+  scene_dir = (
+    ios_lidar.Fixtures.threeDScannerApp_data_root() /
+      'charuco-test-fixture-highres')
+  
+  pc = ios_lidar.threeDScannerApp_create_point_cloud_from_mesh(
+          scene_dir / 'export.obj')
+  # The fixture mesh must yield a cloud of this exact size, with 3 columns.
+  cloud = pc.get_cloud()
+  assert cloud.shape == (113955, 3)
+  # Render an orthographic debug image of the cloud into a temp dir.
+  outdir = testutil.test_tempdir(
+            'test_threeDScannerApp_create_point_cloud_from_mesh')
+  imageio.imwrite(
+    outdir / 'debug_mesh.png',
+    datum.PointCloud.get_ortho_debug_image(
+              cloud,
+              flatten_axis='+y', # For iOS, +y is up
+              u_axis='+x', # For this scene, +x is "right"
+              v_axis='-z', # For this scene, -z is "up"
+              u_bounds=(-.75, .75),
+              v_bounds=(-.75, .75),
+              filter_behind=False,
+              pixels_per_meter=400))
+  # Compare what was written against the golden directory of the same name.
+  expected_base = (
+    ios_lidar.Fixtures.threeDScannerApp_test_data_root() / 
+      'test_threeDScannerApp_create_point_cloud_from_mesh')
+  
+  testutil.assert_img_directories_equal(outdir, expected_base)
+
+
+
+### Test CameraImage ##########################################################
+
+def test_threeDScannerApp_create_camera_image_lowres():
+  testutil.skip_if_fixture_absent(
+    ios_lidar.Fixtures.threeDScannerApp_data_root())
+  # Exercise RGB and depth loading for one frame of the low-res fixture scan.
+  scene_dir = (
+    ios_lidar.Fixtures.threeDScannerApp_data_root() /
+      'charuco-test-fixture-lowres')
+  
+
+  ### Test RGB
+  # With no sensor_name given, expect a 3-channel image plus pinned `extra`.
+  frame_json_path = scene_dir / 'frame_00045.json'
+  ci = ios_lidar.threeDScannerApp_create_camera_image(frame_json_path)
+  EXTRA_EXPECTED = {
+    'threeDScannerApp.averageAngularVelocity': '0.1675194501876831',
+    'threeDScannerApp.averageVelocity': '0.09594733268022537',
+    'threeDScannerApp.cameraGrain': '0',
+    'threeDScannerApp.exposureDuration': '0.016393441706895828',
+    'threeDScannerApp.frame_id': '00045',
+    'threeDScannerApp.frame_index': '45',
+    'threeDScannerApp.frame_json_name': 'frame_00045.json',
+    'threeDScannerApp.img_path': 'frame_00045.jpg',
+    'threeDScannerApp.intrinsics': '[1453.939453125, 0, 973.69287109375, 0, '
+                              '1453.939453125, 714.6398315429688, 0, 0, 1]',
+    'threeDScannerApp.motionQuality': '0.952137291431427',
+    'threeDScannerApp.projectionMatrix': '[1.514520287513733, 0, '
+                                      '-0.01478421688079834, 0, 0, '
+                                      '2.019360303878784, '
+                                      '-0.006750226020812988, 0, 0, 0, '
+                                      '-0.9999997615814209, '
+                                      '-0.0009999998146668077, 0, 0, -1, 0]',
+    'threeDScannerApp.scan_dir': 'charuco-test-fixture-lowres',
+    'threeDScannerApp.time': '942363.2627448752'
+  }
+  assert ci.extra == EXTRA_EXPECTED
+  
+  img = ci.image
+  assert (ci.height, ci.width) == (1440, 1920)
+  assert img.shape[:3] == (1440, 1920, 3)
+
+
+  ### Test Depth
+  # Same frame, but request 'depth|front': 2 channels (depth, confidence).
+  dci = ios_lidar.threeDScannerApp_create_camera_image(
+          frame_json_path, sensor_name='depth|front')
+  
+  assert dci.channel_names == ['depth', 'confidence']
+  assert (dci.height, dci.width) == (192, 256)
+  dimg = dci.image
+  assert dimg.shape[:3] == (192, 256, 2)
+  # The derived cloud has one row per depth pixel: 192 * 256 == 49152.
+  dpc = dci.depth_image_to_point_cloud()
+  assert dpc.cloud_colnames == ['x', 'y', 'z', 'confidence']
+  cloud = dpc.get_cloud()
+  assert cloud.shape == (49152, 4)
+
+
+  ### Test Projection / Calibration
+  # Write debug renders to a temp dir, then compare with the golden directory.
+  outdir = testutil.test_tempdir(
+            'test_threeDScannerApp_create_camera_image_lowres')
+  frame_id = ci.extra['threeDScannerApp.frame_id']
+
+  imageio.imwrite(
+    outdir / ('depth_debug_%s_5mm.png' % frame_id),
+    dci.get_debug_image(period_meters=0.005))
+
+  imageio.imwrite(
+    outdir / ('projected_lidar_%s_5cm.png' % frame_id),
+    ci.get_debug_image(clouds=[dpc], period_meters=0.05))
+
+  imageio.imwrite(
+    outdir / ('front_rv_debug_%s.png' % frame_id),
+    dpc.get_front_rv_debug_image(
+            camera_images=[ci],
+            z_bounds_meters=(-1, 1),
+            y_bounds_meters=(-1.5, 1.5),
+            pixels_per_meter=400))
+
+  imageio.imwrite(
+    outdir / ('bev_debug_%s.png' % frame_id),
+    dpc.get_bev_debug_image(
+            camera_images=[ci],
+            x_bounds_meters=(-.4, .4),
+            y_bounds_meters=(-.6, .6),
+            pixels_per_meter=400))
+
+
+  expected_base = (
+    ios_lidar.Fixtures.threeDScannerApp_test_data_root() / 
+      'test_threeDScannerApp_create_camera_image_lowres')
+  
+  testutil.assert_img_directories_equal(outdir, expected_base)
+
+
+def test_threeDScannerApp_create_camera_image_high():
+  testutil.skip_if_fixture_absent(
+    ios_lidar.Fixtures.threeDScannerApp_data_root())
+  # Exercise RGB loading and mesh projection for one high-res fixture frame.
+  scene_dir = (
+    ios_lidar.Fixtures.threeDScannerApp_data_root() /
+      'charuco-test-fixture-highres')
+  
+
+  ### Test RGB
+
+  frame_json_path = scene_dir / 'frame_00012.json'
+  ci = ios_lidar.threeDScannerApp_create_camera_image(frame_json_path)
+  EXTRA_EXPECTED = {
+    'threeDScannerApp.altitude': '44.56772631313652',
+    'threeDScannerApp.averageAngularVelocity': '0.06802098453044891',
+    'threeDScannerApp.averageVelocity': '0.05275433138012886',
+    'threeDScannerApp.cameraGrain': '0',
+    'threeDScannerApp.exposureDuration': '0.016393441706895828',
+    'threeDScannerApp.frame_id': '00012',
+    'threeDScannerApp.frame_index': '12',
+    'threeDScannerApp.frame_json_name': 'frame_00012.json',
+    'threeDScannerApp.gpsTime': '1637363870.0003262',
+    'threeDScannerApp.hasGPS': 'true',
+    'threeDScannerApp.horizontalAccuracy': '16.303965550780777',
+    'threeDScannerApp.img_path': 'frame_00012.jpg',
+    'threeDScannerApp.intrinsics': '[1455.00341796875, 0, 980.0227661132812, 0, '
+                                    '1455.00341796875, 713.0078125, 0, 0, 1]',
+    'threeDScannerApp.latitude': '37.77783127898064',
+    'threeDScannerApp.longitude': '-122.39396648365398',
+    'threeDScannerApp.motionQuality': '0.9805654287338257',
+    'threeDScannerApp.projectionMatrix': '[1.5156285762786865, 0, '
+                                          '-0.021377921104431152, 0, 0, '
+                                          '2.0208380222320557, '
+                                          '-0.009016871452331543, 0, 0, 0, '
+                                          '-0.9999997615814209, '
+                                          '-0.0009999998146668077, 0, 0, -1, 0]',
+    'threeDScannerApp.scan_dir': 'charuco-test-fixture-highres',
+    'threeDScannerApp.time': '942393.5939737086',
+    'threeDScannerApp.verticalAccuracy': '6.213013810869289'
+  }
+  assert ci.extra == EXTRA_EXPECTED
+  
+  img = ci.image
+  assert (ci.height, ci.width) == (1440, 1920)
+  assert img.shape[:3] == (1440, 1920, 3)
+
+  frame_id = ci.extra['threeDScannerApp.frame_id']
+  assert int(frame_id) == 12
+
+  ### Test Mesh Projection / Calibration
+  # Load the scan's exported mesh as a point cloud for the projection renders.
+  pc = ios_lidar.threeDScannerApp_create_point_cloud_from_mesh(
+          scene_dir / 'export.obj')
+  
+  # Cloud is in world frame, put it in the ego frame of the camera
+  cloud = pc.get_cloud()
+  pc.cloud = ci.ego_pose['world', 'ego'].apply(cloud).T
+  # NOTE(review): temp/golden dirs say `_highres`; this test is named `_high`.
+  outdir = testutil.test_tempdir(
+            'test_threeDScannerApp_create_camera_image_highres')
+
+  imageio.imwrite(
+    outdir / ('projected_lidar_%s_5mm.png' % frame_id),
+    ci.get_debug_image(clouds=[pc], period_meters=0.005))
+
+  imageio.imwrite(
+    outdir / ('front_rv_debug_%s.png' % frame_id),
+    pc.get_front_rv_debug_image(
+            camera_images=[ci],
+            z_bounds_meters=(-1.5, 1.5),
+            y_bounds_meters=(-2.0, 2.0),
+            pixels_per_meter=400))
+  imageio.imwrite(
+    outdir / ('bev_debug_%s.png' % frame_id),
+    pc.get_bev_debug_image(
+            camera_images=[ci],
+            x_bounds_meters=(-.5, .5),
+            y_bounds_meters=(-.6, .6),
+            pixels_per_meter=400))
+
+
+  expected_base = (
+    ios_lidar.Fixtures.threeDScannerApp_test_data_root() / 
+      'test_threeDScannerApp_create_camera_image_highres')
+  testutil.assert_img_directories_equal(outdir, expected_base)
+
+
+
+def test_threeDScannerApp_sd_table():
+  """Compare each fixture scan's stamped datum table to its golden parquet.
+
+  Calls `testutil.skip_if_fixture_absent` first on the fixture data root.
+  """
+  testutil.skip_if_fixture_absent(
+      ios_lidar.Fixtures.threeDScannerApp_data_root())
+
+  SEGMENT_IDS = (
+    'charuco-test-fixture-lowres',
+    'charuco-test-fixture-highres',
+  )
+  URI_PREFIX = (
+    'psegs://dataset=psegs-ios-lidar-ext&split=threeDScannerApp_data'
+    '&segment_id=')
+
+  with testutil.LocalSpark.sess() as spark:
+    # Same check for every segment: low-res first, then high-res.
+    for segment_id in SEGMENT_IDS:
+      golden_name = 'test_threeDScannerApp_%s-sd.parquet' % segment_id
+      suri = datum.URI.from_str(URI_PREFIX + segment_id)
+      sdt = ios_lidar.IOSLidarSDTFactory.get_segment_sd_table(suri, spark=spark)
+      sd_df_actual = sdt.to_spark_df()
+      testutil.check_stamped_datum_dfs_equal(
+        spark,
+        sd_df_actual,
+        sd_df_expected_path=(
+          ios_lidar.Fixtures.threeDScannerApp_test_data_root() /
+            golden_name))
+
diff --git a/test/datasets/test_kitti.py b/test/datasets/test_kitti.py
new file mode 100644
index 0000000..260aecf
--- /dev/null
+++ b/test/datasets/test_kitti.py
@@ -0,0 +1,810 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import imageio
+import numpy as np
+
+from psegs import datum
+from psegs import util
+from psegs.datasets import kitti
+
+from test import testutil
+
+
+###############################################################################
+## Utils
+
+def save_projected_lidar(base_dir, outdir, frame, camera, K, lidar_to_cam):
+  """Project one frame's lidar points into a camera image and save the overlay.
+
+  Reads `training/velodyne/<frame>.bin` and `training/<camera>/<frame>.png`
+  under `base_dir`; writes a `projected_lidar_*.png` file into `outdir`.
+  """
+  from psegs.util import plotting as pspl
+
+  velo_path = base_dir / ('training/velodyne/%s.bin' % frame)
+  with open(velo_path, 'rb') as f:
+    points = np.frombuffer(f.read(), dtype=np.float32).reshape((-1, 4))
+  xyz = points[:, :3]  # Column 3 (reflectance) is unused
+
+  img = imageio.imread(base_dir / 'training' / camera / ('%s.png' % frame))
+
+  # Divide rows 0 and 1 by row 2, which is compared against 0 below as depth.
+  uvd = K.dot(lidar_to_cam.apply(xyz))
+  uvd[0, :] /= uvd[2, :]
+  uvd[1, :] /= uvd[2, :]
+
+  # Keep only the points whose depth row is positive (in front of the camera).
+  in_front = uvd[2, :] > 0
+  xyd = uvd[:, in_front].T
+
+  util.log.info("Projecting %s %s ..." % (frame, camera))
+  pspl.draw_xy_depth_in_image(img, xyd, alpha=0.5)
+  out_name = 'projected_lidar_%s_%s.png' % (frame.replace('/', '_'), camera)
+  imageio.imwrite(outdir / out_name, img)
+
+
+def save_projected_cuboids(
+    base_dir, cuboids, outdir, frame, camera, K, lidar_to_cam):
+  """Draw each cuboid's projected 3D box onto the camera image and save it."""
+  from oarphpy.plotting import hash_to_rbg
+  from psegs.util import plotting as pspl
+
+  img = imageio.imread(base_dir / 'training' / camera / ('%s.png' % frame))
+  for cuboid in cuboids:
+    # NB: For simplicity, we do NOT filter off-camera cuboids; these will plot
+    # oddly but consistently.
+    cuboid.obj_from_ego.src_frame = 'ego' # In KITTI, lidar = ego
+    cxyz = cuboid.get_box3d()
+    # Project the box points, then divide rows 0 and 1 (x, y) by row 2.
+    cxyd = K.dot(lidar_to_cam.apply(cxyz))
+    cxyd[0, :] /= cxyd[2, :]
+    cxyd[1, :] /= cxyd[2, :]
+    cxyd = cxyd.T
+    # The draw color is derived from the category name via hash_to_rbg.
+    pspl.draw_cuboid_xy_in_image(
+      img, cxyd[:,:2], hash_to_rbg(cuboid.category_name))
+  
+  fname = 'projected_cuboids_%s_%s.png' % (frame.replace('/', '_'), camera)
+  imageio.imwrite(outdir / fname, img)
+
+
+def save_labels_projected_to_lidar(
+  base_dir, outdir, frame, calib, cloud, cuboids):
+  """Save BEV and front range-view debug images of `cloud` with its cuboids."""
+  import time
+
+  ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  ## Note: KITTI Cuboids are in the *camera* frame and must be projected
+  ## into the lidar frame for plotting. This test helps document and
+  ## ensure this assumption holds.
+  ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  lidar_to_cam = calib.R0_rect @ calib.velo_to_cam_unrectified
+  cam_to_lidar = lidar_to_cam.get_inverse()
+
+  cuboids = [c for c in cuboids if c.category_name != 'DontCare']
+  for c in cuboids:
+    # NB: this rewrites each kept cuboid's pose in place (caller's objects).
+    obj_from_ego_lidar = cam_to_lidar @ c.obj_from_ego
+    c.obj_from_ego = obj_from_ego_lidar
+    c.obj_from_ego.src_frame = 'ego'
+    c.obj_from_ego.dest_frame = 'obj'
+
+  ## Now create debug images
+  pc = datum.PointCloud(cloud=cloud)
+
+  util.log.info("Projecting BEV %s ..." % frame)
+  start = time.time()
+  bev_img = pc.get_bev_debug_image(cuboids=cuboids)
+  util.log.info("... BEV done in %s sec" % (time.time() - start))
+  fname = 'projected_lidar_labels_bev_%s.png' % frame.replace('/', '_')
+  imageio.imwrite(outdir / fname, bev_img)
+
+  util.log.info("Projecting Front RV %s ..." % frame)
+  start = time.time()
+  rv_img = pc.get_front_rv_debug_image(cuboids=cuboids)
+  util.log.info("... Front RV done in %s sec" % (time.time() - start))
+  fname = 'projected_lidar_labels_front_rv_%s.png' % frame.replace('/', '_')
+  imageio.imwrite(outdir / fname, rv_img)
+
+
+
+###############################################################################
+## Common
+# Mock calibration text; every entry's values count up 100, 200, 300, ...
+MOCK_CALIBRATION = """
+P0: 1e+02 2e+02 3e+02 4e+02 5e+02 6e+02 7e+02 8e+02 9e+02 10e+02 11e+02 12e+02
+P1: 1e+02 2e+02 3e+02 4e+02 5e+02 6e+02 7e+02 8e+02 9e+02 10e+02 11e+02 12e+02
+P2: 1e+02 2e+02 3e+02 4e+02 5e+02 6e+02 7e+02 8e+02 9e+02 10e+02 11e+02 12e+02
+P3: 1e+02 2e+02 3e+02 4e+02 5e+02 6e+02 7e+02 8e+02 9e+02 10e+02 11e+02 12e+02
+R0_rect: 1e+02 2e+02 3e+02 4e+02 5e+02 6e+02 7e+02 8e+02 9e+02
+Tr_velo_to_cam: 1e+02 2e+02 3e+02 4e+02 5e+02 6e+02 7e+02 8e+02 9e+02 10e+02 11e+02 12e+02
+Tr_imu_to_velo: 1e+02 2e+02 3e+02 4e+02 5e+02 6e+02 7e+02 8e+02 9e+02 10e+02 11e+02 12e+02
+"""
+
+
+def test_kitti_load_calibration():
+  calib = kitti.Calibration.from_kitti_str(MOCK_CALIBRATION)
+  # The mock's 12-value and 9-value entries laid out row-major as 3x4 and 3x3.
+  MOCK_3x4 = np.array([
+       [ 100.,  200.,  300.,  400.],
+       [ 500.,  600.,  700.,  800.],
+       [ 900., 1000., 1100., 1200.],
+  ])
+  MOCK_3x3 = np.array([
+       [100., 200., 300.],
+       [400., 500., 600.],
+       [700., 800., 900.],
+  ])
+  MOCK_R = MOCK_3x4[:3, :3]
+  MOCK_T = MOCK_3x4[:3, 3]
+  MOCK_R0_rect = datum.Transform(
+                  rotation=MOCK_3x3,
+                  src_frame='camera|left_raw',
+                  dest_frame='camera|left_sensor')
+  MOCK_velo_to_cam_unrectified = datum.Transform(
+                  rotation=MOCK_R,
+                  translation=MOCK_T,
+                  src_frame='lidar',
+                  dest_frame='camera|left_grey_raw')
+
+  # Fixtures for derived attributes
+  MOCK_DERIVED_T = np.array([[-3596., -1398.66666667, 1200.]]).T
+  vel_to_cam_left_grey = MOCK_R0_rect @ MOCK_velo_to_cam_unrectified
+
+  RT_left_color = datum.Transform(translation=MOCK_DERIVED_T)
+  MOCK_velo_to_cam_2_rect = RT_left_color @ vel_to_cam_left_grey
+  MOCK_velo_to_cam_2_rect.src_frame = 'ego'
+  MOCK_velo_to_cam_2_rect.dest_frame = 'camera|left'
+
+  RT_right_color = datum.Transform(translation=MOCK_DERIVED_T)
+  MOCK_velo_to_cam_3_rect = RT_right_color @ vel_to_cam_left_grey
+  MOCK_velo_to_cam_3_rect.src_frame = 'ego'
+  MOCK_velo_to_cam_3_rect.dest_frame = 'camera|right'
+  # The mock P2 and P3 are identical, so both cameras share the same expected
+  # K, T, and (apart from dest_frame) lidar-to-camera transform.
+  EXPECTED = kitti.Calibration(
+              P2=MOCK_3x4,
+              P3=MOCK_3x4,
+
+              K2=MOCK_3x4[:3, :3],
+              K3=MOCK_3x4[:3, :3],
+              T2=MOCK_DERIVED_T,
+              T3=MOCK_DERIVED_T,
+              
+              R0_rect=MOCK_R0_rect,
+              velo_to_cam_unrectified=MOCK_velo_to_cam_unrectified,
+              imu_to_velo=
+                datum.Transform(
+                  rotation=MOCK_R,
+                  translation=MOCK_T,
+                  src_frame='oxts',
+                  dest_frame='lidar'),
+              
+              velo_to_cam_2_rect=MOCK_velo_to_cam_2_rect,
+              velo_to_cam_3_rect=MOCK_velo_to_cam_3_rect,
+  )
+
+  assert calib == EXPECTED
+
+
+# Two identical mock OXTS rows, parsed in test_kitti_load_transforms_from_oxts.
+MOCK_OXTS = """
+49.0 8.0 115.0 0.3 0.4 0.5 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0 21.0 22.0 23.0 24.0 25 26 27 28 29
+49.0 8.0 115.0 0.3 0.4 0.5 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0 21.0 22.0 23.0 24.0 25 26 27 28 29
+"""
+
+
+def test_kitti_load_transforms_from_oxts():
+  from scipy.spatial.transform import Rotation as R
+  # Two mock rows in, so two transforms out, keyed 0 and 1.
+  frame_to_xform = kitti.load_transforms_from_oxts(MOCK_OXTS)
+  assert sorted(frame_to_xform.keys()) == [0, 1]
+  # Rotation from the mock row's 0.3 0.4 0.5 values; z translation is its 115.0.
+  EXPECT_R = R.from_euler('xyz', [0.3, 0.4, 0.5]).as_matrix()
+  EXPECT_T = np.array([[5.84257256e+05, 4.11667947e+06, 115.0]]).T
+  # Only frame 0 is checked in detail (the two mock rows are identical).
+  xform = frame_to_xform[0]
+  np.testing.assert_allclose(EXPECT_R, xform.rotation)
+  np.testing.assert_allclose(EXPECT_T, xform.translation)
+  assert xform.src_frame == 'world'
+  assert xform.dest_frame == 'oxts'
+
+
+def test_kitti_archive_file_to_uri():
+  INPUT_EXPECTED_OUT = {
+    ## Object Benchmark
+    ('data_object_label_2.zip', 'training/label_2/006192.txt'):
+      'psegs://dataset=kitti-object&split=train&segment_id=kitti-object-benchmark-train&topic=labels|cuboids&extra.kitti.archive=data_object_label_2.zip&extra.kitti.archive.file=training/label_2/006192.txt&extra.kitti.frame=006192',
+    ('data_object_image_2.zip', 'training/image_2/006192.png'):
+      'psegs://dataset=kitti-object&split=train&segment_id=kitti-object-benchmark-train&topic=camera|left&extra.kitti.archive=data_object_image_2.zip&extra.kitti.archive.file=training/image_2/006192.png&extra.kitti.frame=006192',
+    ('data_object_image_3.zip', 'training/image_3/006192.png'):
+      'psegs://dataset=kitti-object&split=train&segment_id=kitti-object-benchmark-train&topic=camera|right&extra.kitti.archive=data_object_image_3.zip&extra.kitti.archive.file=training/image_3/006192.png&extra.kitti.frame=006192',
+    ('data_object_prev_2.zip', 'training/prev_2/006192_02.png'):
+      'psegs://dataset=kitti-object&split=train&segment_id=kitti-object-benchmark-train&topic=camera|left&extra.kitti.archive=data_object_prev_2.zip&extra.kitti.archive.file=training/prev_2/006192_02.png&extra.kitti.frame=006192&extra.kitti.prev=02',
+    ('data_object_prev_3.zip', 'training/prev_3/006192_02.png'):
+      'psegs://dataset=kitti-object&split=train&segment_id=kitti-object-benchmark-train&topic=camera|right&extra.kitti.archive=data_object_prev_3.zip&extra.kitti.archive.file=training/prev_3/006192_02.png&extra.kitti.frame=006192&extra.kitti.prev=02',
+    ('data_object_velodyne.zip', 'training/velodyne/006192.bin'):
+      'psegs://dataset=kitti-object&split=train&segment_id=kitti-object-benchmark-train&topic=lidar&extra.kitti.archive=data_object_velodyne.zip&extra.kitti.archive.file=training/velodyne/006192.bin&extra.kitti.frame=006192',
+    
+    ## Tracking Benchmark
+    ('data_tracking_label_2.zip', 'training/label_02/0005.txt'):
+      'psegs://dataset=kitti-tracking&split=train&segment_id=kitti-tracking-train-0005&topic=labels|cuboids&extra.kitti.archive=data_tracking_label_2.zip&extra.kitti.archive.file=training/label_02/0005.txt',
+    ('data_tracking_image_2.zip', 'training/image_02/0005/000039.png'):
+      'psegs://dataset=kitti-tracking&split=train&segment_id=kitti-tracking-train-0005&topic=camera|left&extra.kitti.frame=000039&extra.kitti.archive=data_tracking_image_2.zip&extra.kitti.archive.file=training/image_02/0005/000039.png',
+    ('data_tracking_image_3.zip', 'training/image_03/0005/000039.png'):
+      'psegs://dataset=kitti-tracking&split=train&segment_id=kitti-tracking-train-0005&topic=camera|right&extra.kitti.frame=000039&extra.kitti.archive=data_tracking_image_3.zip&extra.kitti.archive.file=training/image_03/0005/000039.png',
+    ('data_tracking_velodyne.zip', 'training/velodyne/0005/000039.bin'):
+      'psegs://dataset=kitti-tracking&split=train&segment_id=kitti-tracking-train-0005&topic=lidar&extra.kitti.frame=000039&extra.kitti.archive=data_tracking_velodyne.zip&extra.kitti.archive.file=training/velodyne/0005/000039.bin',
+    ('data_tracking_oxts.zip', 'training/oxts/0005.txt'):
+      'psegs://dataset=kitti-tracking&split=train&segment_id=kitti-tracking-train-0005&topic=ego_pose&extra.kitti.archive=data_tracking_oxts.zip&extra.kitti.archive.file=training/oxts/0005.txt',
+  }
+  # Each key is (archive name, path within archive); compare as parsed URIs.
+  for args, expected_out in INPUT_EXPECTED_OUT.items():
+    assert (
+      datum.URI.from_str(expected_out) == 
+        kitti.kitti_archive_file_to_uri(*args))
+
+
+## test_kitti_benchmark_to_raw_mapper Fixtures
+# Mock CSV rows, read with pandas in test_kitti_benchmark_to_raw_mapper_mock.
+# Breadcrumbs: 
+# pdf.query(' or '.join("(benchmark == '%s' and b_filename == '%s')" % k for k in keys))
+MOCK_BENCH_TO_RAW_CSV = """
+pandas_id,b_digest,r_digest,benchmark,segment_category,b_filename,r_filename,split,filename,frame,nanostamp,segment,topic
+2719,mock-sha-1,mock-sha-1,data_tracking_image_2.zip,road,training/image_02/0005/000039.png,2011_09_26/2011_09_26_drive_0015_sync/image_02/data/0000000039.png,train,2011_09_26/2011_09_26_drive_0015_sync/image_02/data/0000000039.png,39,1317042727991676,2011_09_26_drive_0015_sync,image_02
+24296,mock-sha-1,mock-sha-1,data_tracking_velodyne.zip,road,training/velodyne/0005/000039.bin,2011_09_26/2011_09_26_drive_0015_sync/velodyne_points/data/0000000039.bin,train,2011_09_26/2011_09_26_drive_0015_sync/velodyne_points/data/0000000039.bin,39,1317042727981173,2011_09_26_drive_0015_sync,velodyne_points
+71594,mock-sha-1,,data_tracking_image_2.zip,,testing/image_02/0009/000039.png,,test,,0,0,,
+83489,mock-sha-1,mock-sha-1,data_object_image_3.zip,residential,training/image_3/006192.png,2011_09_26/2011_09_26_drive_0061_sync/image_03/data/0000000117.png,train,2011_09_26/2011_09_26_drive_0061_sync/image_03/data/0000000117.png,117,1317047755608174,2011_09_26_drive_0061_sync,image_03
+96234,mock-sha-1,mock-sha-1,data_tracking_image_3.zip,road,training/image_03/0005/000039.png,2011_09_26/2011_09_26_drive_0015_sync/image_03/data/0000000039.png,train,2011_09_26/2011_09_26_drive_0015_sync/image_03/data/0000000039.png,39,1317042727991186,2011_09_26_drive_0015_sync,image_03
+146436,mock-sha-1,mock-sha-1,data_object_image_2.zip,residential,training/image_2/006192.png,2011_09_26/2011_09_26_drive_0061_sync/image_02/data/0000000117.png,train,2011_09_26/2011_09_26_drive_0061_sync/image_02/data/0000000117.png,117,1317047755608672,2011_09_26_drive_0061_sync,image_02
+199206,mock-sha-1,,data_object_image_2.zip,,testing/image_2/006192.png,,test,,0,0,,
+278639,mock-sha-1,mock-sha-1,data_object_velodyne.zip,residential,training/velodyne/006192.bin,2011_09_26/2011_09_26_drive_0061_sync/velodyne_points/data/0000000117.bin,train,2011_09_26/2011_09_26_drive_0061_sync/velodyne_points/data/0000000117.bin,117,1317047755598192,2011_09_26_drive_0061_sync,velodyne_points
+300477,mock-sha-1,mock-sha-1,data_object_prev_3.zip,residential,training/prev_3/006192_02.png,2011_09_26/2011_09_26_drive_0061_sync/image_03/data/0000000115.png,train,2011_09_26/2011_09_26_drive_0061_sync/image_03/data/0000000115.png,115,131704775540113,2011_09_26_drive_0061_sync,image_03
+337804,mock-sha-1,mock-sha-1,data_object_prev_2.zip,residential,training/prev_2/006192_02.png,2011_09_26/2011_09_26_drive_0061_sync/image_02/data/0000000115.png,train,2011_09_26/2011_09_26_drive_0061_sync/image_02/data/0000000115.png,115,131704775540175,2011_09_26_drive_0061_sync,image_02
+"""
+
+def test_kitti_benchmark_to_raw_mapper_mock():
+  test_tempdir = testutil.test_tempdir(
+                              'test_kitti_benchmark_to_raw_mapper_mock')
+  # Override the fixture paths so the table and index live under the temp dir.
+  class Fixtures(kitti.Fixtures):
+    @classmethod
+    def bench_to_raw_path(cls):
+      return test_tempdir / 'mock_bench_to_raw_df'
+
+    @classmethod
+    def index_root(cls):
+      return test_tempdir / 'kitti_index_root'
+
+  # We'll test this class that uses mock data
+  class MockBenchmarkToRawMapper(kitti.BenchmarkToRawMapper):
+    FIXTURES = Fixtures
+  # NOTE(review): MOCK_BENCH_TO_RAW_PATH below is never read afterwards.
+  # First create a mock bench_to_raw parquet table
+  MOCK_BENCH_TO_RAW_PATH = test_tempdir / 'mock_bench_to_raw_df'
+  with testutil.LocalSpark.getOrCreate() as spark:
+    from io import StringIO
+    import pandas as pd
+    pdf = pd.read_csv(StringIO(MOCK_BENCH_TO_RAW_CSV))
+    pdf.fillna(value='', inplace=True)
+    df = spark.createDataFrame(pdf)
+    df.write.parquet(str(Fixtures.bench_to_raw_path()))
+
+  # Now build index ...
+  with testutil.LocalSpark.getOrCreate() as spark:
+    MockBenchmarkToRawMapper.setup(spark)
+
+  # ... and create a mapper to test.
+  mapper = MockBenchmarkToRawMapper()
+  
+  # Now test BenchmarkToRawMapper logic!
+  INPUT_EXPECTED_OUT = {
+    ## Object Benchmark
+    ('data_object_label_2.zip', 'training/label_2/006192.txt'):
+      (1317047755608672,
+      {'kitti.raw.segment': '2011_09_26_drive_0061_sync',
+        'kitti.raw.segment_category': 'residential',
+        'kitti.raw.timestamp': '1317047755608672'}),
+    
+    ('data_object_image_2.zip', 'training/image_2/006192.png'):
+      (1317047755608672,
+      {'kitti.raw.filename': '2011_09_26/2011_09_26_drive_0061_sync/image_02/data/0000000117.png',
+        'kitti.raw.segment': '2011_09_26_drive_0061_sync',
+        'kitti.raw.segment_category': 'residential',
+        'kitti.raw.sha-1': 'mock-sha-1',
+        'kitti.raw.timestamp': '1317047755608672'}),
+    
+    ('data_object_image_3.zip', 'training/image_3/006192.png'):
+      (1317047755608174,
+      {'kitti.raw.filename': '2011_09_26/2011_09_26_drive_0061_sync/image_03/data/0000000117.png',
+        'kitti.raw.segment': '2011_09_26_drive_0061_sync',
+        'kitti.raw.segment_category': 'residential',
+        'kitti.raw.sha-1': 'mock-sha-1',
+        'kitti.raw.timestamp': '1317047755608174'}),
+    
+    ('data_object_image_2.zip', 'testing/image_2/006192.png'):
+      (620200000000,
+      {}),
+
+    ('data_object_prev_2.zip', 'training/prev_2/006192_02.png'):
+      (131704775540175,
+      {'kitti.raw.filename': '2011_09_26/2011_09_26_drive_0061_sync/image_02/data/0000000115.png',
+        'kitti.raw.segment': '2011_09_26_drive_0061_sync',
+        'kitti.raw.segment_category': 'residential',
+        'kitti.raw.sha-1': 'mock-sha-1',
+        'kitti.raw.timestamp': '131704775540175'}),
+    
+    ('data_object_prev_3.zip', 'training/prev_3/006192_02.png'):
+      (131704775540113,
+      {'kitti.raw.filename': '2011_09_26/2011_09_26_drive_0061_sync/image_03/data/0000000115.png',
+        'kitti.raw.segment': '2011_09_26_drive_0061_sync',
+        'kitti.raw.segment_category': 'residential',
+        'kitti.raw.sha-1': 'mock-sha-1',
+        'kitti.raw.timestamp': '131704775540113'}),
+    
+    ('data_object_velodyne.zip', 'training/velodyne/006192.bin'):
+      (1317047755598192,
+      {'kitti.raw.filename': '2011_09_26/2011_09_26_drive_0061_sync/velodyne_points/data/0000000117.bin',
+        'kitti.raw.segment': '2011_09_26_drive_0061_sync',
+        'kitti.raw.segment_category': 'residential',
+        'kitti.raw.sha-1': 'mock-sha-1',
+        'kitti.raw.timestamp': '1317047755598192'}),
+
+    # NB: 3-tuple keys carry an explicit frame; see the loop at the bottom.
+    ## Tracking Benchmark
+    ('data_tracking_label_2.zip', 'training/label_02/0005.txt', '000039'):
+      (1317042727991676,
+      {'kitti.raw.segment': '2011_09_26_drive_0015_sync',
+        'kitti.raw.segment_category': 'road',
+        'kitti.raw.timestamp': '1317042727991676'}),
+    
+    ('data_tracking_image_2.zip', 'training/image_02/0005/000039.png'):
+      (1317042727991676,
+      {'kitti.raw.filename': '2011_09_26/2011_09_26_drive_0015_sync/image_02/data/0000000039.png',
+        'kitti.raw.segment': '2011_09_26_drive_0015_sync',
+        'kitti.raw.segment_category': 'road',
+        'kitti.raw.sha-1': 'mock-sha-1',
+        'kitti.raw.timestamp': '1317042727991676'}),
+    
+    ('data_tracking_image_2.zip', 'testing/image_02/0009/000039.png'):
+      (4900000000,
+      {}),
+    
+    ('data_tracking_image_3.zip', 'training/image_03/0005/000039.png'):
+      (1317042727991186,
+      {'kitti.raw.filename': '2011_09_26/2011_09_26_drive_0015_sync/image_03/data/0000000039.png',
+        'kitti.raw.segment': '2011_09_26_drive_0015_sync',
+        'kitti.raw.segment_category': 'road',
+        'kitti.raw.sha-1': 'mock-sha-1',
+        'kitti.raw.timestamp': '1317042727991186'}),
+
+    ('data_tracking_velodyne.zip', 'training/velodyne/0005/000039.bin'):
+      (1317042727981173,
+      {'kitti.raw.filename': '2011_09_26/2011_09_26_drive_0015_sync/velodyne_points/data/0000000039.bin',
+        'kitti.raw.segment': '2011_09_26_drive_0015_sync',
+        'kitti.raw.segment_category': 'road',
+        'kitti.raw.sha-1': 'mock-sha-1',
+        'kitti.raw.timestamp': '1317042727981173'}),
+
+    ('data_tracking_oxts.zip', 'training/oxts/0005.txt', '000039'):
+      (4900000000, {}),
+  }
+  # Each value is (expected uri.timestamp, expected `extra` from the mapper).
+  for args, expected_out in INPUT_EXPECTED_OUT.items():
+    uri = kitti.kitti_archive_file_to_uri(*args[:2])
+    
+    # For oxts and labels test, we include frame
+    if len(args) == 3:
+      uri.extra['kitti.frame'] = args[-1]
+    
+    extra = mapper.get_extra(uri)
+    mapper.fill_timestamp(uri)
+    
+    et, exp_extra = expected_out
+    assert exp_extra == extra
+    assert et == uri.timestamp
+
+
+###############################################################################
+## Object Benchmark
+# Two label rows; each is a class name followed by 15 numeric fields.
+MOCK_OBJECT_LABEL = """
+Cyclist 0.0 0 1. 2. 3. 14. 5. 6. 7. 8. 9. 10. 11. 3.14159 13.
+Pedestrian 0.1 10 1. 2. 3. 14. 5. 6. 7. 8. 9. 10. 11. 1.570796 13.
+"""
+
+
+def test_kitti_object_load_label():
+  """Check `kitti.parse_object_label_cuboids()` against `MOCK_OBJECT_LABEL`.
+
+  Both returned lists (cuboids first, then 2D boxes) must equal hand-written
+  expectations, one entry per mock label row. The expectations are generated
+  from a small table because the two rows share most of their fields.
+  """
+  from scipy.spatial.transform import Rotation
+
+  # Parse once; both results are checked below.
+  parsed_cuboids, parsed_bboxes = kitti.parse_object_label_cuboids(
+    MOCK_OBJECT_LABEL)
+
+  # (category, truncated, occluded, yaw): the only values that differ between
+  # the two mock rows.
+  rows = (
+    ('Cyclist', '0.0', '0', 3.14159),
+    ('Pedestrian', '0.1', '10', 1.570796),
+  )
+
+  def expected_extra(truncated, occluded):
+    # Returns a fresh dict on every call so expected datums share no state.
+    return {
+      'kitti.truncated': truncated,
+      'kitti.occluded': occluded,
+      'kitti.score': '13.0',
+      'kitti.cam_relative_yaw': '1.0',
+    }
+
+  def expected_pose(yaw):
+    # With the 'yzx' sequence, [yaw, 0, 0] is a rotation of `yaw` about y only.
+    return datum.Transform(
+      rotation=Rotation.from_euler('yzx', [yaw, 0, 0]).as_matrix(),
+      translation=[9., 7., 11.],
+      src_frame='camera|left',
+      dest_frame='obj')
+
+  # Expected 3D boxes, in the same order as the rows of MOCK_OBJECT_LABEL.
+  expected_cuboids = [
+    datum.Cuboid(
+      category_name=category,
+      extra=expected_extra(truncated, occluded),
+      length_meters=8.0,
+      width_meters=6.0,
+      height_meters=7.0,
+      obj_from_ego=expected_pose(yaw))
+    for category, truncated, occluded, yaw in rows
+  ]
+  assert parsed_cuboids == expected_cuboids
+
+  # Expected 2D boxes; each carries the same `extra` metadata as its cuboid.
+  expected_bboxes = [
+    datum.BBox2D(
+      x=2,
+      y=3,
+      width=13,
+      height=3,
+      category_name=category,
+      extra=expected_extra(truncated, occluded))
+    for category, truncated, occluded, _ in rows
+  ]
+  assert parsed_bboxes == expected_bboxes
+
+
+def test_kitti_object_lidar_camera_projection():
+  """Run `save_projected_lidar` per object frame and camera; diff vs fixtures."""
+  testutil.skip_if_fixture_absent(kitti.Fixtures.EXTERNAL_FIXTURES_ROOT)
+  base_dir = kitti.Fixtures.object_fixture_dir()
+  outdir = testutil.test_tempdir('test_kitti_object_lidar_camera_projection')
+
+  for frame in kitti.Fixtures.OBJ_TEST_FRAMES:
+    # Use a context manager so the calibration file handle is closed promptly.
+    with open(base_dir / ('training/calib/%s.txt' % frame), 'r') as f:
+      calib = kitti.Calibration.from_kitti_str(f.read())
+    save_projected_lidar(
+      base_dir, outdir, frame, 'image_2', calib.K2, calib.velo_to_cam_2_rect)
+    save_projected_lidar(
+      base_dir, outdir, frame, 'image_3', calib.K3, calib.velo_to_cam_3_rect)
+
+  # Everything written to `outdir` must match the stored expected images.
+  expected_base = (kitti.Fixtures.EXTERNAL_FIXTURES_ROOT /
+                   'test_kitti_object_lidar_camera_projection')
+  testutil.assert_img_directories_equal(outdir, expected_base)
+
+
+def test_kitti_object_label_camera_projection():
+  """Save cuboid/bbox projections per object frame; diff output vs fixtures."""
+  testutil.skip_if_fixture_absent(kitti.Fixtures.EXTERNAL_FIXTURES_ROOT)
+  base_dir = kitti.Fixtures.object_fixture_dir()
+  outdir = testutil.test_tempdir('test_kitti_object_label_camera_projection')
+
+  for frame in kitti.Fixtures.OBJ_TEST_FRAMES:
+    # Context managers ensure both fixture files are closed after reading.
+    with open(base_dir / ('training/calib/%s.txt' % frame), 'r') as f:
+      calib = kitti.Calibration.from_kitti_str(f.read())
+    with open(base_dir / ('training/label_2/%s.txt' % frame), 'r') as f:
+      cuboids, bboxes = kitti.parse_object_label_cuboids(f.read())
+
+    def save_projected_bboxes(camera):
+      # Draws this frame's 2D boxes onto the `camera` image and saves it.
+      img = imageio.imread(base_dir / 'training' / camera / ('%s.png' % frame))
+      h, w = img.shape[:2]  # Loop-invariant, so computed once.
+      for bbox in bboxes:
+        bbox.im_height = h
+        bbox.im_width = w
+        bbox.draw_in_image(img)
+      imageio.imwrite(
+        outdir / ('projected_bboxes_%s_%s.png' % (frame, camera)), img)
+
+    save_projected_cuboids(
+      base_dir, cuboids,
+      outdir, frame,
+      'image_2', calib.K2, datum.Transform(translation=calib.T2))
+    save_projected_cuboids(
+      base_dir, cuboids,
+      outdir, frame,
+      'image_3', calib.K3, datum.Transform(translation=calib.T3))
+    save_projected_bboxes('image_2')
+    # We don't bother projecting bboxes to the right camera
+
+  # Everything written to `outdir` must match the stored expected images.
+  expected_base = (kitti.Fixtures.EXTERNAL_FIXTURES_ROOT /
+                   'test_kitti_object_label_camera_projection')
+  testutil.assert_img_directories_equal(outdir, expected_base)
+
+
+def test_kitti_object_label_lidar_projection():
+  """Run `save_labels_projected_to_lidar` per object frame; diff vs fixtures."""
+  testutil.skip_if_fixture_absent(kitti.Fixtures.EXTERNAL_FIXTURES_ROOT)
+
+  base_dir = kitti.Fixtures.object_fixture_dir()
+  outdir = testutil.test_tempdir('test_kitti_object_label_lidar_projection')
+
+  for frame in kitti.Fixtures.OBJ_TEST_FRAMES:
+    # Context managers ensure every fixture file is closed after reading.
+    with open(base_dir / ('training/calib/%s.txt' % frame), 'r') as f:
+      calib = kitti.Calibration.from_kitti_str(f.read())
+    with open(base_dir / ('training/label_2/%s.txt' % frame), 'r') as f:
+      cuboids, _ = kitti.parse_object_label_cuboids(f.read())  # bboxes unused
+    with open(base_dir / ('training/velodyne/%s.bin' % frame), 'rb') as f:
+      raw_lidar = np.frombuffer(f.read(), dtype=np.float32).reshape((-1, 4))
+    xyz = raw_lidar[:, :3]
+    # unused: reflectance = raw_lidar[:, 3:]
+
+    save_labels_projected_to_lidar(
+      base_dir, outdir, frame, calib, xyz, cuboids)
+
+  # Everything written to `outdir` must match the stored expected images.
+  expected_base = (
+    kitti.Fixtures.EXTERNAL_FIXTURES_ROOT /
+      'test_kitti_object_label_lidar_projection')
+  testutil.assert_img_directories_equal(outdir, expected_base)
+
+
+###############################################################################
+## Tracking Benchmark
+# Rows prepend `frame_num track_id` to the object-label fields (2 frames x 2).
+MOCK_TRACKING_LABEL = """
+0 1 Cyclist 0.0 0 1. 2. 3. 14. 5. 6. 7. 8. 9. 10. 11. 3.14159 13.
+0 2 Pedestrian 0.1 10 1. 2. 3. 14. 5. 6. 7. 8. 9. 10. 11. 1.570796 13.
+1 1 Cyclist 0.0 0 1. 2. 3. 14. 5. 6. 7. 8. 9. 10. 11. 3.14159 13.
+1 2 Pedestrian 0.1 10 1. 2. 3. 14. 5. 6. 7. 8. 9. 10. 11. 1.570796 13.
+"""
+
+
+def test_kitti_tracking_load_label():
+  """Check `kitti.parse_tracking_label_cuboids()` against `MOCK_TRACKING_LABEL`.
+
+  Both returned mappings must contain frames 0 and 1. Only the cuboids of
+  frame 0 are compared against hand-written expectations; the 2D boxes are
+  checked for presence only.
+  """
+  from scipy.spatial.transform import Rotation
+
+  frame_to_cuboids, frame_to_bboxes = kitti.parse_tracking_label_cuboids(
+    MOCK_TRACKING_LABEL)
+
+  # Presence checks: bboxes first, then cuboids; frame 0 before frame 1.
+  for parsed in (frame_to_bboxes, frame_to_cuboids):
+    for frame_num in (0, 1):
+      assert frame_num in parsed
+  frame0_cuboids = frame_to_cuboids[0]
+
+  # Note: these are identical to the cuboids listed in
+  # test_kitti_object_load_label(), except these
+  # have track_id and frame_num populated.
+  # Columns: (category, track_id, truncated, occluded, yaw).
+  rows = (
+    ('Cyclist', '1', '0.0', '0', 3.14159),
+    ('Pedestrian', '2', '0.1', '10', 1.570796),
+  )
+
+  def expected_extra(track_id, truncated, occluded):
+    # The track id is expected both here and as the cuboid's `track_id`.
+    return {
+      'kitti.truncated': truncated,
+      'kitti.occluded': occluded,
+      'kitti.score': '13.0',
+      'kitti.cam_relative_yaw': '1.0',
+      'kitti.track_id': track_id,
+      'kitti.frame_num': '0',
+    }
+
+  def expected_pose(yaw):
+    # With the 'yzx' sequence, [yaw, 0, 0] is a rotation of `yaw` about y only.
+    return datum.Transform(
+      rotation=Rotation.from_euler('yzx', [yaw, 0, 0]).as_matrix(),
+      translation=[9., 7., 11.],
+      src_frame='camera|left',
+      dest_frame='obj')
+
+  expected_cuboids = [
+    datum.Cuboid(
+      category_name=category,
+      track_id=track_id,
+      extra=expected_extra(track_id, truncated, occluded),
+      length_meters=8.0,
+      width_meters=6.0,
+      height_meters=7.0,
+      obj_from_ego=expected_pose(yaw))
+    for category, track_id, truncated, occluded, yaw in rows
+  ]
+  assert frame0_cuboids == expected_cuboids
+
+
+def test_kitti_tracking_lidar_camera_projection():
+  """Run `save_projected_lidar` on tracking frames; diff output vs fixtures."""
+  testutil.skip_if_fixture_absent(kitti.Fixtures.EXTERNAL_FIXTURES_ROOT)
+  base_dir = kitti.Fixtures.tracking_fixture_dir()
+  outdir = testutil.test_tempdir('test_kitti_tracking_lidar_camera_projection')
+
+  for frame in kitti.Fixtures.TRACKING_TEST_FRAMES:
+    # `frame` is '<seq>/<frame_num>'; calibration is stored per sequence.
+    seq, _ = frame.split('/')
+    # Use a context manager so the calibration file handle is closed promptly.
+    with open(base_dir / ('training/calib/%s.txt' % seq), 'r') as f:
+      calib = kitti.Calibration.from_kitti_str(f.read())
+    save_projected_lidar(
+      base_dir, outdir, frame, 'image_02', calib.K2, calib.velo_to_cam_2_rect)
+    save_projected_lidar(
+      base_dir, outdir, frame, 'image_03', calib.K3, calib.velo_to_cam_3_rect)
+
+  # Everything written to `outdir` must match the stored expected images.
+  expected_base = (kitti.Fixtures.EXTERNAL_FIXTURES_ROOT /
+                   'test_kitti_tracking_lidar_camera_projection')
+  testutil.assert_img_directories_equal(outdir, expected_base)
+
+
+def test_kitti_tracking_label_camera_projection():
+  """Save cuboid projections per tracking frame; diff output vs fixtures."""
+  testutil.skip_if_fixture_absent(kitti.Fixtures.EXTERNAL_FIXTURES_ROOT)
+
+  base_dir = kitti.Fixtures.tracking_fixture_dir()
+  outdir = testutil.test_tempdir('test_kitti_tracking_label_camera_projection')
+
+  for frame in kitti.Fixtures.TRACKING_TEST_FRAMES:
+    # `frame` is '<seq>/<frame_num>'; calib and labels are stored per sequence.
+    seq, frame_num = frame.split('/')
+    frame_num = int(frame_num)
+
+    # Context managers ensure both fixture files are closed after reading.
+    with open(base_dir / ('training/calib/%s.txt' % seq), 'r') as f:
+      calib = kitti.Calibration.from_kitti_str(f.read())
+    with open(base_dir / ('training/label_02/%s.txt' % seq), 'r') as f:
+      f_to_cuboids, f_to_bboxes = kitti.parse_tracking_label_cuboids(f.read())
+
+    assert frame_num in f_to_bboxes
+    assert frame_num in f_to_cuboids
+    cuboids = f_to_cuboids[frame_num]
+
+    save_projected_cuboids(
+      base_dir, cuboids,
+      outdir, frame,
+      'image_02', calib.K2, datum.Transform(translation=calib.T2))
+    save_projected_cuboids(
+      base_dir, cuboids,
+      outdir, frame,
+      'image_03', calib.K3, datum.Transform(translation=calib.T3))
+    # We don't bother testing bboxes
+
+  # Everything written to `outdir` must match the stored expected images.
+  expected_base = (kitti.Fixtures.EXTERNAL_FIXTURES_ROOT /
+                   'test_kitti_tracking_label_camera_projection')
+  testutil.assert_img_directories_equal(outdir, expected_base)
+
+
+def test_kitti_tracking_label_lidar_projection():
+  """Run `save_labels_projected_to_lidar` on tracking frames; diff vs fixtures."""
+  testutil.skip_if_fixture_absent(kitti.Fixtures.EXTERNAL_FIXTURES_ROOT)
+
+  base_dir = kitti.Fixtures.tracking_fixture_dir()
+  outdir = testutil.test_tempdir('test_kitti_tracking_label_lidar_projection')
+
+  for frame in kitti.Fixtures.TRACKING_TEST_FRAMES:
+    # `frame` is '<seq>/<frame_num>'; calib and labels are stored per sequence.
+    seq, frame_num = frame.split('/')
+    frame_num = int(frame_num)
+    # Context managers ensure every fixture file is closed after reading.
+    with open(base_dir / ('training/calib/%s.txt' % seq), 'r') as f:
+      calib = kitti.Calibration.from_kitti_str(f.read())
+    with open(base_dir / ('training/label_02/%s.txt' % seq), 'r') as f:
+      f_to_cuboids, _ = kitti.parse_tracking_label_cuboids(f.read())
+
+    assert frame_num in f_to_cuboids
+    cuboids = f_to_cuboids[frame_num]
+
+    with open(base_dir / ('training/velodyne/%s.bin' % frame), 'rb') as f:
+      raw_lidar = np.frombuffer(f.read(), dtype=np.float32).reshape((-1, 4))
+    xyz = raw_lidar[:, :3]
+    # unused: reflectance = raw_lidar[:, 3:]
+
+    save_labels_projected_to_lidar(base_dir, outdir, frame, calib, xyz, cuboids)
+
+  # Everything written to `outdir` must match the stored expected images.
+  expected_base = (kitti.Fixtures.EXTERNAL_FIXTURES_ROOT /
+                   'test_kitti_tracking_label_lidar_projection')
+  testutil.assert_img_directories_equal(outdir, expected_base)
+
+
+###############################################################################
+## Stamped Datum Table
+
+
+def test_kitti_sd_table_tracking():
+  """Smoke test: build the stamped-datum table for one tracking segment.
+
+  Builds into a temp dir and prints per-topic row counts and timestamp
+  bounds. There are no assertions on table content yet (see TODO below).
+  """
+  testutil.skip_if_fixture_absent(kitti.Fixtures.ROOT)
+
+  TEST_TEMPDIR = testutil.test_tempdir('test_kitti_sd_table_tracking')
+
+  class Fixtures(kitti.Fixtures):
+    @classmethod
+    def index_root(cls):
+      return TEST_TEMPDIR / 'kitti_index_root'
+
+  class TrackingTestTable(kitti.KITTISDTable):
+    INCLUDE_OBJECT_BENCHMARK = False
+    FIXTURES = Fixtures
+
+    @classmethod
+    def table_root(cls):
+      return TEST_TEMPDIR / 'sd_table'
+
+  with testutil.LocalSpark.sess() as spark:
+    suri = datum.URI.from_str(
+      'psegs://dataset=kitti-tracking&split=train&segment_id=kitti-tracking-train-0012')
+    TrackingTestTable.build(spark, only_segments=[suri])
+
+    df = TrackingTestTable.as_df(spark)
+    df.createOrReplaceTempView('seg')
+    # NB: no `pdb.set_trace()` here -- a breakpoint blocks unattended test runs.
+    spark.sql(""" SELECT uri.topic AS topic, count(*) AS N, MAX(uri.timestamp),  MIN(uri.timestamp) FROM seg GROUP BY topic """).show()
+    # TODO: assert against a fixture of uris, hashes of any image arrays and
+    # cloud arrays, hashes of messages? (minus cloudpickle callables?)
+
+
+# Object benchmark (2), tracking test 0000-0028 (29), tracking train 0000-0020 (21).
+EXPECTED_SEGMENTS = tuple(
+  ['psegs://dataset=kitti-object&split=%s&segment_id=kitti-object-benchmark-%s' % (s, s) for s in ('test', 'train')]
+  + ['psegs://dataset=kitti-tracking&split=test&segment_id=kitti-tracking-test-%04d' % i for i in range(29)]
+  + ['psegs://dataset=kitti-tracking&split=train&segment_id=kitti-tracking-train-%04d' % i for i in range(21)]
+)
+
+def test_kitti_all_segment_uris():
+  """`KITTISDTable.get_all_segment_uris()` must yield exactly EXPECTED_SEGMENTS."""
+  testutil.skip_if_fixture_absent(kitti.Fixtures.ROOT)
+  found = sorted(map(str, kitti.KITTISDTable.get_all_segment_uris()))
+  assert sorted(EXPECTED_SEGMENTS) == found
+
+###############################################################################
+## DSUtil Tests
+
+def test_kitti_dsutil_smoke():
+  """Smoke test: `kitti.DSUtil.emplace()` must return a truthy value."""
+  # Both fixture roots are preconditions of emplace(); skip if either is absent.
+  for root_attr in ('ROOT', 'EXTERNAL_FIXTURES_ROOT'):
+    testutil.skip_if_fixture_absent(getattr(kitti.Fixtures, root_attr))
+  assert kitti.DSUtil.emplace()
diff --git a/test/datasets/test_kitti_360.py b/test/datasets/test_kitti_360.py
new file mode 100644
index 0000000..85e7f13
--- /dev/null
+++ b/test/datasets/test_kitti_360.py
@@ -0,0 +1,719 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from psegs import datum
+from psegs.datasets import kitti_360
+
+from test import testutil
+
+# NB: cv2 File Storage in _python_ does not support integers, only floats :(
+# import cv2
+# CV2_FILE_NODE_TYPE_TO_NAME = dict(
+#   (getattr(cv2, attr), attr)
+#   for attr in dir(cv2)
+#   if attr.startswith('FILE_NODE_'))
+# CV2_FILE_NODE_INTEGRAL_TYPE_TO_GETTER = {
+#   'FILE_NODE_FLOAT':  lambda n: n.real(),
+#   'FILE_NODE_REAL':   lambda n: n.real(),
+#   'FILE_NODE_INT':    lambda n: n.real(),
+#   'FILE_NODE_STRING': lambda n: n.string(),
+#   'FILE_NODE_MAT':    lambda n: n.mat(),
+# }
+
+# def cvnode_to_python(n):
+#   node_type_name = CV2_FILE_NODE_TYPE_TO_NAME[n.type()]
+#   if node_type_name in CV2_FILE_NODE_INTEGRAL_TYPE_TO_GETTER:
+#     f = CV2_FILE_NODE_INTEGRAL_TYPE_TO_GETTER[node_type_name]
+#     return f(n)
+#   elif node_type_name == 'FILE_NODE_MAP':
+#     return dict(
+#       (k, cvnode_to_python(n.getNode(k)))
+#       for k in n.keys()
+#     )
+#   elif node_type_name == 'FILE_NODE_SEQ':
+#     return [cvnode_to_python(n.at(i)) for i in range(n.size())]
+#   else:
+#     raise ValueError("Don't know how to handle node of type %s: %s" % (
+#       node_type_name, n))
+
+
+def kitti_360_get_parsed_node(d):
+  """Convert a raw annotation node `d` into a dict of typed Python values.
+
+  Scalar entries are cast with `int` / `str`; the 'transform', 'vertices' and
+  'faces' entries (mappings with 'rows', 'cols', 'dt' and 'data') become numpy
+  arrays. A 'cuboid' entry is added: the vertices mapped through 'transform'.
+  """
+  # Imported at this level (not inside one helper) so every helper sees `np`.
+  import numpy as np
+
+  def to_ndarray(m):
+    # 'dt' == 'f' selects float parsing; any other value is parsed as int.
+    parse = float if str(m['dt']) == 'f' else int
+    values = [parse(t) for t in m['data'].split()]
+    return np.array(values).reshape((int(m['rows']), int(m['cols'])))
+
+  def fill_cuboid(node):
+    # Appears the cuboid bounds are encoded in the RT; in the raw XML, the
+    # vertices are +/- 0.5m for all objects in the XML
+    # FMI https://github.com/autonomousvision/kitti360Scripts/blob/081c08b34a14960611f459f23a0ad049542205c6/kitti360scripts/helpers/annotation.py#L125
+    R = node['transform'][:3, :3]
+    T = node['transform'][:3, 3]
+    node['cuboid'] = np.matmul(R, node['vertices'].T).T + T
+
+  # ??? Not sure what this is about
+  # FMI https://github.com/autonomousvision/kitti360Scripts/blob/081c08b34a14960611f459f23a0ad049542205c6/kitti360scripts/helpers/annotation.py#L154
+  K360_CLASSMAP = {
+    'driveway': 'parking',
+    'ground': 'terrain',
+    'unknownGround': 'ground',
+    'railtrack': 'rail track',
+  }
+
+  # Labels without an entry in the map are used as the class name unchanged.
+  label = str(d['label'])
+  out = {
+    'index':            int(d['index']),
+    'label':            label,
+    'k360_class_name':  K360_CLASSMAP.get(label, label),
+    'semanticId_orig':  int(d['semanticId_orig']),
+    'semanticId':       int(d['semanticId']),
+    'instanceId':       int(d['instanceId']),
+    'category':         str(d['category']),
+    'timestamp':        int(d['timestamp']),
+    'dynamic':          int(d['dynamic']),
+    'start_frame':      int(d['start_frame']),
+    'end_frame':        int(d['end_frame']),
+    'transform':        to_ndarray(d['transform']),
+    'vertices':         to_ndarray(d['vertices']),
+    'faces':            to_ndarray(d['faces']),
+  }
+
+  fill_cuboid(out)
+  return out
+
+def test_kitti360_video():
+  """Render debug images for part of one KITTI-360 sequence into an mp4.
+
+  Each selected frame's datums are loaded in a Spark job and the debug image
+  of its left camera (with lidar clouds and cuboids) is appended to the video.
+  """
+  from collections import defaultdict
+
+  import imageio
+  from oarphpy import util as oputil
+
+  T = kitti_360.KITTI360SDTable
+  uris = T.get_uris_for_sequence('2013_05_28_drive_0000_sync')
+
+  # Skip the plain 'lidar' topic and any topic containing one of these parts.
+  SKIPPED_TOPIC_PARTS = ('right', 'sick', 'fisheye', 'dynamic')
+  kitti_fid_to_uris = defaultdict(list)
+  for uri in uris:
+    if uri.topic == 'lidar':
+      continue
+    if any(part in uri.topic for part in SKIPPED_TOPIC_PARTS):
+      continue
+    kitti_fid_to_uris[int(uri.extra['kitti-360.frame_id'])].append(uri)
+
+  def entry_to_fid_mframe(entry):
+    # Mapped over the RDD: (frame id, uris) -> (frame id, debug image).
+    import time
+    fid, uris = entry
+    sample_time = time.time()
+    sample = datum.Sample(datums=[T.create_stamped_datum(uri) for uri in uris])
+    sample_time = time.time() - sample_time
+    debug = None
+    for ci in sample.camera_images:
+      if 'left' not in ci.sensor_name:
+        continue
+      debug_time = time.time()
+      debug = ci.get_debug_image(
+                clouds=sample.lidar_clouds,
+                cuboids=sample.cuboid_labels)
+      debug_time = time.time() - debug_time
+    assert debug is not None
+    print('sample_time', sample_time, 'debug_time', debug_time)
+    return (fid, debug)
+
+  tasks = sorted(kitti_fid_to_uris.items())[300:600]
+  print('tasks', len(tasks))
+  with testutil.LocalSpark.sess() as spark:
+    writer = imageio.get_writer('/opt/psegs/psegs_test/debug.mp4', fps=10)
+    try:  # Always finalize the video file, even if a Spark job fails.
+      for chunk in oputil.ichunked(tasks, 500):
+        entry_rdd = spark.sparkContext.parallelize(chunk)
+        fid_debug = sorted(entry_rdd.map(entry_to_fid_mframe).collect())
+        print('len(fid_debug)', len(fid_debug))
+        for _, debug in fid_debug:
+          writer.append_data(debug)
+    finally:
+      writer.close()
+
+
+def test_kitti360_painted():
+  """Build a `datum.Sample` from three fixed URIs and check its debug images."""
+  table = kitti_360.KITTI360SDTable
+  # Left rectified camera, fused static lidar and cuboid labels, frame_id=180.
+  uri_strs = (
+    'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=camera|left_rect&extra.kitti-360.camera=image_00&extra.kitti-360.frame_id=180',
+    'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=lidar|fused_static&extra.kitti-360.camera=image_00&extra.kitti-360.frame_id=180',
+    'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=labels|cuboids&extra.kitti-360.camera=image_00&extra.kitti-360.frame_id=180',
+  )
+  parsed_uris = list(map(datum.URI.from_str, uri_strs))
+  # Alternative URI set (frame_id=106), currently disabled:
+  # uris = [
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=ego_pose&extra.kitti-360.frame_id=106',
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=camera|left_rect&extra.kitti-360.camera=image_00&extra.kitti-360.frame_id=106',
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=camera|right_rect&extra.kitti-360.camera=image_01&extra.kitti-360.frame_id=106',
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=camera|left_fisheye&extra.kitti-360.camera=image_02&extra.kitti-360.frame_id=106',
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=camera|right_fisheye&extra.kitti-360.camera=image_03&extra.kitti-360.frame_id=106',
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=lidar&extra.kitti-360.frame_id=106',
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=laser|sick&extra.kitti-360.frame_id=106',
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=labels|cuboids&extra.kitti-360.frame_id=106',
+  # ]
+  # uris = [datum.URI.from_str(u) for u in uris]
+
+  sample = datum.Sample(datums=list(map(table.create_stamped_datum, parsed_uris)))
+  testutil.check_sample_debug_images(sample, 'no_exist_yet')
+
+
+def test_kitti360_uris():
+  T = kitti_360.KITTI360SDTable
+  # uris = T.get_uris_for_sequence('2013_05_28_drive_0000_sync')
+  # # uris = [u for u in uris if u.extra['kitti-360.frame_id'] == '5661']
+  
+  # from collections import defaultdict
+  # kitti_fid_to_uris = defaultdict(list)
+  # for uri in uris:
+  #   if 'right' in uri.topic:
+  #     continue
+  #   if 'sick' in uri.topic:
+  #     continue
+  #   if 'fisheye' in uri.topic:
+  #     continue
+  #   if 'dynamic' in uri.topic:
+  #     continue
+  #   if 'fused' in uri.topic:
+  #     continue
+  #   # if uri.topic == 'lidar':
+  #   #   continue
+  #   fid = int(uri.extra['kitti-360.frame_id'])
+  #   if not (99 <= fid <= 110):
+  #     continue
+  #   kitti_fid_to_uris[fid].append(uri)
+  
+  # tasks = sorted(kitti_fid_to_uris.items())
+  # with testutil.LocalSpark.sess() as spark:
+
+  #   def entry_to_wcloud(entry):
+  #     import time
+  #     fid, uris = entry
+  #     sample_time = time.time()
+  #     sample = datum.Sample(datums=[T.create_stamped_datum(uri) for uri in uris])
+  #     sample_time = time.time() - sample_time
+  #     debug = None
+  #     for pc in sample.lidar_clouds:
+  #       if not pc.ego_pose:
+  #         return None
+  #       # return pc.ego_to_sensor.get_inverse().apply(pc.cloud).T
+  #       cloud_ego = pc.ego_to_sensor.get_inverse().apply(pc.cloud[:, :3]).T
+  #       T_world_to_ego = pc.ego_pose
+  #       cloud_world = T_world_to_ego.apply(cloud_ego).T
+  #       return cloud_world
+        
+  #       # T_world_to_velo = (T_world_to_ego @ pc.ego_to_sensor.get_inverse()).get_inverse()
+  #       # wcloud = T_world_to_velo.get_inverse().apply(pc.cloud).T
+  #       # return wcloud
+    
+  #   tasks = sorted(kitti_fid_to_uris.items())
+  #   print('tasks', len(tasks))
+  #   entry_rdd = spark.sparkContext.parallelize(tasks)
+  #   wclouds = entry_rdd.map(entry_to_wcloud).filter(lambda x: x is not None).collect()
+  #   import numpy as np
+  #   wcloud = np.vstack(wclouds)
+  #   print('wcloud.shape', wcloud.shape)
+
+  #   sd = T.create_stamped_datum(datum.URI.from_str('psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=camera|left_rect&extra.kitti-360.camera=image_00&extra.kitti-360.frame_id=106'))
+  #   ci = sd.camera_image
+  #   wcloud_ego = ci.ego_pose.apply(wcloud).T
+  #   xyzrgb = datum.PointCloud.paint_ego_cloud(
+  #               wcloud, camera_images=[ci])
+    
+  #   import open3d as o3d
+  #   pcd = o3d.geometry.PointCloud()
+  #   pcd.points = o3d.utility.Vector3dVector(xyzrgb[:, :3])
+  #   pcd.colors = o3d.utility.Vector3dVector(xyzrgb[:, 3:] / 256.)
+  #   o3d.io.write_point_cloud('/opt/psegs/psegs_test/painted-manual-fuse.ply', pcd)
+  #   return
+
+  #   # import open3d as o3d
+  #   # pcd = o3d.geometry.PointCloud()
+  #   # pcd.points = o3d.utility.Vector3dVector(wcloud)
+  #   # o3d.io.write_point_cloud('/opt/psegs/psegs_test/kitti_fused.ply', pcd)
+  #   # return
+
+  
+
+  uris = [
+    'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=ego_pose&extra.kitti-360.frame_id=106',
+    'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=camera|left_rect&extra.kitti-360.camera=image_00&extra.kitti-360.frame_id=106',
+    # 'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=camera|right_rect&extra.kitti-360.camera=image_01&extra.kitti-360.frame_id=106',
+    # 'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=camera|left_fisheye&extra.kitti-360.camera=image_02&extra.kitti-360.frame_id=106',
+    # 'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=camera|right_fisheye&extra.kitti-360.camera=image_03&extra.kitti-360.frame_id=106',
+    'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=lidar&extra.kitti-360.frame_id=106',
+    # 'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=laser|sick&extra.kitti-360.frame_id=106',
+    'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=lidar|fused_static&extra.kitti-360.frame_id=106',
+    # 'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=lidar|fused_dynamic&extra.kitti-360.frame_id=106',
+    'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=labels|cuboids&extra.kitti-360.frame_id=106'
+  ]
+  uris = [datum.URI.from_str(u) for u in uris]
+
+  # uris = [
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=ego_pose&extra.kitti-360.frame_id=106',
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=camera|left_rect&extra.kitti-360.camera=image_00&extra.kitti-360.frame_id=106',
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=camera|right_rect&extra.kitti-360.camera=image_01&extra.kitti-360.frame_id=106',
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=camera|left_fisheye&extra.kitti-360.camera=image_02&extra.kitti-360.frame_id=106',
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=camera|right_fisheye&extra.kitti-360.camera=image_03&extra.kitti-360.frame_id=106',
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=lidar&extra.kitti-360.frame_id=106',
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=laser|sick&extra.kitti-360.frame_id=106',
+  #   'psegs://dataset=kitti-360&split=train&segment_id=2013_05_28_drive_0000_sync&timestamp=1060000000&topic=labels|cuboids&extra.kitti-360.frame_id=106',
+  # ]
+  # uris = [datum.URI.from_str(u) for u in uris]
+
+  sample = datum.Sample(datums=[T.create_stamped_datum(uri) for uri in uris])
+
+  # for pc in sample.lidar_clouds:
+  #   cloud_ego = pc.ego_to_sensor.get_inverse().apply(pc.cloud).T
+  #   xyzrgb = datum.PointCloud.paint_ego_cloud(
+  #               cloud_ego, camera_images=sample.camera_images)
+    
+  #   import open3d as o3d
+  #   pcd = o3d.geometry.PointCloud()
+  #   pcd.points = o3d.utility.Vector3dVector(xyzrgb[:, :3])
+  #   pcd.colors = o3d.utility.Vector3dVector(xyzrgb[:, 3:] / 256.)
+  #   o3d.io.write_point_cloud('/opt/psegs/psegs_test/painted-%s.ply' % pc.sensor_name.replace('|', '_'), pcd)
+
+  testutil.check_sample_debug_images(sample, 'no_exist_yet')
+
+  # for uri in uris:
+  #   sd = T.create_stamped_datum(uri)
+  #   print(sd.uri)
+
+def test_kitti350_play():
+  """Scratch test: build cuboids for KITTI-360 frame FRAMEID from hard-coded local paths and write debug renders."""
+  import imageio
+
+  import numpy as np
+
+  from pathlib import Path
+
+
+  # FRAMEID = 8112#8096
+  FRAMEID = 8096
+  FRAMENAME = str(FRAMEID).rjust(10, '0')
+  
+
+  import xmltodict
+  d = xmltodict.parse(open("/outer_root/media/seagates-ext4/au_datas/kitti/kitti-360/KITTI-360/data_3d_bboxes/train/2013_05_28_drive_0000_sync.xml").read())
+
+  objects = d['opencv_storage']
+  obj_name_to_value = dict(
+    (k, kitti_360_get_parsed_node(v)) for (k, v) in objects.items())
+
+  # Keep only non-dynamic objects whose [start_frame, end_frame] range contains FRAMEID.
+  obs_in_frame = dict(
+    (k, v) for (k, v) in obj_name_to_value.items()
+    if (
+      ((not v['dynamic']) and (v['start_frame'] <= FRAMEID <= v['end_frame'])) or
+      False)   #)(v['dynamic'] and v['index'] == FRAMEID))
+  )
+
+  ROOT = Path('/outer_root/media/seagates-ext4/au_datas/kitti/kitti-360/KITTI-360/')
+
+  calib_cam_to_pose = open(ROOT / 'calibration/calib_cam_to_pose.txt').read()
+  calib_cam_to_velo = open(ROOT / 'calibration/calib_cam_to_velo.txt').read()
+  calib_sick_to_velo = open(ROOT / 'calibration/calib_sick_to_velo.txt').read()
+  perspective = open(ROOT / 'calibration/perspective.txt').read()
+
+  calib = Calibration.from_kitti_360_strs(
+            calib_cam_to_pose,
+            calib_cam_to_velo,
+            calib_sick_to_velo,
+            perspective)
+  
+  img = imageio.imread(ROOT / ('data_2d_raw/2013_05_28_drive_0000_sync/image_00/data_rect/%s.png' % FRAMENAME))
+  img2 = imageio.imread(ROOT / ('data_2d_raw/2013_05_28_drive_0000_sync/image_01/data_rect/%s.png'% FRAMENAME))
+
+  vel_path = ROOT / ('data_3d_raw/2013_05_28_drive_0000_sync/velodyne_points/data/%s.bin' % FRAMENAME)
+  cloud = np.fromfile(vel_path, dtype=np.float32)
+  cloud = np.reshape(cloud, [-1, 4])
+  cloud = cloud[:, :3]
+
+  static_path =  ROOT / 'data_3d_semantics/2013_05_28_drive_0000_sync/static/007968_008291.ply'
+  dynamic_path = ROOT / 'data_3d_semantics/2013_05_28_drive_0000_sync/dynamic/007968_008291.ply'
+
+  import open3d
+  static_cloud = open3d.io.read_point_cloud(str(static_path))
+  dynamic_cloud = open3d.io.read_point_cloud(str(dynamic_path))
+
+
+  # https://github.com/autonomousvision/kitti360Scripts/blob/fc4e92bfe7d7da0a404e58bca3b98660147ca09c/kitti360scripts/helpers/project.py#L65
+  cam0_to_world = ROOT / 'data_poses/2013_05_28_drive_0000_sync/cam0_to_world.txt'
+  poses = np.loadtxt(cam0_to_world)
+  frames = poses[:,0]
+  poses_raw = np.reshape(poses[:, 1:],[-1, 4, 4])
+  cam0_to_world = dict(zip(frames, poses_raw))
+
+  poses_idk = ROOT / 'data_poses/2013_05_28_drive_0000_sync/poses.txt'
+  poses = np.loadtxt(poses_idk)
+  frames = poses[:,0]
+  poses_raw = np.reshape(poses[:, 1:],[-1, 3, 4])
+  poses_idk = dict(zip(frames, poses_raw))
+
+  cuboids_cam = []
+  cuboids_lidar = []
+  print('obs_in_frame', len(obs_in_frame))
+  # NOTE: `close` is only referenced by the commented-out nearest-object search below.
+  close = None
+  for obj_name, obj in obs_in_frame.items():
+    from psegs import datum
+
+    if obj['k360_class_name'] in ('building', 'garage'):
+      continue
+
+    # IMU frame: x = forward, y = right, z = down
+    # +x +y +z
+    # +x +y -z
+    # +x -y +z
+    # +x -y -z
+    # -x +y -z
+    # -x +y +z
+    # -x -y -z
+    # -x -y +z
+    # Split the 8 corners into front / rear faces; rear rows are permuted (listing below).
+    front_world = obj['cuboid'][[0, 1, 2, 3], :]
+    rear_world = obj['cuboid'][[5, 4, 7, 6], :]
+
+    # Now:
+    # +x +y +z
+    # +x +y -z
+    # +x -y +z
+    # +x -y -z
+    # -x +y +z
+    # -x +y -z
+    # -x -y +z
+    # -x -y -z
+
+    print(front_world - np.mean(obj['cuboid'], axis=0))
+    print(rear_world - np.mean(obj['cuboid'], axis=0))
+
+    # w = 1.5#abs(front_world[0, 0] - rear_world[0, 0])
+    # l = 2.5#abs(front_world[0, 1] - front_world[2, 1])
+    # h = 1.5#abs(front_world[0, 2] - front_world[1, 2])
+    w = np.linalg.norm(front_world[0, :] - front_world[2, :])
+    l = np.linalg.norm(front_world[0, :] - rear_world[0, :])
+    h = np.linalg.norm(front_world[0, :] - front_world[1, :])
+
+    T_world = np.mean(obj['cuboid'], axis=0)
+    # print(obj['cuboid'] - T_world)
+
+    from scipy.spatial.transform import Rotation as R
+    import math
+    heading = front_world[0, :] - rear_world[0, :]
+    heading_hat = heading / np.linalg.norm(heading)
+    X_HAT = np.array([1, 0, 0])
+    cos_theta = heading_hat.dot(X_HAT)
+    rot_axis = np.cross(heading_hat, X_HAT)
+    R_world2 = R.from_rotvec(
+      math.acos(cos_theta) * rot_axis / np.linalg.norm(rot_axis)).as_matrix()
+    # WTF why doesn't this work??
+    
+    # KITTI-360 Transform confounds R and S; we need to separate them.
+    # See also https://math.stackexchange.com/a/1463487
+    obj_sR = obj['transform'][:3, :3]
+    sx = np.linalg.norm(obj_sR[:, 0])
+    sy = np.linalg.norm(obj_sR[:, 1])
+    sz = np.linalg.norm(obj_sR[:, 2])
+    R_world = obj_sR.copy()
+    R_world[:, 0] *= 1. / sx
+    R_world[:, 1] *= 1. / sy
+    R_world[:, 2] *= 1. / sz
+
+    print('R_world - R_world2', R_world - R_world2)
+
+    # heading *= 2 * np.pi # effectively zero rotation about axis
+    # R_world = R.from_rotvec(heading).as_matrix()
+    # R_world = np.eye(3, 3)
+
+    T_world_to_obj = datum.Transform.from_transformation_matrix(
+                          np.column_stack([R_world, T_world]),
+                          src_frame='world',
+                          dest_frame='obj')
+
+    RT_cam0_to_world = cam0_to_world[FRAMEID]
+    
+    RT_world_to_ego = poses_idk[FRAMEID]
+    T_world_to_ego = datum.Transform.from_transformation_matrix(
+                          RT_world_to_ego,
+                          src_frame='world',
+                          dest_frame='ego')
+
+    T_ego_to_velo = (
+        calib.cam_left_raw_to_ego @
+      calib.cam_left_raw_to_velo.get_inverse())
+    T_world_to_velo = (
+      T_world_to_ego @ T_ego_to_velo).get_inverse()
+
+
+    # print('little', cam0_to_world[FRAMEID + 1][:3, 3] - cam0_to_world[FRAMEID][:3, 3])
+    # print('big', cam0_to_world[8122][:3, 3] - cam0_to_world[FRAMEID][:3, 3])
+
+    
+    # R_cam0_to_world = np.linalg.inv(wtf_fwd) @ RT_cam0_to_world[:3, :3] @ wtf_fwd
+
+    R_cam0_to_world = RT_cam0_to_world[:3, :3]
+    T_cam0_to_world = RT_cam0_to_world[:3, 3]
+
+    # if close is None:
+    #   close = obj
+    # else:
+    #   dist = np.linalg.norm(obj['transform'][:3, 3] - T_cam0_to_world)
+    #   cdist = np.linalg.norm(close['transform'][:3, 3] - T_cam0_to_world)
+    #   if dist < cdist:
+    #     close = obj
+
+    # T_cam0_to_world = -T_cam0_to_world[[0, 2, 1]]
+    # R_cam0_to_world = R_cam0_to_world.T
+      # World y and z axes are flipped vs lidar/ego
+      # https://github.com/autonomousvision/kitti360Scripts/blob/081c08b34a14960611f459f23a0ad049542205c6/kitti360scripts/helpers/project.py#L17
+    # import pdb; pdb.set_trace()
+    Tr_cam0_to_world = datum.Transform(
+                          rotation=R_cam0_to_world,
+                          translation=T_cam0_to_world,
+                          src_frame='world',
+                          dest_frame='camera|left_raw')
+                            # Name should be "from" not "to" ?
+                            # https://github.com/autonomousvision/kitti360Scripts/blob/081c08b34a14960611f459f23a0ad049542205c6/kitti360scripts/helpers/project.py#L106
+
+    # print('T_world_to_obj.translation - T_cam0_to_world.translation', T_world_to_obj.translation - T_cam0_to_world.translation)
+    # if np.linalg.norm(T_world_to_obj.translation - T_cam0_to_world.translation) > 10:
+    #   continue
+    
+    # Tr_cam0_to_world name is backwards?
+    T_obj_from_cam = (
+      T_world_to_obj.get_inverse() @ Tr_cam0_to_world).get_inverse()
+    T_obj_from_cam.src_frame = 'ego' # actually cam but tag this way to make psegs happy
+    T_obj_from_cam.dest_frame = 'obj'
+
+
+    # T_obj_from_ego = (
+    #   T_world_to_obj @ 
+    #   Tr_cam0_to_world['world', 'camera|left_raw'])
+    #     # left camera is ego?  or do we need `poses` that has lidar?
+    # T_obj_from_ego.src_frame = 'ego'
+    # T_obj_from_ego.dest_frame = 'obj'
+
+    c = datum.Cuboid(
+          track_id=obj_name,
+          category_name=obj['k360_class_name'], #obj['label'],
+          length_meters=l,
+          width_meters=w,
+          height_meters=h,
+          obj_from_ego=T_obj_from_cam)
+    cuboids_cam.append(c)
+    # import pdb; pdb.set_trace()
+
+    ### Lidar
+
+    # From kitti tracking
+    # kitti_Tr_imu_to_velo = np.array([
+    #   9.999976000000e-01, 7.553071000000e-04, -2.035826000000e-03, 
+    #   -8.086759000000e-01, -7.854027000000e-04, 9.998898000000e-01,
+    #    -1.482298000000e-02, 3.195559000000e-01, 2.024406000000e-03,
+    #     1.482454000000e-02, 9.998881000000e-01, -7.997231000000e-01])
+
+    # kitti_imu_to_velo = datum.Transform.from_transformation_matrix(
+    #   np.reshape(kitti_Tr_imu_to_velo, (3, 4)),
+    #   src_frame='oxts', dest_frame='lidar')
+
+    
+    T_obj_from_velo = (
+      T_world_to_obj.get_inverse() @ T_world_to_ego @ T_ego_to_velo).get_inverse()
+
+      #calib.cam_left_raw_to_velo)
+      # @ )
+          # cam_left_raw_to_velo is really opposite?
+    # print('T_obj_from_ego', T_obj_from_ego.translation)
+    # T_lidar_to_world = (
+    #   T_world_to_obj['obj', 'world'] @ 
+    #   T_cam0_to_world['world', 'camera|left_raw'])
+    
+    # (
+    #   calib.cam_left_raw_to_velo.get_inverse() @ T_cam0_to_world)
+        # TODO: fix name?
+    # T_obj_from_ego = (
+    #   T_world_to_obj['obj', 'world'] @ 
+    #   T_lidar_to_world['world', 'lidar'])
+    #     # left camera is ego?  or do we need `poses` that has lidar?
+    # T_obj_from_ego.rotation = np.eye(3, 3)#T_obj_from_ego.rotation.T
+    # T_obj_from_ego.translation = T_obj_from_ego.translation[[1, 0, 2]]
+    # T_obj_from_ego.translation[1] *= -1
+    # T_obj_from_ego.translation[0] *= -1
+
+    # T_obj_from_ego.translation -= calib.cam_left_raw_to_velo.translation
+
+    # T_obj_from_ego.translation = T_obj_from_ego.rotation.T
+    T_obj_from_velo.src_frame = 'ego' # actually velo but tag this way to make psegs happy
+    T_obj_from_velo.dest_frame = 'obj'
+
+    c = datum.Cuboid(
+          track_id=obj_name,
+          category_name=obj['k360_class_name'],
+          length_meters=l,
+          width_meters=w,
+          height_meters=h,
+          obj_from_ego=T_obj_from_velo)
+    cuboids_lidar.append(c)
+  
+  # print('close', close)
+  # print('T_cam0_to_world', T_cam0_to_world)
+  # print('cdist', cdist, close['transform'][:3, 3] - T_cam0_to_world)
+  # print('cdist future', close['transform'][:3, 3] - cam0_to_world[8122][:3, 3])
+  # assert False
+
+  from psegs import util
+  from psegs import datum
+  outdir = Path('/opt/psegs/test_run_output')
+
+  
+  # Render the raw velodyne cloud with the `cuboids_lidar` boxes (BEV, then front RV).
+  frame = 'yay'
+  pc = datum.PointCloud(cloud=cloud)
+  util.log.info("Projecting BEV %s ..." % frame)
+  import time
+  start = time.time()
+  bev_img = pc.get_bev_debug_image(cuboids=cuboids_lidar)
+  print('bev', time.time() - start)
+  fname = 'projected_lidar_labels_bev_%s.png' % frame.replace('/', '_')
+  imageio.imwrite(outdir / fname, bev_img)
+
+  util.log.info("Projecting Front RV %s ..." % frame)
+  import time
+  start = time.time()
+  rv_img = pc.get_front_rv_debug_image(cuboids=cuboids_lidar)
+  print('rv', time.time() - start)
+  fname = 'projected_lidar_labels_front_rv_%s.png' % frame.replace('/', '_')
+  imageio.imwrite(outdir / fname, rv_img)
+
+  # Same renders for the static cloud after applying T_world_to_velo.
+
+  frame = 'static'
+  static_cloud_arr = np.asarray(static_cloud.points)
+  # NOTE(review): T_world_to_velo is only bound inside the object loop above; unbound if every object was skipped.
+  # xform = vel_from_cam @ Tr_cam0_to_world['camera|left_raw', 'world']
+  static_cloud_arr = T_world_to_velo.apply(static_cloud_arr).T
+  # static_cloud_arr -= np.mean(static_cloud_arr, axis=0)
+  pc = datum.PointCloud(cloud=static_cloud_arr)
+  util.log.info("Projecting BEV %s ..." % frame)
+  import time
+  start = time.time()
+  bev_img = pc.get_bev_debug_image(cuboids=cuboids_lidar)
+  print('bev', time.time() - start)
+  fname = 'projected_lidar_labels_bev_%s.png' % frame.replace('/', '_')
+  imageio.imwrite(outdir / fname, bev_img)
+
+  util.log.info("Projecting Front RV %s ..." % frame)
+  import time
+  start = time.time()
+  rv_img = pc.get_front_rv_debug_image(cuboids=cuboids_lidar)
+  print('rv', time.time() - start)
+  fname = 'projected_lidar_labels_front_rv_%s.png' % frame.replace('/', '_')
+  imageio.imwrite(outdir / fname, rv_img)
+
+
+  # Same renders for the dynamic cloud, which is only mean-centered here.
+  frame = 'dynamic'
+  dynamic_cloud_arr = np.asarray(dynamic_cloud.points)
+  dynamic_cloud_arr -= np.mean(dynamic_cloud_arr, axis=0)
+  pc = datum.PointCloud(cloud=dynamic_cloud_arr)
+  util.log.info("Projecting BEV %s ..." % frame)
+  import time
+  start = time.time()
+  bev_img = pc.get_bev_debug_image(cuboids=cuboids_lidar)
+  print('bev', time.time() - start)
+  fname = 'projected_lidar_labels_bev_%s.png' % frame.replace('/', '_')
+  imageio.imwrite(outdir / fname, bev_img)
+
+  util.log.info("Projecting Front RV %s ..." % frame)
+  import time
+  start = time.time()
+  rv_img = pc.get_front_rv_debug_image(cuboids=cuboids_lidar)
+  print('rv', time.time() - start)
+  fname = 'projected_lidar_labels_front_rv_%s.png' % frame.replace('/', '_')
+  imageio.imwrite(outdir / fname, rv_img)
+
+
+  # Project the velodyne cloud and the `cuboids_cam` boxes into the left image with cam0_K.
+  frame = 'left'
+
+  from psegs.util import plotting as pspl
+
+  util.log.info("Projecting cloud %s ..." % frame)
+
+  cloud2 = calib.cam_left_raw_to_velo.get_inverse().apply(cloud).T
+
+  uvd = calib.cam0_K.dot(cloud2.T).T
+  uvd[:, 1] /= uvd[:, 2]
+  uvd[:, 0] /= uvd[:, 2]
+
+  # import pdb; pdb.set_trace()
+
+
+  debug = img.copy()
+  pspl.draw_xy_depth_in_image(debug, uvd, marker_radius=0, alpha=0.7)
+  for c in cuboids_cam:
+    pts = c.get_box3d()
+    uvd = calib.cam0_K.dot(pts.T).T
+    uvd[:, 1] /= uvd[:, 2]
+    uvd[:, 0] /= uvd[:, 2]
+    if (uvd[:, 2] <= 1e-3).any():
+      continue
+
+    from oarphpy.plotting import hash_to_rbg
+    color = pspl.color_to_opencv(
+      np.array(hash_to_rbg(c.category_name)))
+    pspl.draw_cuboid_xy_in_image(debug, uvd[:, :2], color)
+  fname = 'projected_pts_front_cam_%s.png' % frame.replace('/', '_')
+  imageio.imwrite(outdir / fname, debug)
+
+
+
+  # Project the velodyne cloud into the right image (extra RT_01 step, then cam1_K).
+  frame = 'yay_right'
+  util.log.info("Projecting cloud %s ..." % frame)
+
+  cloud2 = calib.cam_left_raw_to_velo.get_inverse().apply(cloud).T
+  cloud2 = calib.RT_01.apply(cloud2).T
+
+  uvd = calib.cam1_K.dot(cloud2.T).T
+  uvd[:, 1] /= uvd[:, 2]
+  uvd[:, 0] /= uvd[:, 2]
+
+
+  debug = img2.copy()
+  pspl.draw_xy_depth_in_image(debug, uvd, marker_radius=0, alpha=0.7)
+  fname = 'projected_pts_front_cam_%s.png' % frame.replace('/', '_')
+  imageio.imwrite(outdir / fname, debug)
+
diff --git a/test/datasets/test_kitti_sf.py b/test/datasets/test_kitti_sf.py
new file mode 100644
index 0000000..93224c4
--- /dev/null
+++ b/test/datasets/test_kitti_sf.py
@@ -0,0 +1,171 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from psegs import datum
+from psegs.datasets import kitti_sf
+
+from test import testutil
+
+def test_kitti_sf_build_export():
+  """Export the train-only KITTI SF15 table as Parquet to `/tmp/yay_pq_test` (read back by `test_kitti_sf_table_viz`)."""
+  kitti_sf.KITTISF15SDTableTrainOnly.save_parquet('/tmp/yay_pq_test')
+def test_kitti_sf_table_viz():
+  """Render the Parquet export under `/tmp/yay_pq_test` to `/opt/psegs/debug.html`."""
+  from psegs.table.sd_table_factory import ParquetSDTFactory
+  F = ParquetSDTFactory.factory_for_sd_subdirs('/tmp/yay_pq_test')
+  html = F.create_as_single_table().to_rich_html()
+  with open('/opt/psegs/debug.html', 'w') as f:  # closes (and flushes) the handle
+    f.write(html)
+  print()
+
+def test_kitti_sf_create_matched_pair():
+  """Build one KITTI SF15 matched pair and dump its world-frame cloud as GLB.
+
+  Writes `/opt/psegs/debug.mp.glb` for manual inspection; nothing is asserted.
+  """
+  testutil.skip_if_fixture_absent(kitti_sf.Fixtures.EXTERNAL_FIXTURES_ROOT)
+
+  uri = datum.URI.from_str(
+    'psegs://split=train&extra.kitti_sf15.frame_id=000016_10')
+  sd = kitti_sf.KITTISF15SDTable._create_matched_pair(uri)
+  pc = sd.matched_pair.get_point_cloud_in_world_frame()
+  xyz = pc.cloud[:, :3]
+
+  import trimesh
+  import numpy as np
+  pc_tmesh_xyz = trimesh.points.PointCloud(vertices=xyz.squeeze(), colors=.3 * np.ones_like(xyz))
+  scene = trimesh.Scene()
+  scene.add_geometry(pc_tmesh_xyz)
+  b = trimesh.exchange.gltf.export_glb(scene)
+  with open('/opt/psegs/debug.mp.glb', 'wb') as f:
+    print('debug.mp.glb')
+    f.write(b)
+
+
+def test_kitti_sf_stereo_3d_viz():
+  """Work-in-progress visual check of KITTI SF15 stereo geometry.
+
+  For a stereo fixture frame, lifts the `disp_occ_0` disparity matches to 3D
+  two ways -- pinhole unprojection with `K_2`, and `cv2.triangulatePoints`
+  with `P_2` / `P_3` -- and writes each cloud, plus an overlay of both, as
+  GLB files under `/opt/psegs` for manual comparison.
+
+  NOTE: still unfinished.  The loop body ends in an unconditional
+  `assert False`, so the test fails on the first frame and everything after
+  that assert is unreachable.  The unreachable tail uses `kitti` and
+  `save_projected_lidar`, which this module neither imports nor defines.
+  """
+  testutil.skip_if_fixture_absent(kitti_sf.Fixtures.EXTERNAL_FIXTURES_ROOT)
+
+  import cv2
+  import numpy as np
+  import trimesh
+
+  base_dir = kitti_sf.Fixtures.stereo_fixture_dir()
+  outdir = testutil.test_tempdir('test_kitti_sf_stereo_3d_viz')
+
+  for frame in kitti_sf.Fixtures.STEREO_TEST_FRAMES:
+    disp_path = base_dir / f'training/disp_occ_0/{frame}.png'
+    disp = kitti_sf.kittisf15_load_disp(open(disp_path, 'rb'))
+
+    cam_to_cam_path = (
+      base_dir / f'training/calib_cam_to_cam/{frame.replace("_10", "")}.txt')
+    with open(cam_to_cam_path, 'r') as f:
+      calib_txt = f.read()
+    K_2, K_3, baseline, R_02, T_02, R_03, T_03, P_2, P_3 = (
+      kitti_sf.kittisf15_load_calib(calib_txt))
+
+    uv_2_uv_3_depth = kitti_sf.kittisf15_to_stereo_matches(disp, baseline, K_2)
+
+    # Cloud 1: unproject the first-two-column pixels through the K_2 pinhole model.
+    vs = uv_2_uv_3_depth[:, (0, 1, -1)]
+    vs = vs[vs[:,-1] > 0]
+    f_x = K_2[0, 0]
+    f_y = K_2[1, 1]
+    c_x = K_2[0, 2]
+    c_y = K_2[1, 2]
+    uvd2_x = (vs[:, 0] - c_x) / f_x
+    uvd2_y = (vs[:, 1] - c_y) / f_y
+    uvd2_z = np.ones_like(uvd2_y)
+    uvd2xyz = np.hstack([uvd2_x[:, None], uvd2_y[:, None], uvd2_z[:, None]])
+    uvd2xyz *= vs[:, (-1,)]
+    pc_tmesh_uvd = trimesh.points.PointCloud(vertices=uvd2xyz, colors=np.zeros_like(uvd2xyz))
+    scene = trimesh.Scene()
+    scene.add_geometry(pc_tmesh_uvd)
+    b = trimesh.exchange.gltf.export_glb(scene)
+    with open('/opt/psegs/debug.glb', 'wb') as f:
+      print('debug.glb')
+      f.write(b)
+
+    # Cloud 2: triangulate the same matches from the two projection matrices.
+    uv_2 = uv_2_uv_3_depth[:, 0:2][uv_2_uv_3_depth[:, -1] > 0]
+    uv_3 = uv_2_uv_3_depth[:, 2:4][uv_2_uv_3_depth[:, -1] > 0]
+
+    xyzh = cv2.triangulatePoints(P_2, P_3, uv_2.T, uv_3.T)
+    xyz = xyzh.T.copy()
+    xyz = xyz[:, :3] / xyz[:, (-1,)]  # de-homogenize
+
+    pc_tmesh_xyz = trimesh.points.PointCloud(vertices=xyz.squeeze(), colors=.3 * np.ones_like(xyz))
+    scene = trimesh.Scene()
+    scene.add_geometry(pc_tmesh_xyz)
+    b = trimesh.exchange.gltf.export_glb(scene)
+    with open('/opt/psegs/debug.xyz.glb', 'wb') as f:
+      print('debug.xyz.glb')
+      f.write(b)
+
+    # Overlay of both clouds for side-by-side inspection.
+    scene = trimesh.Scene()
+    scene.add_geometry(pc_tmesh_uvd)
+    scene.add_geometry(pc_tmesh_xyz)
+    b = trimesh.exchange.gltf.export_glb(scene)
+    with open('/opt/psegs/debug.comp.glb', 'wb') as f:
+      print('debug.comp.glb')
+      f.write(b)
+
+    # Deliberate WIP stop (see docstring); nothing below this line runs.
+    assert False
+
+
+    """
+     * compute uvd viz
+     * save as a point cloud, check binary as well as viz / GLTF
+
+     * create MatchedPair instance
+     * save trimesh viz and test
+
+     
+    """
+
+    calib_path = base_dir / ('training/calib/%s.txt' % frame)
+    calib = kitti.Calibration.from_kitti_str(open(calib_path, 'r').read())
+    save_projected_lidar(
+      base_dir, outdir, frame, 'image_2', calib.K2, calib.velo_to_cam_2_rect)
+    save_projected_lidar(
+      base_dir, outdir, frame, 'image_3', calib.K3, calib.velo_to_cam_3_rect)
+
+  # Now test!
+  expected_base = (
+    kitti.Fixtures.EXTERNAL_FIXTURES_ROOT / 
+      'test_kitti_object_lidar_camera_projection')
+  testutil.assert_img_directories_equal(outdir, expected_base)
+
+
+###############################################################################
+## DSUtil Tests
+
+def test_kitti_dsutil_smoke():
+  """Smoke test: with the fixture root present, `DSUtil.emplace()` is truthy."""
+  testutil.skip_if_fixture_absent(kitti_sf.Fixtures.ROOT)
+  emplaced = kitti_sf.DSUtil.emplace()
+  assert emplaced  # the skip above is the only precondition, so this should hold
diff --git a/test/datasets/test_nuscenes.py b/test/datasets/test_nuscenes.py
new file mode 100644
index 0000000..fe1fe49
--- /dev/null
+++ b/test/datasets/test_nuscenes.py
@@ -0,0 +1,296 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+try:
+  import nuscenes
+except ImportError:
+  pytest.skip("Skipping nuscenes-only tests", allow_module_level=True)
+
+from oarphpy import util as oputil
+
+from psegs import datum
+from psegs import util
+from psegs.datasets import nuscenes as psnusc
+
+from test import testutil
+
+
+###############################################################################
+### Test Utils
+# Skip markers: each one gates a test on the NuScenes release named in its reason.
+skip_if_no_nusc_mini = pytest.mark.skipif(
+  not psnusc.NuscFixtures.version_exists('v1.0-mini'),
+  reason="Requires NuScenes v1.0-mini")
+
+skip_if_no_nusc_trainval = pytest.mark.skipif(
+  not psnusc.NuscFixtures.version_exists('v1.0-trainval'),
+  reason="Requires NuScenes v1.0-trainval")
+
+###############################################################################
+### Test NuScenes
+
+@skip_if_no_nusc_mini
+def test_nuscenes_mini_stats():
+  """Check the row count of every table in the NuScenes v1.0-mini release."""
+  expected_table_sizes = dict(
+    attribute=8,
+    calibrated_sensor=120,
+    category=32,
+    ego_pose=31206,
+    instance=911,
+    log=8,
+    map=4,
+    sample=404,
+    sample_annotation=18538,
+    sample_data=31206,
+    scene=10,
+    sensor=12,
+    visibility=4,
+  )
+
+  nusc = psnusc.PSegsNuScenes(version='v1.0-mini', verbose=False)
+  table_sizes = nusc.get_table_to_length()
+  # `lidarseg` is not always reported; when it is, expect the `sample` count.
+  if 'lidarseg' in table_sizes:
+    expected_table_sizes['lidarseg'] = 404
+  assert table_sizes == expected_table_sizes
+
+
+@skip_if_no_nusc_trainval
+def test_nuscenes_trainval_stats():
+  """Check the row count of every table in the NuScenes v1.0-trainval release."""
+  expected_table_sizes = dict(
+    attribute=8,
+    calibrated_sensor=10200,
+    category=32,
+    ego_pose=2631083,
+    instance=64386,
+    log=68,
+    map=4,
+    sample=34149,
+    sample_annotation=1166187,
+    sample_data=2631083,
+    scene=850,
+    sensor=12,
+    visibility=4,
+  )
+
+  nusc = psnusc.PSegsNuScenes(version='v1.0-trainval', verbose=False)
+  table_sizes = nusc.get_table_to_length()
+  # `lidarseg` is not always reported; when it is, expect the `sample` count.
+  if 'lidarseg' in table_sizes:
+    expected_table_sizes['lidarseg'] = 34149
+  assert table_sizes == expected_table_sizes
+
+# def _check_sample(sample, testname):
+#   prefix = sample.uri.segment_id
+#   # outdir = testutil.test_tempdir(testname + '_' + prefix)
+
+#   from pathlib import Path
+#   outdir = Path('/opt/psegs/test_run_output/')
+#   # oputil.cleandir(outdir)
+
+#   def save(path, img):
+#     import imageio
+#     imageio.imwrite(path, img)
+#     print(path)
+
+#   cuboids = sample.cuboid_labels
+#   for pc in sample.lidar_clouds:
+#     path = outdir / ('%s_bev.png' % pc.sensor_name)
+#     save(path, pc.get_bev_debug_image(cuboids=cuboids))
+    
+#     path = outdir / ('%s_rv.png' % pc.sensor_name)
+#     save(path, pc.get_front_rv_debug_image(cuboids=cuboids))
+
+#   for ci in sample.camera_images:
+#     path = outdir / ('%s_debug.png' % ci.sensor_name)
+#     save(
+#       path,
+#       ci.get_debug_image(
+#         clouds=sample.lidar_clouds,
+#         cuboids=cuboids))
+  
+  
+
+  # datum_rdd = T.get_segment_datum_rdd(spark, myseg)
+  # print('datum_rdd.count()', datum_rdd.count())
+  # datums = datum_rdd.take(10)
+  # import ipdb; ipdb.set_trace()
+
+@skip_if_no_nusc_trainval
+def test_nuscenes_fused_lidar():
+  """Fuse every lidar cloud of scene-0594 in the world frame and save a PLY.
+
+  Writes `/opt/psegs/test_run_output/fused.ply` for manual inspection;
+  nothing is asserted.  Gated on v1.0-trainval like the other scene-0594
+  test in this module, `test_nuscenes_create_sd`.
+  """
+  import numpy as np
+  import open3d as o3d
+
+  suri = 'psegs://segment_id=scene-0594'
+
+  # Mapped over the lidar RDD below, one call per stamped datum.
+  def to_world_cloud(sd):
+    pc = sd.point_cloud
+    cloud = pc.get_cloud()[:, :3] # TODO: can we keep colors?
+    cloud_ego = pc.ego_to_sensor.get_inverse().apply(cloud).T
+
+    # `ego_pose` is applied as-is to take ego-frame points to the world frame.
+    return pc.ego_pose.apply(cloud_ego).T
+
+  T = psnusc.NuscStampedDatumTableFactory
+  with testutil.LocalSpark.getOrCreate() as spark:
+    datum_rdd = T.get_segment_datum_rdd(spark, suri)
+    datum_rdd = datum_rdd.cache()
+    lidar_rdd = datum_rdd.filter(lambda sd: 'lidar' in sd.uri.topic)
+    clouds = lidar_rdd.map(to_world_cloud).collect()
+
+    # Concatenate the per-datum clouds into a single point set.
+    fused_cloud = np.vstack(clouds)
+    pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(fused_cloud)
+    o3d.io.write_point_cloud('/opt/psegs/test_run_output/fused.ply', pcd)
+
+
+@skip_if_no_nusc_trainval
+def test_nuscenes_create_sd():
+  
+  # SAMPLE_URIS below picked using:
+  # T = psnusc.NuscStampedDatumTableFactory # with only keyframes!!!
+  # uris = T.iter_uris_for_segment('scene-0594') 
+  # uris = [str(u) for u in sorted(uris)]
+  # first_cuboid = None
+  # for u in uris:
+  #   if 'cuboids' in u:
+  #     first_cuboid = u
+  #     break
+  # assert first_cuboid
+  # uris = [u for u in uris if 'cuboids' not in u]
+  # uris = [first_cuboid] + uris
+
+  # Essentially all the data for sample ad4b2f2f60084f479261bfce1448af5e
+  SAMPLE_URIS = [
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943354799000&topic=labels|cuboids&extra.nuscenes-is-keyframe=True&extra.nuscenes-label-channel=CAM_FRONT_LEFT&extra.nuscenes-sample-token=fe6f79aed6ea4b7b9f87be3d68248f54&extra.nuscenes-token=sample_data|d141f680981f4c018e066751c2e8a489',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943354799000&topic=camera|CAM_FRONT_LEFT&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=sample_data|d141f680981f4c018e066751c2e8a489',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943354799000&topic=ego_pose&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=ego_pose|d141f680981f4c018e066751c2e8a489',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943362404000&topic=camera|CAM_FRONT&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=sample_data|8673669e2ece4fd2be37583b670d6c89',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943362404000&topic=ego_pose&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=ego_pose|8673669e2ece4fd2be37583b670d6c89',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943364426000&topic=ego_pose&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=ego_pose|5bb7e6318ccb4ed08d56ed23e0673d43',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943364426000&topic=radar|RADAR_FRONT_LEFT&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=sample_data|5bb7e6318ccb4ed08d56ed23e0673d43',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943367494000&topic=ego_pose&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=ego_pose|b595147868d74be9a7b8945c04cb36ee',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943367494000&topic=radar|RADAR_FRONT&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=sample_data|b595147868d74be9a7b8945c04cb36ee',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943370482000&topic=camera|CAM_FRONT_RIGHT&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=sample_data|770d389cc3964c3bae61ff2d032f621e',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943370482000&topic=ego_pose&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=ego_pose|770d389cc3964c3bae61ff2d032f621e',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943378113000&topic=camera|CAM_BACK_RIGHT&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=sample_data|8484e12be28f4795afe053f1ce82887d',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943378113000&topic=ego_pose&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=ego_pose|8484e12be28f4795afe053f1ce82887d',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943387558000&topic=camera|CAM_BACK&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=sample_data|43de58db5c714ae791e49712fab4be40',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943387558000&topic=ego_pose&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=ego_pose|43de58db5c714ae791e49712fab4be40',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943391357000&topic=ego_pose&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=ego_pose|b64f953571e4490eb909454bcb66a9f4',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943391357000&topic=radar|RADAR_BACK_LEFT&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=sample_data|b64f953571e4490eb909454bcb66a9f4',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943397405000&topic=camera|CAM_BACK_LEFT&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=sample_data|affd9e5eed1b480b97e411c3e473fe4a',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943397405000&topic=ego_pose&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=ego_pose|affd9e5eed1b480b97e411c3e473fe4a',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943398040000&topic=ego_pose&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=ego_pose|b99d95a1c87a4d5c9bb220f6f337203b',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943398040000&topic=radar|RADAR_BACK_RIGHT&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=sample_data|b99d95a1c87a4d5c9bb220f6f337203b',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943399770000&topic=ego_pose&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=ego_pose|f953a1ecb5a046a49d5d244a57820232',
+    'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&timestamp=1537292943399770000&topic=lidar|LIDAR_TOP&extra.nuscenes-is-keyframe=True&extra.nuscenes-sample-token=ad4b2f2f60084f479261bfce1448af5e&extra.nuscenes-token=sample_data|f953a1ecb5a046a49d5d244a57820232',
+  ]
+  SAMPLE_URIS = [datum.URI.from_str(s) for s in SAMPLE_URIS]
+
+  T = psnusc.NuscStampedDatumTableFactory
+  sample = datum.Sample(datums=[T.create_stamped_datum(u) for u in SAMPLE_URIS])
+  testutil.check_sample_debug_images(sample, 'test_nuscenes_create_sd')
+
+
+def test_nuscenes_yay():
+
+  
+
+  # nusc = psnusc.PSegsNuScenes(
+  #   version='v1.0-trainval',
+  #   dataroot='/outer_root//media/seagates-ext4/au_datas/nuscenes_root/')
+
+  # import pdb; pdb.set_trace()
+
+  # from pprint import pprint
+  # pprint(nusc.get_all_sensors())
+  # pprint(nusc.get_all_classes())
+
+  # pprint(('list_lidarseg_categories', nusc.list_lidarseg_categories(sort_by='count')))
+  # pprint(('lidarseg_idx2name_mapping', nusc.lidarseg_idx2name_mapping))
+
+
+
+  # KEYFRAMES_ONLY = True
+  # with testutil.LocalSpark.getOrCreate() as spark:
+  #   import random
+  #   rand = random.Random(12)
+  #   T = psnusc.NuscStampedDatumTableFactory
+  #   suris = rand.sample(T.get_all_segment_uris(), 3)
+  #   nusc_samples = []
+  #   for suri in suris:
+  #     seg_df = T.get_segment_datum_df(spark, suri)
+  #     if KEYFRAMES_ONLY:
+  #       seg_df = seg_df.where('uri.extra.`nuscenes-is-keyframe` == "True"')
+  #     row = seg_df.select('uri.extra.nuscenes-sample-token').first()
+  #     sample_token = row[0]
+
+  #     sample_df = seg_df.where(
+  #       seg_df['uri.extra.nuscenes-sample-token'] == sample_token)
+      
+  #     sample_uri_df = sample_df.select('uri')
+  #     sample_uris = [r.uri for r in T.sd_df_to_rdd(sample_uri_df).collect()]
+  #     nusc_samples.append(datum.URI.segment_uri_from_datum_uris(sample_uris))
+
+  samples = ['psegs://dataset=nuscenes&split=train_track&segment_id=scene-0594&sel_datums=camera|CAM_BACK,1537292951937558000,camera|CAM_BACK_LEFT,1537292951947405000,camera|CAM_BACK_RIGHT,1537292951928113000,camera|CAM_FRONT,1537292951912404000,camera|CAM_FRONT_LEFT,1537292951904799000,camera|CAM_FRONT_RIGHT,1537292951920482000,ego_pose,1537292951904799000,ego_pose,1537292951912404000,ego_pose,1537292951920482000,ego_pose,1537292951928113000,ego_pose,1537292951933926000,ego_pose,1537292951937558000,ego_pose,1537292951945648000,ego_pose,1537292951947405000,ego_pose,1537292951949628000,ego_pose,1537292951954005000,ego_pose,1537292951954663000,ego_pose,1537292951976984000,labels|cuboids,1537292951904799000,labels|cuboids,1537292951912404000,labels|cuboids,1537292951920482000,labels|cuboids,1537292951928113000,labels|cuboids,1537292951933926000,labels|cuboids,1537292951937558000,labels|cuboids,1537292951945648000,labels|cuboids,1537292951947405000,labels|cuboids,1537292951949628000,labels|cuboids,1537292951954005000,labels|cuboids,1537292951954663000,labels|cuboids,1537292951976984000,lidar|LIDAR_TOP,1537292951949628000,radar|RADAR_BACK_LEFT,1537292951954005000,radar|RADAR_BACK_RIGHT,1537292951954663000,radar|RADAR_FRONT,1537292951945648000,radar|RADAR_FRONT_LEFT,1537292951976984000,radar|RADAR_FRONT_RIGHT,1537292951933926000', 
'psegs://dataset=nuscenes&split=train_track&segment_id=scene-0513&sel_datums=camera|CAM_BACK,1535478901787558000,camera|CAM_BACK_LEFT,1535478901797405000,camera|CAM_BACK_RIGHT,1535478901778113000,camera|CAM_FRONT,1535478901762404000,camera|CAM_FRONT_LEFT,1535478901754799000,camera|CAM_FRONT_RIGHT,1535478901770482000,ego_pose,1535478901754799000,ego_pose,1535478901762404000,ego_pose,1535478901770480000,ego_pose,1535478901770482000,ego_pose,1535478901778113000,ego_pose,1535478901787558000,ego_pose,1535478901796360000,ego_pose,1535478901797405000,ego_pose,1535478901803288000,ego_pose,1535478901813085000,ego_pose,1535478901815802000,ego_pose,1535478901832909000,labels|cuboids,1535478901754799000,labels|cuboids,1535478901762404000,labels|cuboids,1535478901770480000,labels|cuboids,1535478901770482000,labels|cuboids,1535478901778113000,labels|cuboids,1535478901787558000,labels|cuboids,1535478901796360000,labels|cuboids,1535478901797405000,labels|cuboids,1535478901803288000,labels|cuboids,1535478901813085000,labels|cuboids,1535478901815802000,labels|cuboids,1535478901832909000,lidar|LIDAR_TOP,1535478901796360000,radar|RADAR_BACK_LEFT,1535478901770480000,radar|RADAR_BACK_RIGHT,1535478901813085000,radar|RADAR_FRONT,1535478901815802000,radar|RADAR_FRONT_LEFT,1535478901803288000,radar|RADAR_FRONT_RIGHT,1535478901832909000', 
'psegs://dataset=nuscenes&split=train_detect&segment_id=scene-0750&sel_datums=camera|CAM_BACK,1535656879787558000,camera|CAM_BACK_LEFT,1535656879797405000,camera|CAM_BACK_RIGHT,1535656879778113000,camera|CAM_FRONT,1535656879762404000,camera|CAM_FRONT_LEFT,1535656879754799000,camera|CAM_FRONT_RIGHT,1535656879770482000,ego_pose,1535656879754799000,ego_pose,1535656879762404000,ego_pose,1535656879770482000,ego_pose,1535656879778113000,ego_pose,1535656879781462000,ego_pose,1535656879787558000,ego_pose,1535656879797405000,ego_pose,1535656879801090000,ego_pose,1535656879805167000,ego_pose,1535656879819687000,ego_pose,1535656879823023000,ego_pose,1535656879832112000,labels|cuboids,1535656879754799000,labels|cuboids,1535656879762404000,labels|cuboids,1535656879770482000,labels|cuboids,1535656879778113000,labels|cuboids,1535656879781462000,labels|cuboids,1535656879787558000,labels|cuboids,1535656879797405000,labels|cuboids,1535656879801090000,labels|cuboids,1535656879805167000,labels|cuboids,1535656879819687000,labels|cuboids,1535656879823023000,labels|cuboids,1535656879832112000,lidar|LIDAR_TOP,1535656879801090000,radar|RADAR_BACK_LEFT,1535656879832112000,radar|RADAR_BACK_RIGHT,1535656879805167000,radar|RADAR_FRONT,1535656879819687000,radar|RADAR_FRONT_LEFT,1535656879781462000,radar|RADAR_FRONT_RIGHT,1535656879823023000']
+
+  
+  import imageio
+  T = psnusc.NuscStampedDatumTableFactory
+  with testutil.LocalSpark.getOrCreate() as spark:
+    for suri in samples:
+      sample = T.get_sample(suri, spark=spark)
+      prefix = sample.uri.segment_id
+
+      cuboids = sample.cuboid_labels
+      for pc in sample.lidar_clouds:
+        img = pc.get_bev_debug_image(cuboids=cuboids)
+        imageio.imwrite(
+          '/opt/psegs/test_run_output/%s-%s-bev.png' % (prefix, pc.sensor_name), img)
+
+        img = pc.get_front_rv_debug_image(cuboids=cuboids)
+        imageio.imwrite(
+          '/opt/psegs/test_run_output/%s-%s-rv.png' % (prefix, pc.sensor_name), img)
+
+      for ci in sample.camera_images:
+        img = ci.get_debug_image(clouds=sample.lidar_clouds, cuboids=cuboids)
+        imageio.imwrite(
+          '/opt/psegs/test_run_output/%s-%s-debug.png' % (prefix, ci.sensor_name), img)
+        
+          
+
+      # datum_rdd = T.get_segment_datum_rdd(spark, myseg)
+      # print('datum_rdd.count()', datum_rdd.count())
+      # datums = datum_rdd.take(10)
+      # import ipdb; ipdb.set_trace()
+      print(suri)
+
+
+  # TODO: Test both keyframes and interpolated (non-keyframe) data.
+  # TODO: Rename "frame" to "sample", and add a way to get a sample from a
+  # table via URI (this should perform decently no matter the backing
+  # store).
\ No newline at end of file
diff --git a/test/datasets/test_tanks_and_temples.py b/test/datasets/test_tanks_and_temples.py
new file mode 100644
index 0000000..b61cf8d
--- /dev/null
+++ b/test/datasets/test_tanks_and_temples.py
@@ -0,0 +1,46 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from test import testutil
+
+from psegs.datasets import tanks_and_temples as tnt
+
+###############################################################################
+## Stamped Datum Table
+
+
+EXPECTED_SEGMENTS = (
+  'psegs://dataset=tanks-and-temples&split=train&segment_id=Barn&extra.tnt.scene=Barn',
+  'psegs://dataset=tanks-and-temples&split=train&segment_id=Caterpillar&extra.tnt.scene=Caterpillar',
+  'psegs://dataset=tanks-and-temples&split=train&segment_id=Church&extra.tnt.scene=Church',
+  'psegs://dataset=tanks-and-temples&split=train&segment_id=Courthouse&extra.tnt.scene=Courthouse',
+  'psegs://dataset=tanks-and-temples&split=train&segment_id=Ignatius&extra.tnt.scene=Ignatius',
+  'psegs://dataset=tanks-and-temples&split=train&segment_id=Meetingroom&extra.tnt.scene=Meetingroom',
+  'psegs://dataset=tanks-and-temples&split=train&segment_id=Truck&extra.tnt.scene=Truck',
+)
+
+def test_tnt_all_segment_uris():  # Order-insensitive; needs the T&T fixture.
+  testutil.skip_if_fixture_absent(tnt.Fixtures.ROOT)
+  actual_strs = map(str, tnt.TanksAndTemplesSDTable.get_all_segment_uris())
+  assert sorted(actual_strs) == sorted(EXPECTED_SEGMENTS)
+
+###############################################################################
+## DSUtil Tests
+
+def test_tnt_dsutil_smoke():
+  """Smoke-test `DSUtil.emplace()`; skipped unless both fixture roots exist."""
+  skip_unless_present = testutil.skip_if_fixture_absent
+  skip_unless_present(tnt.Fixtures.ROOT)
+  skip_unless_present(tnt.Fixtures.EXTERNAL_FIXTURES_ROOT)
+  assert tnt.DSUtil.emplace()  # Preconditions hold, so this should succeed.
diff --git a/test/datum/__init__.py b/test/datum/__init__.py
new file mode 100644
index 0000000..c0ec9ac
--- /dev/null
+++ b/test/datum/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/test/datum/test_bbox2d.py b/test/datum/test_bbox2d.py
new file mode 100644
index 0000000..0d9d542
--- /dev/null
+++ b/test/datum/test_bbox2d.py
@@ -0,0 +1,44 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from psegs.datum.bbox2d import BBox2D
+
+# BETTERME ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+def test_serialization():
+  """A `BBox2D` must compare equal to itself after a pickle round trip."""
+  import pickle
+
+  original = BBox2D(x=1, y=2, width=3)
+  restored = pickle.loads(pickle.dumps(original))
+
+  assert original == restored
+
+
+def test_x1_y1_x2_y2():
+  """Corner coordinates are inclusive, so (0, 0, 9, 9) is a 10x10 box."""
+  from_corners = BBox2D.from_x1_y1_x2_y2(0, 0, 9, 9)
+  assert from_corners == BBox2D(x=0, y=0, width=10, height=10)
+  shifted = BBox2D(x=1, y=0, width=10, height=10)
+  assert shifted.get_x1_y1_x2_y2() == (1, 0, 10, 9)
+  assert shifted.get_r1_c1_r2_r2() == (0, 1, 9, 10)  # (row, col) ordering
+
+
+def test_add_padding():
+  """`add_padding()` grows the box in place by the padding on every side."""
+  uniform = BBox2D(x=0, y=0, width=1, height=1)
+  uniform.add_padding(1)  # One value: pads x and y alike
+  assert uniform == BBox2D(x=-1, y=-1, width=3, height=3)
+  per_axis = BBox2D(x=0, y=0, width=1, height=1)
+  per_axis.add_padding(1, 2)  # Two values: x padding of 1, y padding of 2
+  assert per_axis == BBox2D(x=-1, y=-2, width=3, height=5)
diff --git a/test/datum/test_camera_image.py b/test/datum/test_camera_image.py
new file mode 100644
index 0000000..ed801df
--- /dev/null
+++ b/test/datum/test_camera_image.py
@@ -0,0 +1,18 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+def test_camera_image_to_html():
+  # TODO: placeholder only; nothing is asserted, so this always passes.
+  print('todo test_camera_image_to_html')
+  print('todo test depth_image_to_point_cloud')
diff --git a/test/datum/test_cuboid.py b/test/datum/test_cuboid.py
new file mode 100644
index 0000000..33ea6d1
--- /dev/null
+++ b/test/datum/test_cuboid.py
@@ -0,0 +1,140 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+
+from psegs.datum.cuboid import Cuboid
+from psegs.datum.transform import Transform
+
+
+def test_cuboid_box3d():
+  """`get_box3d()` of a 2m cube translated 10m along x gives its 8 corners."""
+  obj_from_ego = Transform(
+    translation=[10, 0, 0], src_frame='ego', dest_frame='obj')
+  cube = Cuboid(
+    length_meters=2,
+    width_meters=2,
+    height_meters=2,
+    obj_from_ego=obj_from_ego)
+
+  # Expected corners: x = 10 +/- 1; y and z = +/- 1.
+  expected_corners = np.array([
+    [11,   1,   1],  # Front face
+    [11,  -1,   1],
+    [11,  -1,  -1],
+    [11,   1,  -1],
+    [ 9,   1,   1],  # Back face
+    [ 9,  -1,   1],
+    [ 9,  -1,  -1],
+    [ 9,   1,  -1]
+  ])
+  np.testing.assert_equal(cube.get_box3d(), expected_corners)
+  
+
+
+def test_cuboid_union_merge():
+  """Union-merge two 2m cubes whose centers are 2m apart along x.
+
+  Expect one 4m-long cuboid centered between them, with `c1`'s categories.
+  """
+
+  first = Cuboid(
+    track_id='c1',
+    category_name='c1',
+    ps_category='c1',
+    length_meters=2,
+    width_meters=2,
+    height_meters=2,
+    obj_from_ego=Transform(
+      translation=[10, 0, 0],
+      src_frame='ego',
+      dest_frame='obj'))
+
+  second = Cuboid(
+    track_id='c2',
+    category_name='c2',
+    ps_category='c2',
+    length_meters=2,
+    width_meters=2,
+    height_meters=2,
+    obj_from_ego=Transform(
+      translation=[12, 0, 0],
+      src_frame='ego',
+      dest_frame='obj'))
+
+  expected = Cuboid(
+    track_id='c1-union-c2',
+    category_name='c1',
+    ps_category='c1',
+    length_meters=4,
+    width_meters=2,
+    height_meters=2,
+    obj_from_ego=Transform(
+      translation=[11, 0, 0],
+      src_frame='ego',
+      dest_frame='obj'))
+
+  merged = Cuboid.get_merged(first, second, mode='union')
+  assert merged == expected
+
+
+def test_cuboid_union_interpolate():
+  """Interpolate halfway (alpha=0.5) between two 2m cubes 2m apart along x.
+
+  Expect a same-sized cube at the midpoint, with `c1`'s categories.
+  """
+
+  first = Cuboid(
+    track_id='c1',
+    category_name='c1',
+    ps_category='c1',
+    length_meters=2,
+    width_meters=2,
+    height_meters=2,
+    obj_from_ego=Transform(
+      translation=[10, 0, 0],
+      src_frame='ego',
+      dest_frame='obj'))
+
+  second = Cuboid(
+    track_id='c2',
+    category_name='c2',
+    ps_category='c2',
+    length_meters=2,
+    width_meters=2,
+    height_meters=2,
+    obj_from_ego=Transform(
+      translation=[12, 0, 0],
+      src_frame='ego',
+      dest_frame='obj'))
+
+  expected = Cuboid(
+    track_id='c1-interpolate-c2',
+    category_name='c1',
+    ps_category='c1',
+    length_meters=2,
+    width_meters=2,
+    height_meters=2,
+    obj_from_ego=Transform(
+      translation=[11, 0, 0],
+      src_frame='ego',
+      dest_frame='obj'))
+
+  blended = Cuboid.get_merged(first, second, mode='interpolate', alpha=0.5)
+  assert blended == expected
+
+
+def test_get_interpolated():  # TODO: placeholder; nothing is asserted yet.
+  for _ in range(10):
+    print('todo test_get_interpolated')
diff --git a/test/datum/test_datumutils.py b/test/datum/test_datumutils.py
new file mode 100644
index 0000000..e0285f6
--- /dev/null
+++ b/test/datum/test_datumutils.py
@@ -0,0 +1,67 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+
+import numpy as np
+
+from psegs import datum
+from psegs.datum import datumutils as du
+
+
+def test_maybe_make_homogeneous():
+  """3-column points gain a trailing 1; 4-column points come back unchanged."""
+  expected = np.array([[0, 0, 0, 1]])
+
+  xyz = np.array([[0, 0, 0]])
+  np.testing.assert_equal(du.maybe_make_homogeneous(xyz), expected)
+  xyzw = np.array([[0, 0, 0, 1]])
+  np.testing.assert_equal(du.maybe_make_homogeneous(xyzw), expected)
+
+
+def test_datum_to_diffable_tree():
+  """Diffable trees should match for equal datums and expose any edits."""
+  # Case 1: two untouched copies of the prototype have identical trees.
+  sd1 = copy.deepcopy(datum.STAMPED_DATUM_PROTO)
+  sd2 = copy.deepcopy(datum.STAMPED_DATUM_PROTO)
+  tree1 = du.datum_to_diffable_tree(sd1)
+  tree2 = du.datum_to_diffable_tree(sd2)
+  assert tree1 == tree2
+
+  # Case 2: clearing a top-level member shows up in the trees and the diff.
+  sd1 = copy.deepcopy(datum.STAMPED_DATUM_PROTO)
+  sd1.camera_image = None
+  sd2 = copy.deepcopy(datum.STAMPED_DATUM_PROTO)
+  tree1 = du.datum_to_diffable_tree(sd1)
+  tree2 = du.datum_to_diffable_tree(sd2)
+  assert tree1 != tree2
+
+  difftxt = du.get_datum_diff_string(sd1, sd2)
+  assert "-  'camera_image': None," in difftxt
+  assert "+  'camera_image': {'K':" in difftxt
+
+  # Case 3: an edit to a nested member (the URI) is detected too.  The trees
+  # must be rebuilt here; otherwise the assert below would only re-check the
+  # stale trees from case 2.
+  sd1 = copy.deepcopy(datum.STAMPED_DATUM_PROTO)
+  sd1.uri.dataset = 'foo'
+  sd2 = copy.deepcopy(datum.STAMPED_DATUM_PROTO)
+  tree1 = du.datum_to_diffable_tree(sd1)
+  tree2 = du.datum_to_diffable_tree(sd2)
+  assert tree1 != tree2
+
+  difftxt = du.get_datum_diff_string(sd2, sd1)
+  assert "-          'dataset': ''" in difftxt
+  assert "+          'dataset': 'foo'" in difftxt
+
diff --git a/test/datum/test_frame.py b/test/datum/test_frame.py
new file mode 100644
index 0000000..cae5781
--- /dev/null
+++ b/test/datum/test_frame.py
@@ -0,0 +1,33 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from psegs.datum.frame import Frame
+from psegs.datum.stamped_datum import StampedDatum
+from psegs.datum.uri import URI
+
+
+def test_frame_to_from_uri():
+  """A `Frame`'s URI must round-trip through its `sel_datums` string form."""
+
+  def assert_frame_uri(frame, expected_str):
+    actual_uri = frame.uri
+    assert actual_uri == URI.from_str(expected_str)
+    assert str(actual_uri) == expected_str
+
+  single_datum_frame = Frame(
+    uri=URI(dataset='d'),
+    datums=[StampedDatum(uri=URI(topic='t', timestamp=1))])
+
+  assert_frame_uri(single_datum_frame, 'psegs://dataset=d&sel_datums=t,1')
+
diff --git a/test/datum/test_matched_pair.py b/test/datum/test_matched_pair.py
new file mode 100644
index 0000000..b8df0e1
--- /dev/null
+++ b/test/datum/test_matched_pair.py
@@ -0,0 +1,141 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from psegs import datum
+
+from test import testutil
+
+def test_matched_pair_stereo_rect_viz_html():
+  """Smoke test: render matched-pair debug output from a COLMAP fixture."""
+  # NOTE(review): outputs are written to hard-coded paths under /opt/psegs.
+  from psegs.datasets import colmap as pscolmap
+  # NOTE(review): the second FIXTURES_DIR assignment (a machine-local path) wins.
+  FIXTURES_DIR = testutil.test_fixtures_dir() / 'test_colmap'
+  from pathlib import Path
+  FIXTURES_DIR = Path('/outer_root/media/mr0/pwais/psegs-ios-lidar-ext/re-colmap')
+
+
+  # Dump numpy cached assets to a temp dir
+  PSEGS_ASSET_DIR = testutil.test_tempdir(
+      'test_colmap_create_sd_table_for_reconstruction')
+
+  with testutil.LocalSpark.sess() as spark:
+    sdt = pscolmap.COLMAP_SDTFactory.create_sd_table_for_reconstruction(
+              FIXTURES_DIR / 'sparse' / '0',
+              FIXTURES_DIR / 'images',
+              PSEGS_ASSET_DIR,
+              spark=spark)
+    # NOTE(review): `sd_df` is only read by the unreachable code after `return`.
+    sd_df = sdt.to_spark_df()
+
+
+
+    # Keep only the `matched_pair` datums.
+    sd_rdd = sdt.get_datum_rdd_matching(only_types=['matched_pair'])
+
+    from pyspark import StorageLevel
+    sd_rdd = sd_rdd.persist(StorageLevel.MEMORY_AND_DISK)
+
+    def get_cam_key(ci):
+      # Try to get a segment-distinct, if not globally distinct, key for a
+      # `camera_image`.  TODO: include affordance for user override (maybe
+      # a dataset / segment has an image id in `extra` ?) and/or include
+      # `URI`s in `MatchedPair`.
+      return (
+        ci.sensor_name, ci.timestamp, ci.width, ci.height,
+        tuple(ci.ego_to_sensor.get_transformation_matrix().flatten().tolist()),
+        tuple(ci.ego_pose.get_transformation_matrix().flatten().tolist()),
+      )
+
+    def sd_to_key_plotdatas(sd):
+      mp = sd.matched_pair
+      lkey = get_cam_key(mp.img1)
+      rkey = get_cam_key(mp.img2)
+      return [
+        (lkey, ('key_is_1', sd.uri, mp)),
+        (rkey, ('key_is_2', sd.uri, mp)),
+      ]
+    # Emit each pair under both of its images' keys, then group by key.
+    key_plotdata_rdd = sd_rdd.flatMap(sd_to_key_plotdatas)
+    key_to_plotdatas_rdd = key_plotdata_rdd.groupByKey()
+
+    
+    # distinct_ci_key_rdd = data_rdd.flatMap(
+    #   lambda lkey_rkey_mpuri_mp: 
+    #     tuple(lkey_rkey_mpuri_mp[:2])).distinct()
+    # distinct_keys = sorted(distinct_ci_key_rdd.collect())
+
+    left_key, iter_plotdata = key_to_plotdatas_rdd.first()  # One group only
+    iter_plotdata = sorted(iter_plotdata)
+
+    ci_left = None
+    ci_rights = []
+    lr_matches = []
+    mp_uris = []
+    temp = 0
+    for indicator, mp_uri, mp in iter_plotdata:
+      img = mp.get_debug_line_image()
+      import imageio
+      imageio.imwrite(f'/opt/psegs/stereo_rect_pair_viz_images/mp_viz_{temp}.jpg', img)
+      temp += 1
+
+      if indicator == 'key_is_1':
+        if ci_left is None:
+          ci_left = mp.img1
+        
+        ci_right = mp.img2
+        matches = mp.get_x1y1x2y2_extra()
+        
+      elif indicator == 'key_is_2':
+        if ci_left is None:
+          ci_left = mp.img2
+        
+        ci_right = mp.img1
+        matches = mp.get_x1y1x2y2_extra()
+
+        # Flip left-right x,y cols since the "left" image in this case is x2y2
+        cols = list(range(matches.shape[1]))
+        cols[0] = 2
+        cols[1] = 3
+        cols[2] = 0
+        cols[3] = 1
+        matches = matches[:, cols]
+        
+      else:
+        raise ValueError(indicator)
+
+      ci_rights.append(ci_right)
+      lr_matches.append(matches)
+      mp_uris.append(mp_uri)
+
+    
+    assert ci_left is not None
+
+    from psegs.datum.matched_pair import create_stereo_rect_pair_debug_view_html
+    html = create_stereo_rect_pair_debug_view_html(
+              ci_left,
+              ci_rights=ci_rights,
+              lr_matches=lr_matches,
+              mp_uris=mp_uris)
+
+    with open('/opt/psegs/mp_test.html', 'w') as f:
+      f.write(html)
+    return  # NOTE(review): the statements below are unreachable.
+    breakpoint()
+    sd_df.createOrReplaceTempView('')
+    print()
+  
+
diff --git a/test/datum/test_pobj.py b/test/datum/test_pobj.py
new file mode 100644
index 0000000..745d1cc
--- /dev/null
+++ b/test/datum/test_pobj.py
@@ -0,0 +1,23 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from psegs.datum.pobj import PObj
+
+
+def test_pobj_html_str():
+  """HTML given to `PObj.create_html()` must come back from `to_html()`."""
+  markup = '<b>i am an html</b>'
+  html_obj = PObj.create_html(html=markup)
+
+  assert html_obj.to_html() == markup
diff --git a/test/datum/test_point_cloud.py b/test/datum/test_point_cloud.py
new file mode 100644
index 0000000..bd92899
--- /dev/null
+++ b/test/datum/test_point_cloud.py
@@ -0,0 +1,22 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+def test_point_cloud_to_html():
+  print('todo test_point_cloud_to_html') # TODO: placeholder; asserts nothing yet
+
+def test_point_cloud_get_bev_debug():
+  print('todo test_point_cloud_get_bev_debug') # TODO: placeholder; asserts nothing yet
+
+def test_point_cloud_get_rv_debug():
+  print('todo test_point_cloud_get_rv_debug') # TODO: placeholder; asserts nothing yet
diff --git a/test/datum/test_points2d.py b/test/datum/test_points2d.py
new file mode 100644
index 0000000..65bef15
--- /dev/null
+++ b/test/datum/test_points2d.py
@@ -0,0 +1,33 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+import numpy as np
+
+from psegs.datum.points2d import Points2D
+
+
+def test_serialization():
+  """A `Points2D` must compare equal to itself after a pickle round trip."""
+  import pickle
+
+  xy = np.array([
+    [1., 2.],
+    [3., 4.],
+  ])
+  original = Points2D(points_array=xy)
+  restored = pickle.loads(pickle.dumps(original))
+  assert original == restored
+
diff --git a/test/datum/test_stamped_datum.py b/test/datum/test_stamped_datum.py
new file mode 100644
index 0000000..7c40c9e
--- /dev/null
+++ b/test/datum/test_stamped_datum.py
@@ -0,0 +1,54 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+
+from psegs.datum.stamped_datum import StampedDatum
+from psegs.datum.transform import Transform
+from psegs.datum.uri import URI
+
+
+# def test_sd_to_from_uri():
+#   def check_eq(sd, s):
+#     # Check StampedDatum -> URI
+#     assert str(sd.uri) == s
+#     assert sd.uri == URI.from_str(s)
+
+#     # Check URI -> StampedDatum
+#     sd_bare = copy.deepcopy(sd)
+#     for k in StampedDatum.__slots__:
+#       default = getattr(StampedDatum(), k)
+#       setattr(sd_bare, k, default)
+#     uri_bare = sd_bare.uri
+#     assert StampedDatum.from_uri(uri_bare) == sd_bare
+#     assert StampedDatum.from_str(str(uri_bare)) == sd_bare
+
+#   check_eq(StampedDatum(), URI.PREFIX)
+
+#   check_eq(
+#     StampedDatum(
+#       dataset='d', split='s', segment_id='s', timestamp=1, topic='t'),
+#     'psegs://dataset=d&split=s&segment_id=s&timestamp=1&topic=t')
+  
+#   check_eq(
+#     StampedDatum(
+#       dataset='d', split='s', segment_id='s', timestamp=1, topic='t',
+#       transform=Transform()),
+#     'psegs://dataset=d&split=s&segment_id=s&timestamp=1&topic=t')
+
+#   # Ensure extra works
+#   check_eq(
+#     StampedDatum(dataset='d', extra={'a': 'foo', 'b': 'bar'}),
+#     'psegs://dataset=d&extra.a=foo&extra.b=bar')
+
diff --git a/test/datum/test_transform.py b/test/datum/test_transform.py
new file mode 100644
index 0000000..0c634e1
--- /dev/null
+++ b/test/datum/test_transform.py
@@ -0,0 +1,110 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import numpy as np
+
+from psegs.datum.transform import Transform
+
+
+def test_transform_force_shape():
+  """Check that flat inputs come back as a 3x3 rotation and 3x1 translation."""
+  flat_rotation = [1, 2, 3, 4, 5, 6, 7, 8, 9]
+  flat_translation = np.array([1, 2, 3])
+  xform = Transform(rotation=flat_rotation, translation=flat_translation)
+
+  expected_rotation = np.array([
+    [1, 2, 3],
+    [4, 5, 6],
+    [7, 8, 9],
+  ])
+  np.testing.assert_equal(xform.rotation, expected_rotation)
+
+  # The translation is expected back as a column vector
+  expected_translation = np.array(
+    [[1], [2], [3]])
+  np.testing.assert_equal(xform.translation, expected_translation)
+
+
+def test_transform_apply_identity():
+  """Check that a default-constructed Transform leaves points unchanged."""
+  points = np.eye(3, 3)
+  identity = Transform()
+  # The output of apply() must equal its input exactly
+  np.testing.assert_equal(identity.apply(points), points)
+
+
+def test_transform_apply_translation():
+  """Check that a translation of +1 in x offsets every column by x-hat."""
+  points = np.eye(3, 3)
+  shifted = Transform(translation=[1, 0, 0]).apply(points)
+  x_hat = np.array([[1], [0], [0]])  # Column vector; broadcasts over columns
+  np.testing.assert_equal(shifted, points + x_hat)
+
+
+def test_transform_apply_rotation():
+  """Check that a yaw of pi/4 rotates the identity points as expected."""
+  import math
+  from scipy.spatial.transform import Rotation as R
+
+  # A yaw of pi/4: with 'zxy' ordering the first angle is about z
+  yaw_matrix = R.from_euler('zxy', [math.pi / 4, 0, 0]).as_matrix()
+  xform = Transform(rotation=yaw_matrix)
+  rotated = xform.apply(np.eye(3, 3))
+
+  half_sqrt2 = math.sqrt(2) / 2
+  np.testing.assert_almost_equal(
+    rotated,
+    np.array([
+      [half_sqrt2, -half_sqrt2, 0],
+      [half_sqrt2,  half_sqrt2, 0],
+      [         0,           0, 1],
+    ]))
+
+
+def test_transform_get_xform():
+  """Exercise frame-pair lookup through get_xform() and through indexing."""
+  fwd = Transform(translation=[1, 0, 0], src_frame='f1', dest_frame='f2')
+  assert fwd == fwd.get_xform('f1', 'f2')
+  assert fwd.get_inverse() == fwd.get_xform('f2', 'f1')
+  with pytest.raises(AssertionError):
+    fwd.get_xform('a', 'b')
+  # Index form: the same lookups, then a non-pair key and an unknown pair
+  assert fwd['f1', 'f2'] == fwd
+  assert fwd['f2', 'f1'] == fwd.get_inverse()
+  for bad_key, expected_error in (('moof', ValueError), (('a', 'b'), KeyError)):
+    with pytest.raises(expected_error):
+      fwd[bad_key]
+
+
+def test_transform_chained():
+  """Check that `@` composes translations and frame names in both orders."""
+  t1 = Transform(
+    translation=[1., 1., 1.], src_frame='t1_src', dest_frame='t1_dest')
+  t2 = Transform(
+    translation=[2., 2., 2.], src_frame='t2_src', dest_frame='t2_dest')
+
+  # t2 @ t1: source frame expected from t1, destination frame from t2
+  expected_t2_from_t1 = Transform(
+    translation=[3., 3., 3.],
+    src_frame='t1_src',
+    dest_frame='t2_dest')
+  assert (t2 @ t1) == expected_t2_from_t1
+
+  # t1 @ t2: the operand roles (and so the frame names) swap
+  expected_t1_from_t2 = Transform(
+    translation=[3., 3., 3.],
+    src_frame='t2_src',
+    dest_frame='t1_dest')
+  assert (t1 @ t2) == expected_t1_from_t2
diff --git a/test/datum/test_uri.py b/test/datum/test_uri.py
new file mode 100644
index 0000000..79d12df
--- /dev/null
+++ b/test/datum/test_uri.py
@@ -0,0 +1,225 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from psegs.datum.uri import URI
+from psegs.datum.uri import DatumSelection
+
+
+def check_eq(uri, s):
+  """Assert that `uri` serializes to `s` and that `s` parses back to `uri`."""
+  serialized = str(uri)
+  assert serialized == s
+  assert uri == URI.from_str(s)  # Exercises URI.__eq__() and deserialization
+
+
+def test_uri_basic():
+  """Check URI <-> string round trips for the core fields and `extra`."""
+  check_eq(URI(), URI.PREFIX)  # An empty URI serializes to the prefix alone
+  full_str = 'psegs://dataset=d&split=s&segment_id=s&timestamp=1&topic=t'
+  int_stamped = URI(
+    dataset='d', split='s', segment_id='s', timestamp=1, topic='t')
+  check_eq(int_stamped, full_str)
+
+  # A string timestamp is expected to serialize the same as the integer one
+  str_stamped = URI(
+    dataset='d', split='s', segment_id='s', timestamp='1', topic='t')
+  check_eq(str_stamped, full_str)
+
+  # `extra` entries are expected as `extra.<key>=<value>` pairs
+  with_extra = URI(dataset='d', extra={'a': 'foo', 'b': 'bar'})
+  check_eq(with_extra, 'psegs://dataset=d&extra.a=foo&extra.b=bar')
+  
+
+def test_uri_from_str_extended():
+  """Check that keyword arguments to from_str() extend the parsed URI."""
+  extended = URI.from_str('psegs://dataset=d', extra={'key': 'value'})
+  # The added `extra` entry should show up in the serialized form
+  check_eq(extended, 'psegs://dataset=d&extra.key=value')
+
+
+def test_uri_datums():
+  """Check that each accepted `sel_datums` spelling serializes identically."""
+  expected = 'psegs://dataset=d&sel_datums=t1,1'
+
+  equivalent_selections = (
+    # Explicit DatumSelection objects
+    [DatumSelection(topic='t1', timestamp=1)],
+    # Already-serialized string form
+    't1,1',
+    # (topic, timestamp) tuples, here with a string timestamp
+    [('t1', '1')],
+    # Dicts keyed by field name
+    [{'topic': 't1', 'timestamp': 1}],
+  )
+
+  for sel_datums in equivalent_selections:
+    check_eq(URI(dataset='d', sel_datums=sel_datums), expected)
+
+
+def test_uri_datum_sorting():
+  """Check that a list ordered by (topic, timestamp) is already sorted."""
+  t1_at_1 = DatumSelection(topic='t1', timestamp=1)
+  t1_at_2 = DatumSelection(topic='t1', timestamp=2)
+  t2_at_1 = DatumSelection(topic='t2', timestamp=1)
+  in_order = [t1_at_1, t1_at_2, t2_at_1]
+  assert in_order == sorted(in_order)
+
+
+def test_uri_datum_to_datum_uris():
+  """Check that get_datum_uris() expands sel_datums into datum URIs."""
+  def check_eqs(uri, sd_uris):
+    # Compare lengths first: zip() alone would silently drop extra entries
+    actual_sd_uris = uri.get_datum_uris()
+    assert len(actual_sd_uris) == len(sd_uris)
+    for actual, expected in zip(actual_sd_uris, sd_uris):
+      check_eq(actual, expected)
+
+  # URIs without any datum selection are expected to expand to nothing
+  for no_selection in (URI(), URI(dataset='d')):
+    check_eqs(no_selection, [])
+
+  one_datum = URI(
+    dataset='d', sel_datums=[DatumSelection(topic='t1', timestamp=1)])
+  check_eqs(one_datum, ['psegs://dataset=d&timestamp=1&topic=t1'])
+
+  selections = [
+    DatumSelection(topic=topic, timestamp=stamp)
+    for topic, stamp in (('t1', 1), ('t1', 2), ('t2', 1))]
+  expected_strs = [
+    'psegs://dataset=d&timestamp=1&topic=t1',
+    'psegs://dataset=d&timestamp=2&topic=t1',
+    'psegs://dataset=d&timestamp=1&topic=t2',
+  ]
+  check_eqs(URI(dataset='d', sel_datums=selections), expected_strs)
+
+
+def test_segment_uri_from_datum_uris():
+  """Check that datum URIs fold back into a single segment URI.
+
+  Each expected URI spells its `sel_datums` differently (serialized string,
+  URIs, Spark Rows, Rows wrapping a URI); every one must compare equal to
+  the URI rebuilt from the plain datum URI strings.
+  """
+
+  # Datum URI strings shared by the cases below
+  t1_datum = 'psegs://dataset=d&timestamp=1&topic=t1'
+  t2_datum = 'psegs://dataset=d&timestamp=1&topic=t2'
+
+  def rebuild(*datum_uris):
+    # Hand over a fresh list on every call
+    return URI.segment_uri_from_datum_uris(list(datum_uris))
+
+  # An empty list of datum URIs is expected to raise
+  with pytest.raises(Exception):
+    URI.segment_uri_from_datum_uris([])
+
+  # Expected value parsed from a serialized segment URI
+  one_sel = URI.from_str('psegs://dataset=d&sel_datums=t1,1')
+  assert one_sel == rebuild(t1_datum)
+
+  two_sel = URI.from_str('psegs://dataset=d&sel_datums=t1,1,t2,1')
+  assert two_sel == rebuild(t1_datum, t2_datum)
+
+  # Expected value built from URIs used as selections
+  from_uris = URI(
+    dataset='d',
+    sel_datums=[URI(topic='t1', timestamp=1), URI(topic='t2', timestamp=1)])
+  assert from_uris == rebuild(t1_datum, t2_datum)
+
+  # Expected value built from Spark Rows; the extra fields (`alt`, `moof`)
+  # are expected not to affect the comparison
+  from pyspark import Row
+  from_rows = URI(
+    dataset='d',
+    sel_datums=[
+      Row(topic='t1', timestamp=1, alt='yay'),
+      Row(topic='t2', timestamp=1, moof='foo'),
+    ])
+  assert from_rows == rebuild(t1_datum, t2_datum)
+
+  # Expected value built from Rows that wrap a URI in a `uri` field
+  from_uri_rows = URI(
+    dataset='d',
+    sel_datums=[
+      Row(uri=URI(topic='t1', timestamp=1), alt='yay'),
+      Row(uri=URI(topic='t2', timestamp=1), moof='foo'),
+    ])
+  assert from_uri_rows == rebuild(t1_datum, t2_datum)
+
+
+def test_uri_sorting():
+  """Check URI ordering directly, via as_tuple(), and via str()."""
+  # A less-complete URI is always less than a more-complete one
+  assert URI() < URI(dataset='d', timestamp=0, topic='t')
+  # Ties are broken using tuple-based encoding
+  earlier = URI(dataset='d', timestamp=1, topic='t')
+  later = URI(dataset='d', timestamp=2, topic='t')
+  assert earlier < later
+  assert earlier.as_tuple() < later.as_tuple()
+  assert str(earlier) < str(later)  # Not guaranteed: timestamps are NOT padded
+
+
+def test_uri_soft_match():
+  """Check soft_matches_segment_of() over a table of (lhs, rhs) URI pairs."""
+  cases = (
+    # Empty URI is a wildcard match for any
+    ('psegs://',
+     'psegs://segment_id=s1',
+     True),
+    ('psegs://',
+     'psegs://dataset=s1',
+     True),
+    ('psegs://',
+     'psegs://split=s1',
+     True),
+    # Typically we just match on segment_id
+    ('psegs://segment_id=s1',
+     'psegs://segment_id=s1',
+     True),
+    ('psegs://segment_id=s1',
+     'psegs://segment_id=nopenope',
+     False),
+    # lhs can be less precise, but not rhs
+    ('psegs://segment_id=s1',
+     'psegs://segment_id=s1&dataset=d',
+     True),
+    ('psegs://segment_id=s1',
+     'psegs://segment_id=s1&dataset=d2',
+     True),
+    ('psegs://segment_id=s1&dataset=d',
+     'psegs://segment_id=s1',
+     False),
+    # The same holds when dataset or split is part of the URI
+    ('psegs://dataset=d',
+     'psegs://dataset=d&segment_id=s1',
+     True),
+    ('psegs://split=s',
+     'psegs://split=s&segment_id=s1',
+     True),
+    ('psegs://dataset=d&segment_id=s1',
+     'psegs://dataset=d',
+     False),
+    ('psegs://dataset=d&segment_id=s1',
+     'psegs://segment_id=s1',
+     False),
+    ('psegs://split=s&segment_id=s1',
+     'psegs://split=s',
+     False),
+  )
+
+  for left, right, expected in cases:
+    matched = URI.from_str(left).soft_matches_segment_of(URI.from_str(right))
+    assert bool(matched) == expected
diff --git a/test/fixtures/test_DiskCachedFramesVideoSegmentFactory_create_factory_for_video.parquet/part-00003-f5474658-947e-4fe3-a41c-1284139ec901.c000.lz4hadoop.parquet b/test/fixtures/test_DiskCachedFramesVideoSegmentFactory_create_factory_for_video.parquet/part-00003-f5474658-947e-4fe3-a41c-1284139ec901.c000.lz4hadoop.parquet
new file mode 100644
index 0000000..97d3877
Binary files /dev/null and b/test/fixtures/test_DiskCachedFramesVideoSegmentFactory_create_factory_for_video.parquet/part-00003-f5474658-947e-4fe3-a41c-1284139ec901.c000.lz4hadoop.parquet differ
diff --git a/test/fixtures/test_DiskCachedFramesVideoSegmentFactory_create_factory_for_video.parquet/part-00004-f5474658-947e-4fe3-a41c-1284139ec901.c000.lz4hadoop.parquet b/test/fixtures/test_DiskCachedFramesVideoSegmentFactory_create_factory_for_video.parquet/part-00004-f5474658-947e-4fe3-a41c-1284139ec901.c000.lz4hadoop.parquet
new file mode 100644
index 0000000..b94cf4c
Binary files /dev/null and b/test/fixtures/test_DiskCachedFramesVideoSegmentFactory_create_factory_for_video.parquet/part-00004-f5474658-947e-4fe3-a41c-1284139ec901.c000.lz4hadoop.parquet differ
diff --git a/test/fixtures/test_charuco/README.md b/test/fixtures/test_charuco/README.md
new file mode 100644
index 0000000..3c1b4f4
--- /dev/null
+++ b/test/fixtures/test_charuco/README.md
@@ -0,0 +1,2 @@
+The images in this directory were taken from:
+https://github.com/pwais/psegs-ios-lidar-ext/tree/master/threeDScannerApp_data/charuco-test-fixture-lowres
diff --git a/test/fixtures/test_charuco/frame_00000.jpg b/test/fixtures/test_charuco/frame_00000.jpg
new file mode 100755
index 0000000..494dc43
Binary files /dev/null and b/test/fixtures/test_charuco/frame_00000.jpg differ
diff --git a/test/fixtures/test_charuco/frame_00021.jpg b/test/fixtures/test_charuco/frame_00021.jpg
new file mode 100755
index 0000000..fa76b61
Binary files /dev/null and b/test/fixtures/test_charuco/frame_00021.jpg differ
diff --git a/test/fixtures/test_charuco/frame_00045.jpg b/test/fixtures/test_charuco/frame_00045.jpg
new file mode 100755
index 0000000..b056f21
Binary files /dev/null and b/test/fixtures/test_charuco/frame_00045.jpg differ
diff --git a/test/fixtures/test_charuco/frame_00057.jpg b/test/fixtures/test_charuco/frame_00057.jpg
new file mode 100755
index 0000000..d13ddcf
Binary files /dev/null and b/test/fixtures/test_charuco/frame_00057.jpg differ
diff --git a/test/fixtures/test_charuco/frame_00087.jpg b/test/fixtures/test_charuco/frame_00087.jpg
new file mode 100755
index 0000000..f2ce770
Binary files /dev/null and b/test/fixtures/test_charuco/frame_00087.jpg differ
diff --git a/test/fixtures/test_colmap/README.md b/test/fixtures/test_colmap/README.md
new file mode 100644
index 0000000..6a27f22
--- /dev/null
+++ b/test/fixtures/test_colmap/README.md
@@ -0,0 +1,7 @@
+The images in this directory were taken from:
+https://github.com/pwais/psegs-ios-lidar-ext/tree/master/threeDScannerApp_data/charuco-test-fixture-lowres
+The images were resized to reduce disk space use.
+
+COLMAP was then run (from this directory) using:
+`colmap automatic_reconstructor --use_gpu=0 --workspace_path . --image_path=./images/ --single_camera=1 --dense=0`
+
diff --git a/test/fixtures/test_colmap/images/frame_00000.jpg b/test/fixtures/test_colmap/images/frame_00000.jpg
new file mode 100755
index 0000000..0f7495a
Binary files /dev/null and b/test/fixtures/test_colmap/images/frame_00000.jpg differ
diff --git a/test/fixtures/test_colmap/images/frame_00003.jpg b/test/fixtures/test_colmap/images/frame_00003.jpg
new file mode 100755
index 0000000..82c83c7
Binary files /dev/null and b/test/fixtures/test_colmap/images/frame_00003.jpg differ
diff --git a/test/fixtures/test_colmap/images/frame_00006.jpg b/test/fixtures/test_colmap/images/frame_00006.jpg
new file mode 100755
index 0000000..4d51d09
Binary files /dev/null and b/test/fixtures/test_colmap/images/frame_00006.jpg differ
diff --git a/test/fixtures/test_colmap/images/frame_00009.jpg b/test/fixtures/test_colmap/images/frame_00009.jpg
new file mode 100755
index 0000000..6586abb
Binary files /dev/null and b/test/fixtures/test_colmap/images/frame_00009.jpg differ
diff --git a/test/fixtures/test_colmap/images/frame_00012.jpg b/test/fixtures/test_colmap/images/frame_00012.jpg
new file mode 100755
index 0000000..7465e29
Binary files /dev/null and b/test/fixtures/test_colmap/images/frame_00012.jpg differ
diff --git a/test/fixtures/test_colmap/images/frame_00015.jpg b/test/fixtures/test_colmap/images/frame_00015.jpg
new file mode 100755
index 0000000..1e6c9f9
Binary files /dev/null and b/test/fixtures/test_colmap/images/frame_00015.jpg differ
diff --git a/test/fixtures/test_colmap/images/frame_00030.jpg b/test/fixtures/test_colmap/images/frame_00030.jpg
new file mode 100755
index 0000000..e3d3642
Binary files /dev/null and b/test/fixtures/test_colmap/images/frame_00030.jpg differ
diff --git a/test/fixtures/test_colmap/images/frame_00033.jpg b/test/fixtures/test_colmap/images/frame_00033.jpg
new file mode 100755
index 0000000..44c4e69
Binary files /dev/null and b/test/fixtures/test_colmap/images/frame_00033.jpg differ
diff --git a/test/fixtures/test_colmap/sparse/0/cameras.bin b/test/fixtures/test_colmap/sparse/0/cameras.bin
new file mode 100644
index 0000000..f4e5e0d
Binary files /dev/null and b/test/fixtures/test_colmap/sparse/0/cameras.bin differ
diff --git a/test/fixtures/test_colmap/sparse/0/images.bin b/test/fixtures/test_colmap/sparse/0/images.bin
new file mode 100644
index 0000000..ba3527b
Binary files /dev/null and b/test/fixtures/test_colmap/sparse/0/images.bin differ
diff --git a/test/fixtures/test_colmap/sparse/0/points3D.bin b/test/fixtures/test_colmap/sparse/0/points3D.bin
new file mode 100644
index 0000000..d59cd8e
Binary files /dev/null and b/test/fixtures/test_colmap/sparse/0/points3D.bin differ
diff --git a/test/fixtures/test_colmap/sparse/0/project.ini b/test/fixtures/test_colmap/sparse/0/project.ini
new file mode 100644
index 0000000..7fe94e8
--- /dev/null
+++ b/test/fixtures/test_colmap/sparse/0/project.ini
@@ -0,0 +1,201 @@
+log_to_stderr=false
+random_seed=0
+log_level=2
+database_path=./database.db
+image_path=./images/
+[ImageReader]
+single_camera=false
+single_camera_per_folder=false
+existing_camera_id=-1
+default_focal_length_factor=1.2
+mask_path=
+camera_model=SIMPLE_RADIAL
+camera_params=
+camera_mask_path=
+[SiftExtraction]
+use_gpu=true
+estimate_affine_shape=true
+upright=false
+domain_size_pooling=false
+num_threads=-1
+max_image_size=2400
+max_num_features=8192
+first_octave=-1
+num_octaves=4
+octave_resolution=3
+max_num_orientations=2
+dsp_num_scales=10
+peak_threshold=0.0066666666666666671
+edge_threshold=10
+dsp_min_scale=0.16666666666666666
+dsp_max_scale=3
+gpu_index=-1
+[SiftMatching]
+use_gpu=true
+cross_check=true
+multiple_models=false
+guided_matching=true
+num_threads=-1
+max_num_matches=32768
+max_num_trials=10000
+min_num_inliers=15
+max_ratio=0.80000000000000004
+max_distance=0.69999999999999996
+max_error=4
+confidence=0.999
+min_inlier_ratio=0.25
+gpu_index=-1
+[SequentialMatching]
+quadratic_overlap=true
+loop_detection=false
+overlap=10
+loop_detection_period=10
+loop_detection_num_images=50
+loop_detection_num_nearest_neighbors=1
+loop_detection_num_checks=256
+loop_detection_num_images_after_verification=0
+loop_detection_max_num_features=-1
+vocab_tree_path=
+[SpatialMatching]
+is_gps=true
+ignore_z=true
+max_num_neighbors=50
+max_distance=100
+[BundleAdjustment]
+refine_focal_length=true
+refine_principal_point=false
+refine_extra_params=true
+refine_extrinsics=true
+max_num_iterations=100
+max_linear_solver_iterations=200
+function_tolerance=0
+gradient_tolerance=0
+parameter_tolerance=0
+[Mapper]
+ignore_watermarks=false
+multiple_models=true
+extract_colors=true
+ba_refine_focal_length=true
+ba_refine_principal_point=false
+ba_refine_extra_params=true
+ba_global_use_pba=false
+fix_existing_images=false
+tri_ignore_two_view_tracks=true
+min_num_matches=15
+max_num_models=50
+max_model_overlap=20
+min_model_size=10
+init_image_id1=-1
+init_image_id2=-1
+init_num_trials=200
+num_threads=-1
+ba_min_num_residuals_for_multi_threading=50000
+ba_local_num_images=6
+ba_local_max_num_iterations=30
+ba_global_pba_gpu_index=-1
+ba_global_images_freq=500
+ba_global_points_freq=250000
+ba_global_max_num_iterations=75
+ba_global_max_refinements=5
+ba_local_max_refinements=3
+snapshot_images_freq=0
+init_min_num_inliers=100
+init_max_reg_trials=2
+abs_pose_min_num_inliers=30
+max_reg_trials=3
+tri_max_transitivity=1
+tri_complete_max_transitivity=5
+tri_re_max_trials=1
+min_focal_length_ratio=0.10000000000000001
+max_focal_length_ratio=10
+max_extra_param=1.7976931348623157e+308
+ba_global_images_ratio=1.1000000000000001
+ba_global_points_ratio=1.1000000000000001
+ba_global_max_refinement_change=0.00050000000000000001
+ba_local_max_refinement_change=0.001
+init_max_error=4
+init_max_forward_motion=0.94999999999999996
+init_min_tri_angle=16
+abs_pose_max_error=12
+abs_pose_min_inlier_ratio=0.25
+filter_max_reproj_error=4
+filter_min_tri_angle=1.5
+local_ba_min_tri_angle=6
+tri_create_max_angle_error=2
+tri_continue_max_angle_error=2
+tri_merge_max_reproj_error=4
+tri_complete_max_reproj_error=4
+tri_re_max_angle_error=5
+tri_re_min_ratio=0.20000000000000001
+tri_min_angle=1.5
+snapshot_path=
+[PatchMatchStereo]
+geom_consistency=true
+filter=true
+write_consistency_graph=false
+max_image_size=2400
+window_radius=5
+window_step=1
+num_samples=15
+num_iterations=5
+filter_min_num_consistent=2
+depth_min=-1
+depth_max=-1
+sigma_spatial=-1
+sigma_color=0.20000000298023224
+ncc_sigma=0.60000002384185791
+min_triangulation_angle=1
+incident_angle_sigma=0.89999997615814209
+geom_consistency_regularizer=0.30000001192092896
+geom_consistency_max_cost=3
+filter_min_ncc=0.10000000149011612
+filter_min_triangulation_angle=3
+filter_geom_consistency_max_cost=1
+cache_size=32
+gpu_index=-1
+[Render]
+adapt_refresh_rate=true
+image_connections=false
+min_track_len=3
+refresh_rate=1
+projection_type=0
+max_error=2
+[ExhaustiveMatching]
+block_size=50
+[VocabTreeMatching]
+num_images=100
+num_nearest_neighbors=5
+num_checks=256
+num_images_after_verification=0
+max_num_features=-1
+vocab_tree_path=
+match_list_path=
+[TransitiveMatching]
+batch_size=1000
+num_iterations=3
+[ImagePairsMatching]
+block_size=1225
+[StereoFusion]
+max_image_size=2400
+min_num_pixels=5
+max_num_pixels=10000
+max_traversal_depth=100
+check_num_images=50
+max_reproj_error=2
+max_depth_error=0.0099999997764825821
+max_normal_error=10
+cache_size=32
+[PoissonMeshing]
+depth=13
+num_threads=-1
+point_weight=1
+color=32
+trim=10
+[DelaunayMeshing]
+num_threads=-1
+max_proj_dist=20
+max_depth_dist=0.050000000000000003
+distance_sigma_factor=1
+quality_regularization=1
+max_side_length_factor=25
+max_side_length_percentile=95
diff --git a/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/_SUCCESS b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git a/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00000-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00000-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc
new file mode 100644
index 0000000..215662e
Binary files /dev/null and b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00000-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc differ
diff --git a/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00001-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00001-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc
new file mode 100644
index 0000000..b38baf2
Binary files /dev/null and b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00001-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc differ
diff --git a/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00002-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00002-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc
new file mode 100644
index 0000000..65a8dce
Binary files /dev/null and b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00002-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc differ
diff --git a/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00003-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00003-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc
new file mode 100644
index 0000000..39cbc03
Binary files /dev/null and b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00003-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc differ
diff --git a/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00004-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00004-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc
new file mode 100644
index 0000000..f5f8db7
Binary files /dev/null and b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/.part-00004-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet.crc differ
diff --git a/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00000-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00000-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet
new file mode 100644
index 0000000..10cfde8
Binary files /dev/null and b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00000-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet differ
diff --git a/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00001-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00001-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet
new file mode 100644
index 0000000..d4b0fea
Binary files /dev/null and b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00001-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet differ
diff --git a/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00002-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00002-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet
new file mode 100644
index 0000000..cf2f19b
Binary files /dev/null and b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00002-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet differ
diff --git a/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00003-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00003-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet
new file mode 100644
index 0000000..cf9a6ce
Binary files /dev/null and b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00003-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet differ
diff --git a/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00004-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00004-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet
new file mode 100644
index 0000000..a8a7ed3
Binary files /dev/null and b/test/fixtures/test_colmap/test_colmap_sdt_expected.parquet/dataset=anon/split=anon/segment_id=anon_colmap_recon/part-00004-8f67ee25-9b21-4886-9994-df4c2246b63c.c000.lz4.parquet differ
diff --git a/test/fixtures/test_draw_bbox_in_image.png b/test/fixtures/test_draw_bbox_in_image.png
new file mode 100644
index 0000000..95a0f6f
Binary files /dev/null and b/test/fixtures/test_draw_bbox_in_image.png differ
diff --git a/test/fixtures/test_draw_cuboid_xy_in_image.png b/test/fixtures/test_draw_cuboid_xy_in_image.png
new file mode 100644
index 0000000..fb4f4e7
Binary files /dev/null and b/test/fixtures/test_draw_cuboid_xy_in_image.png differ
diff --git a/test/fixtures/test_draw_depth_in_image.png b/test/fixtures/test_draw_depth_in_image.png
new file mode 100644
index 0000000..108c785
Binary files /dev/null and b/test/fixtures/test_draw_depth_in_image.png differ
diff --git a/test/fixtures/test_draw_xy_depth_in_image.png b/test/fixtures/test_draw_xy_depth_in_image.png
new file mode 100644
index 0000000..c7d724a
Binary files /dev/null and b/test/fixtures/test_draw_xy_depth_in_image.png differ
diff --git a/test/fixtures/test_draw_xy_depth_in_image_radius_2.png b/test/fixtures/test_draw_xy_depth_in_image_radius_2.png
new file mode 100644
index 0000000..58933f0
Binary files /dev/null and b/test/fixtures/test_draw_xy_depth_in_image_radius_2.png differ
diff --git a/test/fixtures/test_draw_xy_depth_in_image_user_colors.png b/test/fixtures/test_draw_xy_depth_in_image_user_colors.png
new file mode 100644
index 0000000..9547be8
Binary files /dev/null and b/test/fixtures/test_draw_xy_depth_in_image_user_colors.png differ
diff --git a/test/fixtures/test_get_ortho_debug_image_all_manual.png b/test/fixtures/test_get_ortho_debug_image_all_manual.png
new file mode 100644
index 0000000..f78ef75
Binary files /dev/null and b/test/fixtures/test_get_ortho_debug_image_all_manual.png differ
diff --git a/test/fixtures/test_get_ortho_debug_image_autobound.png b/test/fixtures/test_get_ortho_debug_image_autobound.png
new file mode 100644
index 0000000..ef7f49a
Binary files /dev/null and b/test/fixtures/test_get_ortho_debug_image_autobound.png differ
diff --git a/test/fixtures/test_get_ortho_debug_image_empty_space.png b/test/fixtures/test_get_ortho_debug_image_empty_space.png
new file mode 100644
index 0000000..38b361e
Binary files /dev/null and b/test/fixtures/test_get_ortho_debug_image_empty_space.png differ
diff --git a/test/fixtures/test_get_ortho_debug_image_q1.png b/test/fixtures/test_get_ortho_debug_image_q1.png
new file mode 100644
index 0000000..08f614b
Binary files /dev/null and b/test/fixtures/test_get_ortho_debug_image_q1.png differ
diff --git a/test/fixtures/test_get_ortho_debug_image_q2.png b/test/fixtures/test_get_ortho_debug_image_q2.png
new file mode 100644
index 0000000..f0f1def
Binary files /dev/null and b/test/fixtures/test_get_ortho_debug_image_q2.png differ
diff --git a/test/fixtures/test_get_ortho_debug_image_q3.png b/test/fixtures/test_get_ortho_debug_image_q3.png
new file mode 100644
index 0000000..2ca4b32
Binary files /dev/null and b/test/fixtures/test_get_ortho_debug_image_q3.png differ
diff --git a/test/fixtures/test_get_ortho_debug_image_q4.png b/test/fixtures/test_get_ortho_debug_image_q4.png
new file mode 100644
index 0000000..8668e1d
Binary files /dev/null and b/test/fixtures/test_get_ortho_debug_image_q4.png differ
diff --git a/test/table/__init__.py b/test/table/__init__.py
new file mode 100644
index 0000000..c0ec9ac
--- /dev/null
+++ b/test/table/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/test/table/test_sd_db.py b/test/table/test_sd_db.py
new file mode 100644
index 0000000..6ac0d59
--- /dev/null
+++ b/test/table/test_sd_db.py
@@ -0,0 +1,308 @@
+# Copyright 2022 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from psegs.datum.camera_image import CameraImage
+from psegs.datum.cuboid import Cuboid
+from psegs.datum.point_cloud import PointCloud
+from psegs.datum.stamped_datum import StampedDatum
+from psegs.datum.transform import Transform
+from psegs.datum.uri import URI
+from psegs.table.sd_table_factory import StampedDatumTableFactory
+
+import test.testutil as testutil
+
+
+
+from psegs.table.sd_db import NoKnownTable
+from psegs.table.sd_db import StampedDatumDB
+from psegs.table.sd_db import to_seg_uri_str
+
+def test_seg_to_uri_str():
+  """Check `to_seg_uri_str` on URI, str and Row inputs (topic is dropped)."""
+  def _check(actual, expected):
+    assert to_seg_uri_str(actual) == expected
+
+  _check(URI(), 'psegs://')
+  _check('psegs://', 'psegs://')
+  _check(URI(dataset='a'), 'psegs://dataset=a')
+  _check('psegs://dataset=a', 'psegs://dataset=a')
+  _check('psegs://dataset=a&topic=b', 'psegs://dataset=a')
+
+  from pyspark import Row
+  _check(Row(moof=1), 'psegs://')
+  _check(Row(dataset='a'), 'psegs://dataset=a')
+  _check(Row(dataset='a', topic='b'), 'psegs://dataset=a')
+  for bad_input in ('', object()):  # one `raises` per input so both are run
+    with pytest.raises(Exception):
+      to_seg_uri_str(bad_input)
+
+
+class TestFactoryBase(StampedDatumTableFactory):
+  """Table factory stub that serves the class-level `TEST_DATUMS` list."""
+  TEST_DATUMS = []
+
+  @classmethod
+  def _create_datum_rdds(
+        cls, spark, existing_uri_df=None, only_segments=None):
+    # With no segment filter, hand back every datum; otherwise gather the
+    # matches for each requested segment URI, in request order.
+    selected = cls.TEST_DATUMS
+    if only_segments:
+      selected = [
+        datum for seg_uri in only_segments for datum in cls.TEST_DATUMS
+        if seg_uri.soft_matches_segment_of(datum.uri)
+      ]
+    return [spark.sparkContext.parallelize(selected)]
+
+  @classmethod
+  def _get_all_segment_uris(cls):
+    seg_uri_strs = {str(d.uri.to_segment_uri()) for d in cls.TEST_DATUMS}
+    return sorted(seg_uri_strs)
+
+class T1(TestFactoryBase):
+  BASE_URI = URI(dataset='t1', split='s')
+  TEST_DATUMS = [
+    StampedDatum(
+      uri=BASE_URI.replaced(
+        segment_id='segt1.1', topic='c1', timestamp=1),
+      camera_image=CameraImage(sensor_name='c1', timestamp=1)),
+    StampedDatum(
+      uri=BASE_URI.replaced(
+        segment_id='segt1.1', topic='l1', timestamp=1),
+      point_cloud=PointCloud(sensor_name='l1', timestamp=1)),
+
+    StampedDatum(
+      uri=BASE_URI.replaced(
+        segment_id='segt1.2', topic='c', timestamp=1),
+      camera_image=CameraImage(sensor_name='c', timestamp=1)),
+    StampedDatum(
+      uri=BASE_URI.replaced(
+        segment_id='segt1.2', topic='c', timestamp=2),
+      camera_image=CameraImage(sensor_name='c', timestamp=2)),
+  ]
+
+class T2(TestFactoryBase):
+  BASE_URI = URI(dataset='t2', split='s')
+  TEST_DATUMS = [
+    StampedDatum(
+      uri=BASE_URI.replaced(
+        segment_id='segt2.1', topic='c1', timestamp=1),
+      camera_image=CameraImage(sensor_name='c1', timestamp=1)),
+    StampedDatum(
+      uri=BASE_URI.replaced(
+        segment_id='segt2.2', topic='c1', timestamp=1),
+      camera_image=CameraImage(sensor_name='c1', timestamp=1)),
+  ]
+
+class T3(TestFactoryBase):
+  TEST_DATUMS = ([
+      StampedDatum(
+        uri=URI(dataset='t3', split='s',
+          segment_id='segt3.1', topic='c1', timestamp=t+1),
+        camera_image=CameraImage(sensor_name='c1', timestamp=t+1))
+      for t in range(10)
+    ] + [ 
+      StampedDatum(
+        uri=URI(dataset='t3', split='s',
+          segment_id='segt3.2', topic='c1', timestamp=t+1),
+        camera_image=CameraImage(sensor_name='c1', timestamp=t+1))
+      for t in range(20)
+    ])
+
+def _create_db_simple(spark=None):
+  """Return a `StampedDatumDB` over T1, T2 and T3 (local Spark by default)."""
+  session = spark or testutil.LocalSpark.getOrCreate()
+  return StampedDatumDB([T1, T2, T3], spark=session)
+
+
+
+def test_db_get_sample():
+  def _check_datums(sample, expected_tt):
+    actual_tt = [(sd.uri.topic, sd.uri.timestamp) for sd in sample.datums]
+    assert sorted(actual_tt) == sorted(expected_tt)
+
+  db = _create_db_simple()
+  
+  uri = 'psegs://dataset=t1&segment_id=segt1.1&sel_datums=c1,1'
+  sample = db.get_sample(uri)
+  assert sample.uri == URI.from_str(uri)
+  _check_datums(sample, [('c1', 1)])
+
+  uri = 'psegs://dataset=t1&segment_id=segt1.2&sel_datums=c,2'
+  sample = db.get_sample(uri)
+  assert sample.uri == URI.from_str(uri)
+  _check_datums(sample, [('c', 2)])
+  
+  uri = 'psegs://dataset=t1&segment_id=segt1.2&sel_datums=c,2,c,1'
+  sample = db.get_sample(uri)
+  assert sample.uri == URI.from_str(uri)
+  _check_datums(sample, [('c', 2), ('c', 1)])
+
+  uri = 'psegs://segment_id=segt1.2'
+  sample = db.get_sample(uri)
+  assert URI.from_str(uri).soft_matches_segment_of(sample.uri)
+  _check_datums(sample, [('c', 1), ('c', 2)])
+
+  with pytest.raises(NoKnownTable):
+    uri = 'psegs://dataset=no-existe&segment_id=segt1.2'
+    sample = db.get_sample(uri)
+
+
+
+def _get_actual_uris(datum_df):
+  # Collect the `uri` column, mapping each row through `T1.from_row`
+  uri_only_rows = datum_df.select('uri').rdd.map(T1.from_row).collect()
+  return [row.uri for row in uri_only_rows]
+
+def test_db_get_datum_df_uri_list():
+  """`get_datum_df` resolves a list of URIs across tables; unknown ones raise."""
+  db = _create_db_simple()
+
+  # These datums live in two different tables (datasets t1 and t2)
+  uris_exist = [
+    URI(dataset='t1', split='s', segment_id='segt1.2', timestamp=1, topic='c'),
+    URI(dataset='t1', split='s', segment_id='segt1.2', timestamp=2, topic='c'),
+    URI(dataset='t2', split='s', segment_id='segt2.2', timestamp=1, topic='c1'),
+  ]
+  datum_df = db.get_datum_df(uris=uris_exist)
+  # Reuse the module-level helper instead of a shadowing local copy
+  actual_uris = _get_actual_uris(datum_df)
+  assert sorted(uris_exist) == sorted(actual_uris)
+
+  # The 'no-exist' dataset is served by none of T1 / T2 / T3, so the
+  # lookup is expected to fail loudly rather than return an empty frame
+  uris_no_exist = [
+    URI(dataset='no-exist', segment_id='no-exist', timestamp=1, topic='c1')
+  ]
+  with pytest.raises(NoKnownTable):
+    db.get_datum_df(uris=uris_no_exist)
+
+
+
+def test_db_get_datum_df_uri_rdd():
+  uris_exist = [
+    URI(dataset='t1', split='s', segment_id='segt1.2', timestamp=1, topic='c'),
+    URI(dataset='t1', split='s', segment_id='segt1.2', timestamp=2, topic='c'),
+    URI(dataset='t2', split='s', segment_id='segt2.2', timestamp=1, topic='c1'),
+  ]
+  uris_no_exist = [
+    URI(dataset='no-exist', segment_id='no-exist', timestamp=1, topic='c1')
+  ]
+  with testutil.LocalSpark.sess() as spark:
+    db = _create_db_simple(spark=spark)
+
+    uri_rdd = spark.sparkContext.parallelize(uris_exist + uris_no_exist)
+
+    datum_df = db.get_datum_df(uris=uri_rdd)
+    actual_uris = _get_actual_uris(datum_df)
+    assert sorted(uris_exist) == sorted(actual_uris)
+    
+
+def test_db_get_datum_df_uri_df():
+  uris_exist = [
+    URI(dataset='t1', split='s', segment_id='segt1.2', timestamp=1, topic='c'),
+    URI(dataset='t1', split='s', segment_id='segt1.2', timestamp=2, topic='c'),
+    URI(dataset='t2', split='s', segment_id='segt2.2', timestamp=1, topic='c1'),
+  ]
+  uris_no_exist = [
+    URI(dataset='no-exist', segment_id='no-exist', timestamp=1, topic='c1')
+  ]
+  with testutil.LocalSpark.sess() as spark:
+    db = _create_db_simple(spark=spark)
+
+    from oarphpy.spark import RowAdapter
+    uris = uris_exist + uris_no_exist
+    schema = RowAdapter.to_schema(URI())
+    uri_df = spark.createDataFrame(
+              [RowAdapter.to_row(u) for u in uris], schema=schema)
+    datum_df = db.get_datum_df(uris=uri_df)
+    actual_uris = _get_actual_uris(datum_df)
+    assert sorted(uris_exist) == sorted(actual_uris)
+
+
+def test_db_get_keyed_sample_df():
+  key_uris_exist = [
+    ('k-span-segs',
+      URI(dataset='t1', split='s', segment_id='segt1.2', timestamp=1, topic='c')),
+    ('k-span-segs',
+      URI(dataset='t2', split='s', segment_id='segt2.2', timestamp=1, topic='c1')),
+
+    ('solo-seg-datum',
+      URI(dataset='t1', split='s', segment_id='segt1.2', timestamp=2, topic='c')),
+    
+    ('two-topic',
+      URI(dataset='t1', split='s', segment_id='segt1.1', timestamp=1, topic='c1')),
+    ('two-topic',
+      URI(dataset='t1', split='s', segment_id='segt1.1', timestamp=1, topic='l1')),
+  ]
+  key_uris_exist += [
+    ('many',
+      URI(
+        dataset='t3',
+        split='s',
+        segment_id='segt3.2',
+        timestamp=t+1,
+        topic='c1'))
+    for t in range(20)
+  ]
+  uris_no_exist = [
+    ('solo-seg-datum',
+      URI(dataset='no-exist', segment_id='no-exist', timestamp=1, topic='c1')),
+  ]
+  uris_no_exist += [
+    ('no-exist-many',
+      URI(
+        dataset='t3',
+        split='s',
+        segment_id='segt3.1',
+        timestamp=t+1,
+        topic='c1'))
+    for t in range(30, 100)
+  ]
+  with testutil.LocalSpark.sess() as spark:
+    db = _create_db_simple(spark=spark)
+
+    rows = [{'key': k, 'uri': u} for k, u in key_uris_exist]
+    rows += [{'key': k, 'uri': u} for k, u in uris_no_exist]
+    from oarphpy.spark import RowAdapter
+    rows = [RowAdapter.to_row(r) for r in rows]
+    schema = RowAdapter.to_schema({'key': 's', 'uri': URI()})
+    df = spark.createDataFrame(rows, schema=schema)
+
+    key_sample_df = db.get_keyed_sample_df(df)
+
+    expected_key_to_uris_exist = {}
+    for k, u in key_uris_exist:
+      expected_key_to_uris_exist.setdefault(k, [])
+      expected_key_to_uris_exist[k].append(u)
+    
+    key_sample_df = key_sample_df.persist()
+
+    actual_keys = set(r.key for r in key_sample_df.select('key').collect())
+    assert actual_keys == set(expected_key_to_uris_exist.keys())
+
+    for key, expected_uris in expected_key_to_uris_exist.items():
+      row_df = key_sample_df.filter(key_sample_df.key == key)
+
+      datum_rows = row_df.collect()[0].asDict()['datums']
+      assert len(datum_rows) == len(expected_uris)
+
+      datums = [RowAdapter.from_row(rr) for rr in datum_rows]
+      assert sorted(d.uri for d in datums) == sorted(expected_uris)
+    
+      samp = db.datum_rows_to_sample(datum_rows)
+      assert len(samp.uri.sel_datums) == len(expected_uris)
+      assert len(samp.datums) == len(expected_uris)
diff --git a/test/table/test_sd_table.py b/test/table/test_sd_table.py
new file mode 100644
index 0000000..cb6cc37
--- /dev/null
+++ b/test/table/test_sd_table.py
@@ -0,0 +1,338 @@
+# Copyright 2022 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from psegs.datum.camera_image import CameraImage
+from psegs.datum.cuboid import Cuboid
+from psegs.datum.point_cloud import PointCloud
+from psegs.datum.stamped_datum import StampedDatum
+from psegs.datum.transform import Transform
+from psegs.datum.matched_pair import MatchedPair
+from psegs.datum.points2d import Points2D
+from psegs.datum.uri import URI
+from psegs.table.sd_table_factory import StampedDatumTableFactory
+
+import test.testutil as testutil
+
+
+class TestSDTFactoryBase(StampedDatumTableFactory):
+  """Create a clean temp directory for each test table"""
+
+  @classmethod
+  def table_root(cls):
+    if '_table_tempdir' not in vars(cls):  # own attr only; never inherited
+      cls._table_tempdir = testutil.test_tempdir('sd_test_' + cls.__name__)
+    return cls._table_tempdir
+
+
+def test_sd_table_simple():
+  test_datums = [
+    StampedDatum(
+      uri=URI(
+        dataset='d',
+        split='s',
+        segment_id='segment'),
+      transform=Transform()),
+  ]
+
+  class Simple(TestSDTFactoryBase):
+    @classmethod
+    def _create_datum_rdds(
+          cls, spark, existing_uri_df=None, only_segments=None):
+      return [spark.sparkContext.parallelize(test_datums)]
+    
+  with testutil.LocalSpark.sess() as spark:
+    Simple.build(spark)
+    df = Simple.as_df(spark)
+    assert df.count() == 1
+    assert df.filter((df.uri.dataset=='d')).count() == 1
+
+    datum_rdd = Simple.as_datum_rdd(spark)
+    datums = datum_rdd.collect()
+    assert len(datums) == 1
+    assert datums[0] == test_datums[0]
+
+
+def test_sd_table_one_of_every():
+  BASE_URI = URI(dataset='d', split='s', segment_id='seg')
+  test_datums = [
+    StampedDatum(
+      uri=BASE_URI.replaced(topic='camera|front', timestamp=1),
+      camera_image=CameraImage()),
+    StampedDatum(
+      uri=BASE_URI.replaced(topic='labels|cuboids', timestamp=1),
+      cuboids=[Cuboid()]),
+    StampedDatum(
+      uri=BASE_URI.replaced(topic='lidar|front', timestamp=1),
+      point_cloud=PointCloud()),
+    StampedDatum(
+      uri=BASE_URI.replaced(topic='ego_pose', timestamp=1),
+      transform=Transform()),
+    StampedDatum(
+      uri=BASE_URI.replaced(topic='matches', timestamp=1),
+      matched_pair=MatchedPair()),
+    StampedDatum(
+      uri=BASE_URI.replaced(topic='points', timestamp=1),
+      points_2d=Points2D()),
+  ]
+
+  class OneOfEvery(TestSDTFactoryBase):
+    @classmethod
+    def _create_datum_rdds(
+          cls, spark, existing_uri_df=None, only_segments=None):
+      return [spark.sparkContext.parallelize(test_datums)]
+    
+  with testutil.LocalSpark.sess() as spark:
+    sdt = OneOfEvery.get_segment_sd_table(spark=spark)
+
+    df = sdt.to_spark_df(spark)
+    assert df.count() == len(test_datums)
+    
+    # Let's do a basic query
+    TOPICS = [datum.uri.topic for datum in test_datums]
+    assert (
+      sorted(TOPICS) ==
+      sorted(r.topic for r in df.select('uri.topic').collect()))
+
+    datum_rdd = sdt.to_datum_rdd(spark)
+    datums = datum_rdd.collect()
+    assert sorted(datums) == sorted(test_datums)
+
+
+###############################################################################
+## Diff Tests
+
+def create_sd_table_and_df(spark, datums):
+
+  class DiffTable(TestSDTFactoryBase):
+    @classmethod
+    def _create_datum_rdds(
+          cls, spark, existing_uri_df=None, only_segments=None):
+      return [spark.sparkContext.parallelize(datums)]
+  
+  df = DiffTable.as_df(spark, force_compute=True)
+
+  # Make tests faster; default number of partitions is 
+  # usually larger than number of rows
+  df = df.repartition(10).cache()
+
+  return DiffTable, df
+
+
+def test_sd_table_diff_empty():
+  with testutil.LocalSpark.sess() as spark:
+    _, df1 = create_sd_table_and_df(spark, [])
+    _, df2 = create_sd_table_and_df(spark, [])
+    
+    difftxt = StampedDatumTableFactory.find_diff(df1, df2)
+    assert difftxt == ''
+
+    
+def test_sd_table_diff_identical():
+  with testutil.LocalSpark.sess() as spark:
+    BASE_URI = URI(dataset='d', split='s', segment_id='seg')
+    one_of_every_datum = [
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='camera|front', timestamp=1),
+        camera_image=CameraImage()),
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='labels|cuboids', timestamp=1),
+        cuboids=[Cuboid()]),
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='lidar|front', timestamp=1),
+        point_cloud=PointCloud()),
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='ego_pose', timestamp=1),
+        transform=Transform()),
+    ]
+    _, df1 = create_sd_table_and_df(spark, one_of_every_datum)
+    _, df2 = create_sd_table_and_df(spark, one_of_every_datum)
+    
+    difftxt = StampedDatumTableFactory.find_diff(df1, df2)
+    assert difftxt == ''
+
+
+def test_sd_table_diff_mismatch_dataset():
+  with testutil.LocalSpark.sess() as spark:
+    BASE_URI = URI(dataset='d', split='s', segment_id='seg')
+    t1 = [
+      StampedDatum(
+        uri=BASE_URI.replaced(dataset='d1', topic='camera|front', timestamp=t),
+        camera_image=CameraImage())
+      for t in range(10)
+    ]
+    t2 = [
+      StampedDatum(
+        uri=BASE_URI.replaced(dataset='d2', topic='camera|front', timestamp=t),
+        camera_image=CameraImage())
+      for t in range(10)
+    ]
+    _, df1 = create_sd_table_and_df(spark, t1)
+    _, df2 = create_sd_table_and_df(spark, t2)
+
+    difftxt = StampedDatumTableFactory.find_diff(df1, df2)
+    assert "Dataset/Split Mismatch" in difftxt
+    assert "- [('d1', 's')]" in difftxt
+    assert "+ [('d2', 's')]" in difftxt
+
+
+def test_sd_table_diff_mismatch_segments():
+  with testutil.LocalSpark.sess() as spark:
+    BASE_URI = URI(dataset='d', split='s', segment_id='seg')
+    t1 = [
+      StampedDatum(
+        uri=BASE_URI.replaced(segment_id='seg1', topic='camera|front', timestamp=t),
+        camera_image=CameraImage())
+      for t in range(10)
+    ]
+    t2 = [
+      StampedDatum(
+        uri=BASE_URI.replaced(segment_id='seg2', topic='camera|front', timestamp=t),
+        camera_image=CameraImage())
+      for t in range(10)
+    ]
+    _, df1 = create_sd_table_and_df(spark, t1)
+    _, df2 = create_sd_table_and_df(spark, t2)
+
+    difftxt = StampedDatumTableFactory.find_diff(df1, df2)
+    assert "Segment Mismatch" in difftxt
+    assert "- [('d', 's', 'seg1')]" in difftxt
+    assert "+ [('d', 's', 'seg2')]" in difftxt
+
+
+def test_sd_table_diff_mismatch_uri_count_many():
+  with testutil.LocalSpark.sess() as spark:
+    BASE_URI = URI(dataset='d', split='s', segment_id='seg')
+    t1 = [
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='camera|front', timestamp=t),
+        camera_image=CameraImage())
+      for t in range(1000)
+    ]
+    t2 = [
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='camera|front', timestamp=t),
+        camera_image=CameraImage())
+      for t in range(2000)
+    ]
+    _, df1 = create_sd_table_and_df(spark, t1)
+    _, df2 = create_sd_table_and_df(spark, t2)
+
+    difftxt = StampedDatumTableFactory.find_diff(df1, df2)
+    assert "URI Count Mismatch" in difftxt
+    assert "left count: 1000" in difftxt
+    assert "right count: 2000" in difftxt
+
+
+def test_sd_table_diff_mismatch_uri_content():
+  with testutil.LocalSpark.sess() as spark:
+    BASE_URI = URI(dataset='d', split='s', segment_id='seg')
+    t1 = [
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='camera|front', timestamp=t),
+        camera_image=CameraImage())
+      for t in range(10)
+    ]
+    t2 = [
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='camera|rear', timestamp=t),
+        camera_image=CameraImage())
+      for t in range(10)
+    ]
+    _, df1 = create_sd_table_and_df(spark, t1)
+    _, df2 = create_sd_table_and_df(spark, t2)
+
+    difftxt = StampedDatumTableFactory.find_diff(df1, df2)
+    assert "Missing URIs" in difftxt
+    assert "Missing left (10): ['psegs://dataset=d&split=s&segment_id=seg&timestamp=1&topic=camera|rear'" in difftxt
+    assert "Missing right (10): ['psegs://dataset=d&split=s&segment_id=seg&timestamp=1&topic=camera|front'" in difftxt
+
+
+def test_sd_table_diff_mismatch_uri_dupes():
+  with testutil.LocalSpark.sess() as spark:
+    BASE_URI = URI(dataset='d', split='s', segment_id='seg')
+    one_of_every_datum = [
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='camera|front', timestamp=1),
+        camera_image=CameraImage()),
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='labels|cuboids', timestamp=1),
+        cuboids=[Cuboid()]),
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='lidar|front', timestamp=1),
+        point_cloud=PointCloud()),
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='ego_pose', timestamp=1),
+        transform=Transform()),
+    ]
+    _, df1 = create_sd_table_and_df(spark, one_of_every_datum)
+    _, df2 = create_sd_table_and_df(
+                spark, one_of_every_datum + one_of_every_datum)
+    
+    difftxt = StampedDatumTableFactory.find_diff(df1, df2)
+    assert "Dupe URIs" in difftxt
+    assert "Dupes left (0): []" in difftxt
+    assert "Dupes right (4): " in difftxt
+    assert "psegs://dataset=d&split=s&segment_id=seg&timestamp=1&topic=ego_pose', 2" in difftxt
+
+
+def test_sd_table_diff_mismatch_sd_content():
+  with testutil.LocalSpark.sess() as spark:
+    BASE_URI = URI(dataset='d', split='s', segment_id='seg')
+    t1 = [
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='camera|front', timestamp=1),
+        camera_image=CameraImage(sensor_name='c1')),
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='labels|cuboids', timestamp=1),
+        cuboids=[Cuboid(track_id='track_id1')]),
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='lidar|front', timestamp=1),
+        point_cloud=PointCloud(sensor_name='p1')),
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='ego_pose', timestamp=1),
+        transform=Transform(src_frame='src_frame1')),
+    ]
+    t2 = [
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='camera|front', timestamp=1),
+        camera_image=CameraImage(sensor_name='c2')),
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='labels|cuboids', timestamp=1),
+        cuboids=[Cuboid(track_id='track_id2')]),
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='lidar|front', timestamp=1),
+        point_cloud=PointCloud(sensor_name='p2')),
+      StampedDatum(
+        uri=BASE_URI.replaced(topic='ego_pose', timestamp=1),
+        transform=Transform(src_frame='src_frame2')),
+    ]
+    _, df1 = create_sd_table_and_df(spark, t1)
+    _, df2 = create_sd_table_and_df(spark, t2)
+    
+    difftxt = StampedDatumTableFactory.find_diff(df1, df2)
+
+    assert "Datum mismatch" in difftxt
+    
+    assert "-                   'sensor_name': 'c1'" in difftxt
+    assert "+                   'sensor_name': 'c2'" in difftxt
+
+    assert "-               'track_id': 'track_id1'" in difftxt
+    assert "+               'track_id': 'track_id2'" in difftxt
+    
+    assert "-                  'sensor_name': 'p1'" in difftxt
+    assert "+                  'sensor_name': 'p2'" in difftxt
+
+    assert "-                'src_frame': 'src_frame1'" in difftxt
+    assert "+                'src_frame': 'src_frame2'" in difftxt
+
diff --git a/test/table/test_sd_table_factory.py b/test/table/test_sd_table_factory.py
new file mode 100644
index 0000000..e18a275
--- /dev/null
+++ b/test/table/test_sd_table_factory.py
@@ -0,0 +1,14 @@
+# Copyright 2022 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
diff --git a/test/testutil.py b/test/testutil.py
new file mode 100644
index 0000000..5566bd5
--- /dev/null
+++ b/test/testutil.py
@@ -0,0 +1,175 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import tempfile
+from pathlib import Path
+
+from oarphpy import util as oputil
+from oarphpy.spark import SessionFactory
+
+import psegs
+from psegs import util
+from psegs.spark import Spark
+from psegs.cache import LocalDiskCache
+
+# FIXME put in mount!
+# PS_TEST_TEMPDIR_ROOT = os.path.join(tempfile.gettempdir(), 'psegs_test')
+PS_TEST_TEMPDIR_ROOT = '/opt/psegs/psegs_test'
+
+
+class LocalSpark(Spark):
+  """A local Spark session; should result in only one session being created
+  per testing run"""
+
+  SRC_ROOT_MODULES = ['psegs', 'test']
+
+def test_tempdir(name, clean=True):
+  """Return `Path` of dir `name` under the test root; `cleandir` it if `clean`."""
+  path = os.path.join(PS_TEST_TEMPDIR_ROOT, name)
+  if clean:
+    oputil.cleandir(path)
+  return Path(path)
+
+def skip_if_fixture_absent(path):
+  if not os.path.exists(path):
+    import pytest
+    pytest.skip("This test requires %s" % path)
+
+
+def assert_img_directories_equal(actual_dir, expected_dir):
+  """Assert each `actual_dir` file byte-matches its `expected_dir` namesake."""
+  util.log.info("Inspecting artifacts in %s ..." % expected_dir)
+  expected_dir = Path(expected_dir)
+  for actual in oputil.all_files_recursive(actual_dir):
+    actual = Path(actual)
+    expected = expected_dir / actual.name
+    # `read_bytes` closes each file; bare `open(...).read()` leaked handles
+    if actual.read_bytes() != expected.read_bytes():
+      import imageio
+      actual_img = imageio.imread(actual)
+      expected_img = imageio.imread(expected)
+      # Write the elementwise difference image next to the actual file
+      diff = expected_img - actual_img
+      n_pixels = (diff != 0).sum() / 3
+      diff_path = str(actual) + '.diff.png'
+      imageio.imwrite(diff_path, diff)
+      assert False, \
+        "File mismatch \n%s != %s ,\n %s pixels different, diff: %s" % (
+          actual, expected, n_pixels, diff_path)
+
+  util.log.info("Good! %s == %s" % (actual_dir, expected_dir))
+
+
+def check_sample_debug_images(sample, expected_dir, testname=''):
+  outdir = test_tempdir(testname or sample.uri.segment_id)
+
+  def save(path, img):
+    import imageio
+    imageio.imwrite(path, img)
+    util.log.info("Saved %s" % path)
+
+  cuboids = sample.cuboid_labels
+  for pc in sample.lidar_clouds:
+    path = outdir / ('%s_bev.png' % pc.sensor_name)
+    save(path, pc.get_bev_debug_image(cuboids=cuboids))
+    
+    path = outdir / ('%s_rv.png' % pc.sensor_name)
+    save(path, pc.get_front_rv_debug_image(cuboids=cuboids))
+
+    path = outdir / ('%s_bev_painted.png' % pc.sensor_name)
+    save(path, pc.get_bev_debug_image(
+                    camera_images=sample.camera_images))
+    
+    path = outdir / ('%s_rv_painted.png' % pc.sensor_name)
+    save(path, pc.get_front_rv_debug_image(
+                    camera_images=sample.camera_images))
+
+  for ci in sample.camera_images:
+    path = outdir / ('%s_debug.png' % ci.sensor_name)
+    save(
+      path,
+      ci.get_debug_image(
+        clouds=sample.lidar_clouds,
+        cuboids=cuboids))
+
+  assert_img_directories_equal(outdir, expected_dir)
+
+
+def check_stamped_datum_dfs_equal(
+        spark,
+        sd_df_actual,
+        sd_df_expected_path='',
+        testname='',
+        sd_df_expected=None):
+  
+  from psegs.table.sd_table import StampedDatumTable
+  from psegs.spark import save_df_thunks
+
+  if not testname:
+    seg_df = sd_df_actual.select('uri.segment_id').orderBy('uri.segment_id')
+    seg_name = seg_df.first().segment_id
+    testname = seg_name
+  
+  # Make tests faster and artifacts more compact
+  sd_df_actual = sd_df_actual.repartition(5).persist()
+
+  actual_path = test_tempdir(testname)
+  util.log.info("Testing serialization of actual to %s ..." % actual_path)
+  save_df_thunks(
+        [lambda: sd_df_actual],
+        spark_save_opts=dict(
+          path=str(actual_path),
+          format='parquet',
+          partitionBy=StampedDatumTable.PARTITION_KEYS,
+          compression='lz4',
+        ))
+  
+  if sd_df_expected is None:
+    util.log.info("Fetching expected from %s ..." % sd_df_expected_path)
+    assert (
+      sd_df_expected_path and 
+        (not oputil.missing_or_empty(str(sd_df_expected_path))))
+    sd_df_expected = spark.read.parquet(str(sd_df_expected_path))
+
+  difftxt = StampedDatumTable.find_diff(sd_df_actual, sd_df_expected)
+  assert difftxt == '', \
+        "Non-zero diff!\nActual path %s\nExpected path %s\nDiff:\n%s" % (
+          actual_path, sd_df_expected_path, difftxt)
+
+def test_fixtures_dir():
+  # Path to fixtures *included* with PSegs
+  return Path(__file__).parent / 'fixtures'
+
+
+class PSegsTestLocalDiskCache(LocalDiskCache):
+  """A `LocalDiskCache` that uses a test-defined root dir"""
+
+  TEST_ROOT = Path('/tmp')
+
+  @classmethod
+  def cache_cls_for_testroot(cls, testroot):
+    class MyTestLocalDiskCache(cls):
+      TEST_ROOT = testroot
+    return MyTestLocalDiskCache
+
+  def new_filepath(self, fname, t=None):
+    dest = self.TEST_ROOT / 'psegs_local_disk_cache' / 'adhoc_files' / fname
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    return dest
+
+  def new_dirpath(self, dirpath, t=None):
+    dest = self.TEST_ROOT / 'psegs_local_disk_cache' / 'adhoc_dirs' / dirpath
+    dest.mkdir(parents=True, exist_ok=True)
+    return dest
diff --git a/test/util/__init__.py b/test/util/__init__.py
new file mode 100644
index 0000000..c0ec9ac
--- /dev/null
+++ b/test/util/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/test/util/test_misc.py b/test/util/test_misc.py
new file mode 100644
index 0000000..2480a0f
--- /dev/null
+++ b/test/util/test_misc.py
@@ -0,0 +1,58 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import attr
+import numpy as np
+import pytest
+
+from psegs.util import misc
+
+def test_utils_attrs_eq_no_numpy():
+
+  @attr.s(eq=False)
+  class NoNumpy(object):
+    x = attr.ib(default=1)
+    def __eq__(self, other):
+      return misc.attrs_eq(self, other)
+
+  assert NoNumpy() == NoNumpy()
+  assert NoNumpy(x=2) != NoNumpy(x=3)
+  with pytest.raises(TypeError):
+    assert NoNumpy() != object()
+
+
+def test_utils_attrs_eq_one_numpy():
+  """`misc.attrs_eq` must compare classes that hold a numpy array attribute."""
+  @attr.s(eq=False)  # else the attrs-generated __eq__ replaces the one below
+  class OneNumpy(object):
+    x = attr.ib(default=1)
+    y = attr.ib(default=np.ones((1, 1)))
+    def __eq__(self, other):
+      return misc.attrs_eq(self, other)
+
+  assert OneNumpy() == OneNumpy()
+  assert OneNumpy(x=2) != OneNumpy(x=3)
+  assert OneNumpy(y=np.zeros((1,))) != OneNumpy(y=np.ones((1,)))
+
+
+def test_utils_get_png_wh():
+  with pytest.raises(ValueError):
+    misc.get_png_wh(bytearray(b''))
+  
+  from oarphpy import util as oputil
+  img = np.zeros((20, 25, 3))
+  png_bytes = oputil.to_png_bytes(img)
+
+  w, h = misc.get_png_wh(bytearray(png_bytes))
+  assert (h, w) == (20, 25)
diff --git a/test/util/test_plotting.py b/test/util/test_plotting.py
new file mode 100644
index 0000000..89cadd5
--- /dev/null
+++ b/test/util/test_plotting.py
@@ -0,0 +1,168 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import numpy as np
+from oarphpy import util as oputil
+
+from psegs.util import plotting as pspl
+
+from test import testutil
+
+
+def check_img(actual, fixture_name):
+  """Dump `actual` as a PNG and assert it matches the fixture's bytes."""
+  FIXTURES_DIR = testutil.test_fixtures_dir()
+  OUTPUT_DIR = testutil.test_tempdir('test_plotting')
+  oputil.mkdir(OUTPUT_DIR)
+  # Dump actual first, in case the fixture doesn't exist yet (new test)
+  actual_bytes = oputil.to_png_bytes(actual)
+  actual_path = OUTPUT_DIR / ('actual_' + fixture_name)
+  with open(actual_path, 'wb') as f:
+    f.write(actual_bytes)
+  with open(FIXTURES_DIR / fixture_name, 'rb') as f:
+    expected_bytes = f.read()
+  assert actual_bytes == expected_bytes, "Check %s" % actual_path
+
+
+def test_draw_xy_depth_in_image():
+  """Check `draw_xy_depth_in_image()` renderings against PNG fixtures."""
+  # Build the (x, y, depth) points for a test image:
+  #  * One point every 10 pixels in x- and y- directions
+  #  * Each point's depth is its row index in that grid (as meters)
+  h, w = 1000, 100
+  apts = np.array([
+    (col * 10, row * 10, row)
+    for row in range(h // 10)
+    for col in range(w // 10)
+  ])
+
+  canvas = np.zeros((h, w, 3))
+  pspl.draw_xy_depth_in_image(canvas, apts, marker_radius=0)
+  check_img(canvas, 'test_draw_xy_depth_in_image.png')
+
+  canvas_r1 = np.zeros((h, w, 3))
+  pspl.draw_xy_depth_in_image(canvas_r1, apts, marker_radius=1)
+  check_img(canvas_r1, 'test_draw_xy_depth_in_image_radius_2.png')
+
+  # Test user colors
+  canvas_colors = np.zeros((h, w, 3))
+  pspl.draw_xy_depth_in_image(
+    canvas_colors, apts, marker_radius=1, user_colors=255 * np.cos(apts / 10))
+  check_img(canvas_colors, 'test_draw_xy_depth_in_image_user_colors.png')
+
+
+def test_draw_depth_in_image():
+  """Check `draw_depth_in_image()` output against a PNG fixture."""
+  # Create a depth channel for a test image:
+  #  * Top half depth is just a linear function of xy coord
+  #  * Bottom half is all invalid depth: -1 on the left, NaN on the right
+  h, w = 1000, 100
+  depth = np.zeros((h, w))
+  for y in range(h):
+    for x in range(w):
+      if y < .5 * h:
+        depth[y, x] = x + y
+      else:
+        depth[y, x] = -1 if (x < .5 * w) else float('nan')
+
+  actual = np.zeros((h, w, 3))
+  pspl.draw_depth_in_image(actual, depth)
+  # To inspect the rendered image, see the 'actual_*' PNG that
+  # `check_img()` saves to the test tempdir.
+  check_img(actual, 'test_draw_depth_in_image.png')
+
+
+def test_draw_cuboid_xy_in_image():
+  """Check `draw_cuboid_xy_in_image()` output against a PNG fixture."""
+  # Corners of the cuboid's front face, then its back face
+  front_face = [
+    [50, 50],
+    [50, 75],
+    [75, 75],
+    [75, 50],
+  ]
+  back_face = [
+    [15, 15],
+    [15, 40],
+    [40, 40],
+    [40, 15],
+  ]
+  cube = np.array(front_face + back_face)
+
+  actual = np.zeros((100, 100, 3))
+  pspl.draw_cuboid_xy_in_image(actual, cube, (128, 0, 128))
+  check_img(actual, 'test_draw_cuboid_xy_in_image.png')
+
+
+def test_draw_bbox_in_image():
+  """Draw several `BBox2D`s into one image and compare to a PNG fixture."""
+  from psegs.datum.bbox2d import BBox2D
+
+  img = np.zeros((100, 200, 3))
+
+  # (x, y, width, height, category_name); the last box has an empty label
+  BOX_SPECS = (
+    (80, 40, 20, 20, 'center'),
+    (5, 5, 40, 20, 'up_left'),
+    (150, 75, 40, 20, 'low_right'),
+    (5, 75, 10, 10, ''),
+  )
+  for x, y, width, height, category_name in BOX_SPECS:
+    bbox = BBox2D(
+      x=x, y=y, width=width, height=height, category_name=category_name)
+    pspl.draw_bbox_in_image(img, bbox)
+  check_img(img, 'test_draw_bbox_in_image.png')
+
+
+def test_get_ortho_debug_image():
+  """Render a 3-d spiral via `get_ortho_debug_image()` for several windows."""
+
+  # Create a circular spiral in 3-d
+  t = np.arange(0, 2 * np.pi, 2 * np.pi / 100)
+  r = (t / (2 * np.pi))
+  uvd = np.column_stack([r * np.cos(t), r * np.sin(t), t])
+
+  def draw_window(uvd, bounds):
+    min_u, min_v, max_u, max_v = bounds
+    return pspl.get_ortho_debug_image(
+              uvd,
+              pixels_per_meter=100,
+              marker_radius=2,
+              period_meters=2 * np.pi / 10,
+              min_u=min_u,
+              min_v=min_v,
+              max_u=max_u,
+              max_v=max_v)
+
+  # Each case: [min_u, min_v, max_u, max_v] bounds, then the fixture name
+  CASES = (
+    ([-1.25, -1.25, 1.25, 1.25],
+      'test_get_ortho_debug_image_all_manual.png'),
+    ([None, None, None, None],
+      'test_get_ortho_debug_image_autobound.png'),
+    ([0, 0, 1.25, 1.25],
+      'test_get_ortho_debug_image_q1.png'),
+    ([-1.25, 0, 0, 1.25],
+      'test_get_ortho_debug_image_q2.png'),
+    ([-1.25, -1.25, 0, 0],
+      'test_get_ortho_debug_image_q3.png'),
+    ([0, -1.25, 1.25, 0],
+      'test_get_ortho_debug_image_q4.png'),
+    ([1, 1, 2, 2],
+      'test_get_ortho_debug_image_empty_space.png'),
+  )
+  for bounds, fixture_name in CASES:
+    check_img(draw_window(uvd, bounds), fixture_name)
+  
\ No newline at end of file
diff --git a/test/xform/__init__.py b/test/xform/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/xform/test_charuco.py b/test/xform/test_charuco.py
new file mode 100644
index 0000000..f94c4c3
--- /dev/null
+++ b/test/xform/test_charuco.py
@@ -0,0 +1,153 @@
+# Copyright 2023 Maintainers of PSegs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from oarphpy import util as oputil
+from psegs import datum
+from psegs.xform import charuco as psc
+
+from test import testutil
+
+try:
+  psc.check_opencv_version_for_aruco()
+except ImportError:
+  HAVE_OBJDET_ARUCO = False
+else:
+  HAVE_OBJDET_ARUCO = True
+# Tests marked with this are skipped when the check above raised ImportError
+skip_if_no_objdet_aruco = pytest.mark.skipif(not HAVE_OBJDET_ARUCO,
+  reason="Requires modern OpenCV Aruco, see `check_opencv_version_for_aruco()`")
+
+
+def check_img(actual, fixture_name, actual_output_dir):
+  """Dump `actual` as a PNG; assert it matches the fixture if one exists."""
+  import os
+  expected_path = testutil.test_fixtures_dir() / 'test_charuco_output' / fixture_name
+  oputil.mkdir(actual_output_dir)
+  # Dump actual first, in case the fixture doesn't exist yet (new test)
+  actual_bytes = oputil.to_png_bytes(actual)
+  actual_path = actual_output_dir / ('actual_' + fixture_name)
+  with open(actual_path, 'wb') as f:
+    f.write(actual_bytes)
+  if not os.path.exists(expected_path):
+    print('fixme TODO: no fixture %s; wrote %s' % (expected_path, actual_path))
+    return
+  with open(expected_path, 'rb') as f:
+    expected_bytes = f.read()
+  assert actual_bytes == expected_bytes, "Check %s" % actual_path
+
+
+@skip_if_no_objdet_aruco
+def test_charuco_detect_board():
+  """Run charuco board detection on fixture frames and dump a debug image.
+
+  NOTE(review): this test looks like work-in-progress; the early `return`
+  in the loop below leaves the later checks disabled.
+  """
+  import cv2
+
+  ACTUAL_OUTPUT_DIR = testutil.test_tempdir('test_charuco_detect_board')
+
+  FIXTURE_INPUT_DIR = testutil.test_fixtures_dir() / 'test_charuco'
+
+  board = psc.CharucoBoardParams(
+            dict_key='DICT_6X6_1000',
+            cols=11,
+            rows=8,
+            square_length_meters=0.022,
+            marker_length_meters=0.017,
+            is_legacy_pattern=True)
+
+  FRAMES_TO_CHECK = (
+    'frame_00000.jpg',
+    'frame_00021.jpg',
+    'frame_00057.jpg',
+  )
+
+  for frame_fname in FRAMES_TO_CHECK:
+
+    img_gray = cv2.imread(
+      str(FIXTURE_INPUT_DIR / frame_fname), cv2.IMREAD_GRAYSCALE)
+    dets = psc.charuco_detect_board(board, img_gray)
+    debug_images = psc.charuco_create_debug_images(img_gray, dets)
+
+    # todo moveme
+    psc.charuco_should_use_board_marker_corners(dets)
+    psc.charuco_detections_to_point2ds(dets)
+
+    # TODO moveme
+    # Dump a debug image into the test tempdir
+    p2ds = psc.charuco_detections_to_point2ds(dets)
+    p2d = p2ds[0]
+    import imageio
+    oputil.mkdir(ACTUAL_OUTPUT_DIR)
+    imageio.imwrite(
+      str(ACTUAL_OUTPUT_DIR / 'p2d_test.jpg'), p2d.get_debug_points_image())
+    # FIXME: this early exit stops after the first frame and leaves every
+    # check below unreachable; remove it once those checks are finished.
+    return
+
+
+    # need cases where some detections are null
+    import copy
+    dets_markers_null = copy.deepcopy(dets)
+    dets_markers_null.aruco_marker_corners = None
+    dets_markers_null.aruco_marker_ids = None
+    psc.charuco_detections_to_point2ds(dets_markers_null)
+
+    dets_bmarkers_null = copy.deepcopy(dets)
+    dets_bmarkers_null.charuco_marker_corners = None
+    dets_bmarkers_null.charuco_marker_ids = None
+    psc.charuco_detections_to_point2ds(dets_bmarkers_null)
+
+    dets_board_null = copy.deepcopy(dets)
+    dets_board_null.charuco_corners = None
+    dets_board_null.charuco_ids = None
+    psc.charuco_detections_to_point2ds(dets_board_null)
+
+    dets_ch_markers_has_diff = copy.deepcopy(dets)
+    dets_ch_markers_has_diff.charuco_marker_ids = (
+      copy.deepcopy(dets_ch_markers_has_diff.aruco_marker_ids))
+    dets_ch_markers_has_diff.charuco_marker_corners = (
+      copy.deepcopy(dets_ch_markers_has_diff.aruco_marker_corners))
+    # Perturb the copied corners so they differ from the aruco detections
+    for c in dets_ch_markers_has_diff.charuco_marker_corners:
+      c += 0.1
+    psc.charuco_detections_to_point2ds(dets_ch_markers_has_diff)
+
+
+
+    # TODO moveme
+    p2ds = psc.charuco_detections_to_point2ds(dets)
+    datums = [
+      datum.StampedDatum(uri=datum.URI(), points_2d=p2d) for p2d in p2ds
+    ]
+    from psegs.datum.stamped_datum import Sample
+    from psegs.table.sd_table import StampedDatumTable
+    sdt = StampedDatumTable.from_sample(Sample(datums=datums))
+    sdf = sdt.to_spark_df()
+    sdf.show()
+
+
+
+    DEBUGS_TO_CHECK = (
+      'debug_marker_detections',
+      'debug_marker_rejections',
+      'debug_board_image',
+      'debug_board_detections')
+    for debug_to_check in DEBUGS_TO_CHECK:
+      actual = getattr(debug_images, debug_to_check)
+      fixture_name = f'{frame_fname}.{debug_to_check}.png'
+      check_img(actual, fixture_name, ACTUAL_OUTPUT_DIR)
\ No newline at end of file