diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..30b98bc --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,57 @@ +name: CI + +on: + pull_request: + push: + branches: + - main + workflow_dispatch: + +permissions: + contents: read + +jobs: + frontend: + runs-on: ubuntu-latest + defaults: + run: + working-directory: webapp + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: 24 + cache: npm + cache-dependency-path: webapp/package-lock.json + + - name: Install frontend dependencies + run: npm ci + + - name: Frontend lint + run: npm run lint + + - name: Frontend build + run: npm run build + + backend: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Setup uv + uses: astral-sh/setup-uv@v5 + + - name: Backend lint + run: uv run --extra dev python -m ruff check backend scripts + + - name: Backend syntax check + run: python3 -m py_compile $(find backend scripts -name '*.py' -type f) diff --git a/README.md b/README.md index da5d056..3b5e058 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,9 @@ VideoDepthViewer3D is a high-performance streaming MP4 depth viewer. It decodes - **WebSocket stream:** Depth maps are quantized (uint16), optionally downsampled, and sent with a binary header. - **Frontend (Vite + Three.js/WebXR)** - **DepthBuffer:** Manages sync, handles jitter, and re-requests missing frames. - - **Mesh generation:** Grid mesh displaced in the vertex shader by the depth map. - - **WebXR:** RawXR path renders the same mesh to both eyes; Three.js XR is disabled (experimental VR). + - **Mesh generation:** Grid mesh can be reconstructed either as a classic relief mesh or with pinhole-style reprojection. The default is now **pinhole**. + - **Shared viewer controls:** The same projection/depth controls drive both the normal Three.js path and the RawXR mesh path. + - **WebXR:** RawXR path renders the same reconstructed mesh to both eyes; Three.js XR is disabled (experimental VR). ### Setup & Run 1. **Install dependencies (uv):** @@ -62,9 +63,15 @@ VideoDepthViewer3D is a high-performance streaming MP4 depth viewer. It decodes ### Display Modes - **Normal (PC 2D/3D):** Three.js draws video + depth mesh on the page. -- **SBS Stereo (0DOF):** Side-by-side split for passive stereo (no head tracking). +- **SBS Stereo (0DOF):** Side-by-side split for passive stereo (no head tracking). You can optionally swap left/right eyes with the `Swap L/R` checkbox. - **VR (RawXR):** Starts WebXR and renders the depth mesh to both eyes via the RawXR pipeline (experimental). +### Viewer Controls +- **Projection:** `pinhole` is the default. `relief` remains available for comparison. +- **View FOV Y / Source FOV Y:** Separate the display camera FOV from the source reprojection FOV. +- **Camera Dist / Model Z:** Experimental 2D/SBS-only placement controls for moving the display camera or the reconstructed mesh without changing backend throughput. +- **Z Scale / Z Bias / Z Gamma / Z Max Clip / Plane Scale / Y Offset:** Depth shaping and placement controls shared by Three.js and RawXR. + ### Configuration You can tune performance via backend environment variables and frontend URL parameters. @@ -117,8 +124,9 @@ VideoDepthViewer3D は、MP4 動画をリアルタイムに深度推定して 3D - **WebSocket ストリーム:** 深度マップを uint16 量子化し、必要に応じてダウンサンプルしてバイナリヘッダー付きで送信。 - **フロントエンド (Vite + Three.js/WebXR)** - **DepthBuffer:** 同期管理と欠落フレーム再要求で滑らかな再生を維持。 - - **メッシュ生成:** グリッドメッシュを深度マップで頂点変位。 - - **WebXR:** RawXR (WebGL2) で両目に同じメッシュを描画。同期ズレや黒画面問題を解消。 + - **メッシュ生成:** グリッドメッシュを、従来の relief 方式または pinhole 方式で再構成できます。現在のデフォルトは **pinhole** です。 + - **共通ビューア設定:** 通常の Three.js 描画と RawXR のメッシュ描画は同じ投影/深度コントロールを共有します。 + - **WebXR:** RawXR (WebGL2) で両目に同じ再構成メッシュを描画。同期ズレや黒画面問題を解消。 ### セットアップと実行 1. **依存関係のインストール (uv):** @@ -155,9 +163,15 @@ VideoDepthViewer3D は、MP4 動画をリアルタイムに深度推定して 3D ### 表示モード - **通常 (PC 2D/3D):** Three.js が動画と深度メッシュを描画。 -- **SBS Stereo (0DOF):** サイドバイサイドのパッシブ立体視(頭トラッキングなし)。 +- **SBS Stereo (0DOF):** サイドバイサイドのパッシブ立体視(頭トラッキングなし)。`Swap L/R` チェックで左右の目を入れ替えられます。 - **VR (RawXR):** WebXR を開始し、最適化されたパイプラインで描画。コントローラーで視点操作(オービット、パン、ズーム)が可能。(実験的段階) +### ビューア設定 +- **Projection:** デフォルトは `pinhole` です。比較用に `relief` も残しています。 +- **View FOV Y / Source FOV Y:** 表示カメラの FOV と、再投影に使う source 側の FOV を分離しています。 +- **Camera Dist / Model Z:** 2D/SBS 専用の実験的な配置コントロールです。バックエンドの処理負荷を変えずに、表示カメラや再構成メッシュの前後位置を調整できます。 +- **Z Scale / Z Bias / Z Gamma / Z Max Clip / Plane Scale / Y Offset:** Three.js と RawXR で共通の深度整形・配置パラメータです。 + ### 設定一覧 環境変数(バックエンド)と URL パラメータ(フロントエンド)でパフォーマンスを調整できます。 diff --git a/backend/main.py b/backend/main.py index ec878dd..a28f8c2 100644 --- a/backend/main.py +++ b/backend/main.py @@ -3,16 +3,18 @@ from __future__ import annotations import os -os.environ["DA3_LOG_LEVEL"] = "WARN" from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel +from backend.config import get_settings from backend.routers import session as session_router from backend.routers import stream as stream_router -from backend.config import get_settings from backend.video.session import get_session_manager +os.environ["DA3_LOG_LEVEL"] = "WARN" + def create_app() -> FastAPI: app = FastAPI(title="VideoDepthViewer3D") @@ -38,9 +40,6 @@ async def clear_cached_sessions() -> None: return app - -from pydantic import BaseModel - class LogMessage(BaseModel): message: str diff --git a/backend/models/depth_model.py b/backend/models/depth_model.py index ece57e4..41974c7 100644 --- a/backend/models/depth_model.py +++ b/backend/models/depth_model.py @@ -4,7 +4,6 @@ import asyncio from dataclasses import dataclass -from pathlib import Path from typing import Optional import numpy as np diff --git a/backend/routers/stream.py b/backend/routers/stream.py index 77d9510..dbc57d5 100644 --- a/backend/routers/stream.py +++ b/backend/routers/stream.py @@ -12,7 +12,7 @@ from backend.models.depth_model import get_depth_model from backend.utils.packets import pack_depth_payload -from backend.utils.depth_ops import downsample_depth +from backend.video.io import EndOfStreamError from backend.video.session import DepthFrame, get_session_manager, SessionManager from backend.config import get_settings from backend.utils.queues import DroppingQueue @@ -50,6 +50,14 @@ async def depth_stream( stats = StatisticsCollector() + async def cancel_pending(*tasks: asyncio.Task) -> None: + pending = [task for task in tasks if task is not None and not task.done()] + for task in pending: + task.cancel() + if pending: + with suppress(Exception): + await asyncio.gather(*pending, return_exceptions=True) + async def receiver() -> None: try: while True: @@ -122,7 +130,7 @@ async def process_request_pipeline(request: dict, session) -> tuple[bytes | None timings["decode_s"] = decode_s # print(f"[Backend] Processing: {time_ms}ms. QueueWait: {timings['queue_wait_s']:.3f}s. Decode: {timings['decode_s']:.3f}s") - except StopIteration: + except EndOfStreamError: # EOF return None, timings except Exception as e: @@ -227,9 +235,9 @@ async def processor() -> None: get_task = asyncio.create_task(request_queue.get()) stop_task = asyncio.create_task(stop.wait()) done, _ = await asyncio.wait([get_task, stop_task], return_when=asyncio.FIRST_COMPLETED) + await cancel_pending(get_task, stop_task) if stop_task in done: - get_task.cancel() break request = get_task.result() @@ -275,9 +283,9 @@ async def sender() -> None: get_task = asyncio.create_task(send_queue.get()) stop_task = asyncio.create_task(stop.wait()) done, _ = await asyncio.wait([get_task, stop_task], return_when=asyncio.FIRST_COMPLETED) + await cancel_pending(get_task, stop_task) if stop_task in done: - get_task.cancel() break task = get_task.result() @@ -311,6 +319,7 @@ async def sender() -> None: int(timings.get("inflight_used", 0)), ) except Exception: + stop.set() break # Socket closed except asyncio.CancelledError: break diff --git a/backend/utils/queues.py b/backend/utils/queues.py index 6d07028..f75de4b 100644 --- a/backend/utils/queues.py +++ b/backend/utils/queues.py @@ -1,6 +1,6 @@ import asyncio from collections import deque -from typing import Generic, TypeVar, Optional +from typing import Generic, TypeVar T = TypeVar("T") diff --git a/backend/video/io.py b/backend/video/io.py index c54530a..28df234 100644 --- a/backend/video/io.py +++ b/backend/video/io.py @@ -14,6 +14,23 @@ from backend.utils.frame_info import FrameInfo, frame_info_from_av +class EndOfStreamError(Exception): + """Raised when the decoder reaches EOF for a requested timestamp.""" + + +def _is_av_eof(exc: Exception) -> bool: + av_error_mod = getattr(av, "error", None) + eof_types = tuple( + cls + for cls in ( + getattr(av, "EOFError", None), + getattr(av_error_mod, "EOFError", None), + ) + if isinstance(cls, type) + ) + return isinstance(exc, eof_types) if eof_types else False + + @dataclass(frozen=True) class VideoMetadata: width: int @@ -115,7 +132,12 @@ def _advance_to(self, time_ms: float) -> tuple[np.ndarray, FrameInfo]: frame = next(self._frame_iter) except StopIteration as exc: # pragma: no cover - EOF self._frame_iter = None - raise StopIteration from exc + raise EndOfStreamError from exc + except Exception as exc: # pragma: no cover - decoder EOF varies by PyAV build + if _is_av_eof(exc): + self._frame_iter = None + raise EndOfStreamError from exc + raise info = frame_info_from_av(frame) frames_examined += 1 actual_time = info.time_ms if info.time_ms >= 0 else time_ms diff --git a/backend/video/session.py b/backend/video/session.py index 48c5570..ef6ed91 100644 --- a/backend/video/session.py +++ b/backend/video/session.py @@ -14,7 +14,7 @@ from fastapi import UploadFile from backend.config import get_settings -from backend.video.io import DecoderPool, FrameDecoder, VideoMetadata +from backend.video.io import DecoderPool, VideoMetadata @dataclass diff --git a/pyproject.toml b/pyproject.toml index e6dfeb9..b0f8742 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "videodepthviewer3d" -version = "1.0.6" +version = "1.1.0" description = "Streaming Video Depth viewer backend" authors = [{ name = "Codex" }] readme = "README.md" diff --git a/webapp/index.html b/webapp/index.html index ad92370..c1105f4 100644 --- a/webapp/index.html +++ b/webapp/index.html @@ -34,6 +34,12 @@