video-db · lalit-videodb · Mar 17, 2026 · Mar 17, 2026 · Mar 17, 2026 · Mar 18, 2026
diff --git a/setup.py b/setup.py
@@ -39,7 +39,7 @@
         "websockets>=11.0.3",
     ],
     extras_require={
-        "capture": ["videodb-capture-bin>=0.2.8"],
+        "capture": ["videodb-capture-bin>=0.2.10"],
     },
     classifiers=[
         "Intended Audience :: Developers",

diff --git a/videodb/__about__.py b/videodb/__about__.py
@@ -2,7 +2,7 @@
 
 
 
-__version__ = "0.4.2"
+__version__ = "0.4.3"
 __title__ = "videodb"
 __author__ = "videodb"
 __email__ = "contact@videodb.io"

diff --git a/videodb/asset.py b/videodb/asset.py
@@ -37,8 +37,8 @@ def __init__(
         end: Optional[float] = None,
     ) -> None:
         super().__init__(asset_id)
-        self.start: int = start
-        self.end: Union[int, None] = end
+        self.start: Optional[float] = start
+        self.end: Optional[float] = end
 
     def to_json(self) -> dict:
         return copy.deepcopy(self.__dict__)
@@ -63,8 +63,8 @@ def __init__(
         fade_out_duration: Optional[Union[int, float]] = 0,
     ):
         super().__init__(asset_id)
-        self.start: int = start
-        self.end: Union[int, None] = end
+        self.start: Optional[float] = start
+        self.end: Optional[float] = end
         self.disable_other_tracks: bool = disable_other_tracks
         self.fade_in_duration: Union[int, float] = validate_max_supported(
             fade_in_duration, MaxSupported.fade_duration, "fade_in_duration"

diff --git a/videodb/capture.py b/videodb/capture.py
@@ -91,7 +91,6 @@ def to_dict(self) -> Dict[str, Any]:
             "channel_id": self.id,
             "type": self.type,
             "name": self.name,
-            "record": True,
             "store": self.store,
             "is_primary": self.is_primary,
         }
@@ -134,21 +133,24 @@ def __init__(
         mics: List[AudioChannel] = None,
         displays: List[VideoChannel] = None,
         system_audio: List[AudioChannel] = None,
+        cameras: List[VideoChannel] = None,
     ):
         self.mics: ChannelList = ChannelList(mics or [])
         self.displays: ChannelList = ChannelList(displays or [])
         self.system_audio: ChannelList = ChannelList(system_audio or [])
+        self.cameras: ChannelList = ChannelList(cameras or [])
 
     def __repr__(self):
         return (
             f"Channels("
             f"mics={len(self.mics)}, "
             f"displays={len(self.displays)}, "
-            f"system_audio={len(self.system_audio)})"
+            f"system_audio={len(self.system_audio)}, "
+            f"cameras={len(self.cameras)})"
         )
 
     def all(self) -> List[Channel]:
-        """Return a flat list of all channels."""
+        """Return a flat list of all capturable channels (excludes cameras)."""
         return list(self.mics) + list(self.displays) + list(self.system_audio)
 
 
@@ -334,30 +336,34 @@ async def list_channels(self) -> Channels:
         mics = []
         displays = []
         system_audio = []
-
+        cameras = []
+
         for ch in raw_channels:
             c_type = ch.get("type")
             c_id = ch.get("channel_id") or ch.get("id")
             c_name = ch.get("name", "")
-            
+
             if not c_id:
                 logger.warning(f"Skipping channel with missing ID: {ch}")
                 continue
 
-            # Categorize based on type and name patterns
-            if c_type == "video":
+            # Categorize by channel ID prefix, falling back to the reported type
+            if c_id.startswith("mic:"):
+                mics.append(AudioChannel(id=c_id, name=c_name, client=self))
+            elif c_id.startswith("display:") or c_id.startswith("screen:"):
                 displays.append(VideoChannel(id=c_id, name=c_name, client=self))
+            elif c_id.startswith("system_audio:"):
+                system_audio.append(AudioChannel(id=c_id, name=c_name, client=self))
+            elif c_id.startswith("camera:"):
+                cameras.append(VideoChannel(id=c_id, name=c_name, client=self))
             elif c_type == "audio":
-                # Distinguish between mic and system audio based on common patterns
-                name_lower = c_name.lower()
-                if "system" in name_lower or "output" in name_lower or "speaker" in name_lower:
-                    system_audio.append(AudioChannel(id=c_id, name=c_name, client=self))
-                else:
-                    mics.append(AudioChannel(id=c_id, name=c_name, client=self))
+                mics.append(AudioChannel(id=c_id, name=c_name, client=self))
+            elif c_type == "video":
+                displays.append(VideoChannel(id=c_id, name=c_name, client=self))
             else:
                 logger.debug(f"Unknown channel type '{c_type}' for channel '{c_name}'")
-                
-        return Channels(mics=mics, displays=displays, system_audio=system_audio)
+
+        return Channels(mics=mics, displays=displays, system_audio=system_audio, cameras=cameras)
 
     async def start_session(
         self,

diff --git a/videodb/editor.py b/videodb/editor.py
@@ -1,5 +1,7 @@
 import json
+import logging
 import requests
+import warnings
 
 from typing import List, Optional, Union
 from enum import Enum
@@ -8,6 +10,8 @@
 from videodb.exceptions import InvalidRequestError
 
 
+logger = logging.getLogger(__name__)
+
 MAX_PAYLOAD_SIZE = 100 * 1024
 
 
@@ -840,6 +844,11 @@ def __init__(
     ):
         """Initialize a CaptionAsset instance.
 
+        .. note::
+            When using ``src="auto"``, the video must be indexed first
+            (e.g. via ``video.index_spoken_words()``) so that a transcript
+            is available for caption generation.
+
         :param str src: Caption source ("auto" for auto-generated or base64 encoded ass string)
         :param FontStyling font: (optional) Font styling properties
         :param str primary_color: Primary text color in ASS format (default: "&H00FFFFFF")
@@ -849,6 +858,12 @@ def __init__(
         :param Positioning position: (optional) Caption positioning properties
         :param CaptionAnimation animation: (optional) Caption animation effect
         """
+        if src == "auto":
+            warnings.warn(
+                "CaptionAsset(src='auto'): the video must be indexed "
+                "(e.g. video.index_spoken_words()) for captions to be generated.",
+                stacklevel=2,
+            )
         self.src = src
         self.font = font if font is not None else FontStyling()
         self.primary_color = primary_color

diff --git a/videodb/search.py b/videodb/search.py
@@ -48,6 +48,8 @@ def _format_results(self):
                         scene_index_id=doc.get("scene_index_id"),
                         scene_index_name=doc.get("scene_index_name"),
                         metadata=doc.get("metadata"),
+                        stream_url=doc.get("stream_link"),
+                        player_url=doc.get("player_url"),
                     )
                 )
 

diff --git a/videodb/shot.py b/videodb/shot.py
@@ -35,6 +35,8 @@ def __init__(
         scene_index_id: Optional[str] = None,
         scene_index_name: Optional[str] = None,
         metadata: Optional[dict] = None,
+        stream_url: Optional[str] = None,
+        player_url: Optional[str] = None,
     ) -> None:
         self._connection = _connection
         self.video_id = video_id
@@ -47,8 +49,8 @@ def __init__(
         self.scene_index_id = scene_index_id
         self.scene_index_name = scene_index_name
         self.metadata = metadata
-        self.stream_url = None
-        self.player_url = None
+        self.stream_url = stream_url
+        self.player_url = player_url
 
     def __repr__(self) -> str:
         repr_str = (

diff --git a/videodb/video.py b/videodb/video.py
@@ -1,4 +1,4 @@
-from typing import Optional, Union, List, Dict, Tuple, Any
+from typing import Literal, Optional, Union, List, Dict, Tuple, Any
 from videodb._utils._video import play_stream
 from videodb._constants import (
     ApiPath,
@@ -249,17 +249,20 @@ def get_transcript_text(
     def generate_transcript(
         self,
         force: bool = None,
+        language_code: Optional[str] = None,
     ) -> str:
         """Generate transcript for the video.
 
         :param bool force: Force generate new transcript
+        :param str language_code: (optional) Language code of the video
         :return: Full transcript text as string
         :rtype: str
         """
         transcript_data = self._connection.post(
             path=f"{ApiPath.video}/{self.id}/{ApiPath.transcription}",
             data={
                 "force": True if force else False,
+                "language_code": language_code,
             },
         )
         transcript = transcript_data.get("word_timestamps", [])
@@ -702,9 +705,9 @@ def add_subtitle(self, style: SubtitleStyle = SubtitleStyle()) -> str:
     def clip(
             self,
             prompt: str,
-            content_type: str,
-            model_name: str,
-        ) -> str:
+            content_type: Literal["spoken", "visual", "multimodal"],
+            model_name: Literal["basic", "pro", "ultra"],
+        ) -> SearchResult:
             """Generate a clip from the video using a prompt.
             :param str prompt: Prompt to generate the clip
             :param str content_type: Content type for the clip. Valid options: "spoken", "visual", "multimodal"