Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
"websockets>=11.0.3",
],
extras_require={
"capture": ["videodb-capture-bin>=0.2.8"],
"capture": ["videodb-capture-bin>=0.2.10"],
},
classifiers=[
"Intended Audience :: Developers",
Expand Down
2 changes: 1 addition & 1 deletion videodb/__about__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@



__version__ = "0.4.2"
__version__ = "0.4.3"
__title__ = "videodb"
__author__ = "videodb"
__email__ = "contact@videodb.io"
Expand Down
8 changes: 4 additions & 4 deletions videodb/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def __init__(
end: Optional[float] = None,
) -> None:
super().__init__(asset_id)
self.start: int = start
self.end: Union[int, None] = end
self.start: Optional[float] = start
self.end: Optional[float] = end

def to_json(self) -> dict:
return copy.deepcopy(self.__dict__)
Expand All @@ -63,8 +63,8 @@ def __init__(
fade_out_duration: Optional[Union[int, float]] = 0,
):
super().__init__(asset_id)
self.start: int = start
self.end: Union[int, None] = end
self.start: Optional[float] = start
self.end: Optional[float] = end
self.disable_other_tracks: bool = disable_other_tracks
self.fade_in_duration: Union[int, float] = validate_max_supported(
fade_in_duration, MaxSupported.fade_duration, "fade_in_duration"
Expand Down
36 changes: 21 additions & 15 deletions videodb/capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ def to_dict(self) -> Dict[str, Any]:
"channel_id": self.id,
"type": self.type,
"name": self.name,
"record": True,
"store": self.store,
"is_primary": self.is_primary,
}
Expand Down Expand Up @@ -134,21 +133,24 @@ def __init__(
mics: List[AudioChannel] = None,
displays: List[VideoChannel] = None,
system_audio: List[AudioChannel] = None,
cameras: List[VideoChannel] = None,
):
self.mics: ChannelList = ChannelList(mics or [])
self.displays: ChannelList = ChannelList(displays or [])
self.system_audio: ChannelList = ChannelList(system_audio or [])
self.cameras: ChannelList = ChannelList(cameras or [])

def __repr__(self):
return (
f"Channels("
f"mics={len(self.mics)}, "
f"displays={len(self.displays)}, "
f"system_audio={len(self.system_audio)})"
f"system_audio={len(self.system_audio)}, "
f"cameras={len(self.cameras)})"
)

def all(self) -> List[Channel]:
"""Return a flat list of all channels."""
"""Return a flat list of all capturable channels (excludes cameras)."""
return list(self.mics) + list(self.displays) + list(self.system_audio)


Expand Down Expand Up @@ -334,30 +336,34 @@ async def list_channels(self) -> Channels:
mics = []
displays = []
system_audio = []

cameras = []

for ch in raw_channels:
c_type = ch.get("type")
c_id = ch.get("channel_id") or ch.get("id")
c_name = ch.get("name", "")

if not c_id:
logger.warning(f"Skipping channel with missing ID: {ch}")
continue

# Categorize based on type and name patterns
if c_type == "video":
# Categorize based on channel ID prefix
if c_id.startswith("mic:"):
mics.append(AudioChannel(id=c_id, name=c_name, client=self))
elif c_id.startswith("display:") or c_id.startswith("screen:"):
displays.append(VideoChannel(id=c_id, name=c_name, client=self))
elif c_id.startswith("system_audio:"):
system_audio.append(AudioChannel(id=c_id, name=c_name, client=self))
elif c_id.startswith("camera:"):
cameras.append(VideoChannel(id=c_id, name=c_name, client=self))
elif c_type == "audio":
# Distinguish between mic and system audio based on common patterns
name_lower = c_name.lower()
if "system" in name_lower or "output" in name_lower or "speaker" in name_lower:
system_audio.append(AudioChannel(id=c_id, name=c_name, client=self))
else:
mics.append(AudioChannel(id=c_id, name=c_name, client=self))
mics.append(AudioChannel(id=c_id, name=c_name, client=self))
elif c_type == "video":
displays.append(VideoChannel(id=c_id, name=c_name, client=self))
else:
logger.debug(f"Unknown channel type '{c_type}' for channel '{c_name}'")
return Channels(mics=mics, displays=displays, system_audio=system_audio)

return Channels(mics=mics, displays=displays, system_audio=system_audio, cameras=cameras)

async def start_session(
self,
Expand Down
15 changes: 15 additions & 0 deletions videodb/editor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
import logging
import requests
import warnings

from typing import List, Optional, Union
from enum import Enum
Expand All @@ -8,6 +10,8 @@
from videodb.exceptions import InvalidRequestError


logger = logging.getLogger(__name__)

MAX_PAYLOAD_SIZE = 100 * 1024


Expand Down Expand Up @@ -840,6 +844,11 @@ def __init__(
):
"""Initialize a CaptionAsset instance.

.. note::
When using ``src="auto"``, the video must be indexed first
(e.g. via ``video.index_spoken_words()``) so that a transcript
is available for caption generation.

:param str src: Caption source ("auto" for auto-generated captions, or a base64-encoded ASS string)
:param FontStyling font: (optional) Font styling properties
:param str primary_color: Primary text color in ASS format (default: "&H00FFFFFF")
Expand All @@ -849,6 +858,12 @@ def __init__(
:param Positioning position: (optional) Caption positioning properties
:param CaptionAnimation animation: (optional) Caption animation effect
"""
if src == "auto":
warnings.warn(
"CaptionAsset(src='auto'): the video must be indexed "
"(e.g. video.index_spoken_words()) for captions to be generated.",
stacklevel=2,
)
self.src = src
self.font = font if font is not None else FontStyling()
self.primary_color = primary_color
Expand Down
2 changes: 2 additions & 0 deletions videodb/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ def _format_results(self):
scene_index_id=doc.get("scene_index_id"),
scene_index_name=doc.get("scene_index_name"),
metadata=doc.get("metadata"),
stream_url=doc.get("stream_link"),
player_url=doc.get("player_url"),
)
)

Expand Down
6 changes: 4 additions & 2 deletions videodb/shot.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def __init__(
scene_index_id: Optional[str] = None,
scene_index_name: Optional[str] = None,
metadata: Optional[dict] = None,
stream_url: Optional[str] = None,
player_url: Optional[str] = None,
) -> None:
self._connection = _connection
self.video_id = video_id
Expand All @@ -47,8 +49,8 @@ def __init__(
self.scene_index_id = scene_index_id
self.scene_index_name = scene_index_name
self.metadata = metadata
self.stream_url = None
self.player_url = None
self.stream_url = stream_url
self.player_url = player_url

def __repr__(self) -> str:
repr_str = (
Expand Down
11 changes: 7 additions & 4 deletions videodb/video.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, Union, List, Dict, Tuple, Any
from typing import Literal, Optional, Union, List, Dict, Tuple, Any
from videodb._utils._video import play_stream
from videodb._constants import (
ApiPath,
Expand Down Expand Up @@ -249,17 +249,20 @@ def get_transcript_text(
def generate_transcript(
self,
force: bool = None,
language_code: Optional[str] = None,
) -> str:
"""Generate transcript for the video.

:param bool force: Force generation of a new transcript
:param str language_code: (optional) Language code of the video
:return: Full transcript text as string
:rtype: str
"""
transcript_data = self._connection.post(
path=f"{ApiPath.video}/{self.id}/{ApiPath.transcription}",
data={
"force": True if force else False,
"language_code": language_code,
},
)
transcript = transcript_data.get("word_timestamps", [])
Expand Down Expand Up @@ -702,9 +705,9 @@ def add_subtitle(self, style: SubtitleStyle = SubtitleStyle()) -> str:
def clip(
self,
prompt: str,
content_type: str,
model_name: str,
) -> str:
content_type: Literal["spoken", "visual", "multimodal"],
model_name: Literal["basic", "pro", "ultra"],
) -> SearchResult:
"""Generate a clip from the video using a prompt.

:param str prompt: Prompt to generate the clip
:param str content_type: Content type for the clip. Valid options: "spoken", "visual", "multimodal"
Expand Down
Loading