Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ and this project adheres to
- ✨(frontend) make reaction toolbar responsive on small viewports
- ✨(frontend) enable reactions on mobile devices
- ✨(frontend) introduce picture-in-picture meeting
- ✨(backend) add core.recording.event.parsers.S3Parser
- ✨(backend) add core.recording.event.parsers.S3Parser
- ✨(summary) extended support for all video / audio files #1358

### Changed

Expand Down
32 changes: 23 additions & 9 deletions src/summary/summary/core/celery_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@

from summary.core.analytics import MetadataManager, get_analytics
from summary.core.config import get_settings
from summary.core.file_service import FileService, FileServiceException
from summary.core.file_service import (
FileService,
FileServiceException,
TranscribeError,
)
from summary.core.llm_service import LLMException, LLMObservability, LLMService
from summary.core.locales import get_locale
from summary.core.models import (
Expand Down Expand Up @@ -540,14 +544,24 @@ def process_audio_transcribe_v2_task(

job_id = self.request.id

transcription_res = WhisperXResponse(
**transcribe_audio( # type: ignore
task_id=job_id,
cloud_storage_url=payload.cloud_storage_url,
language=payload.language,
raises=True,
).model_dump()
)
try:
transcription_res = WhisperXResponse(
**transcribe_audio( # type: ignore
task_id=job_id,
cloud_storage_url=payload.cloud_storage_url,
language=payload.language,
raises=True,
).model_dump()
)
Comment on lines +548 to +555
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not related with this PR, can't transcribe_audio returns directly a WhisperXResponse instance?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is indeed work to be done around using from pydantic models, especially in the helpers by @cameledev. I left that for another PR.

except TranscribeError as e:
failure_payload = TranscribeWebhookFailurePayload(
job_id=job_id,
error_code=e.error_code,
)
call_webhook_v2_task.apply_async(
args=[failure_payload.model_dump(), payload.tenant_id]
)
return failure_payload.model_dump()

file_service.store_transcript(
transcript=transcription_res,
Expand Down
20 changes: 10 additions & 10 deletions src/summary/summary/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,16 @@ class Settings(BaseSettings):

# Audio recordings
recording_max_duration: Optional[int] = None
recording_allowed_extensions: Set[str] = {
".ogg",
".mp4",
".m4a",
".webm",
".ogv",
".opus",
".wav",
}
recording_video_extensions: Set[str] = {".mp4"}
codec_to_extension: dict[str, str] = Field(
default_factory=lambda: {
"aac": ".m4a",
"alac": ".m4a",
"mp3": ".mp3",
"opus": ".opus",
"vorbis": ".ogg",
"flac": ".flac",
}
)

# Celery settings
celery_broker_url: str = "redis://redis/0"
Expand Down
224 changes: 155 additions & 69 deletions src/summary/summary/core/file_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import subprocess
import tempfile
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import timedelta
from pathlib import Path
from urllib.parse import urlparse
Expand All @@ -23,6 +24,24 @@
logger = logging.getLogger(__name__)


class TranscribeError(ValueError):
"""Base class for transcribe-related errors."""

error_code: str = "unknown_error"


class MediaDurationTooLongError(TranscribeError):
"""Raised when the duration of a media file exceeds the allowed limit."""

error_code = "media_duration_too_long"


class NoAudioInFileError(TranscribeError):
"""Raised when a media file does not contain any audio."""

error_code = "no_audio_in_file"


def _get_duration_from_packets(local_path: Path) -> float:
"""Estimate duration from audio packet timestamps."""
# Run ffprobe to inspect the first audio stream in the file.
Expand Down Expand Up @@ -84,7 +103,7 @@
return max(packet_ends)


def get_media_duration(local_path: Path):
def get_media_duration_seconds(local_path: Path) -> float:
"""Get media (audio or video) file duration in seconds."""
# ruff: noqa: S607 Hard to know the ffprobe path, it depends on the deployment
result = subprocess.run(
Expand Down Expand Up @@ -114,6 +133,123 @@
return _get_duration_from_packets(local_path)


@dataclass(frozen=True)
class MediaInfo:
"""Object containing information about the media file."""

path: Path
has_audio: bool
has_video: bool
audio_duration_seconds: float | None
Comment thread
lebaudantoine marked this conversation as resolved.
audio_codec_name: str | None


def get_media_info(local_path: Path) -> MediaInfo:
"""Determines if a media file contains an audio and / or a video stream.

This function checks if the given media file contains at least one
audio / video stream. It utilizes ffmpeg to analyze the media and parse
its metadata to detect the presence of audio / video content.
If appropriate, it also retrieves the codec name for the audio stream
and its duration.
"""
# ruff: noqa: S607 Hard to know the ffprobe path, it depends on the deployment
# ruff: noqa: S603 Input can be trusted
result = subprocess.run(
[
"ffprobe",
"-v",
"error",
"-show_entries",
"stream=codec_type,codec_name",
"-print_format",
"json",
str(local_path),
],
check=False,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
data = json.loads(result.stdout)

streams = data.get("streams", [])
has_audio = any(el["codec_type"] == "audio" for el in streams)
has_video = any(el["codec_type"] == "video" for el in streams)
audio_codec_name = next(
(
stream.get("codec_name")
for stream in streams
if stream.get("codec_type") == "audio"
),
None,
)
audio_duration_seconds = None
if has_audio:
audio_duration_seconds = get_media_duration_seconds(local_path)
return MediaInfo(
path=local_path,
has_audio=has_audio,
has_video=has_video,
audio_duration_seconds=audio_duration_seconds,
audio_codec_name=audio_codec_name,
)


def extract_audio_from_video(media_info: MediaInfo) -> Path:
"""Extracts the audio track from a video file and saves it as a separate audio file.

Based on the provided audio codec,
it determines an appropriate audio file extension.
It then runs
`ffmpeg` to extract the audio track without re-encoding it, ensuring quality is
preserved and speed.

Raises:
CalledProcessError: Raised if the `ffmpeg` command execution fails.
"""
if media_info.audio_codec_name is None:
raise ValueError("Media file must have codec name must be provided")

# .mka can contain a bunch of audio codecs, using it as a default
# to avoid potential issues with the ffmpeg copy later on
suffix = settings.codec_to_extension.get(media_info.audio_codec_name, ".mka")

with tempfile.NamedTemporaryFile(
suffix=suffix,
delete=False,
prefix="audio_extract_",
) as tmp:
output_path = Path(tmp.name)

extract_command = [
"ffmpeg",
"-i",
str(media_info.path),
"-vn",
"-acodec",
"copy",
"-y",
str(output_path),
]

try:
subprocess.run(extract_command, check=True)
except BaseException as e:

Check failure on line 238 in src/summary/summary/core/file_service.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Catch a more specific exception or reraise the exception

See more on https://sonarcloud.io/project/issues?id=suitenumerique_meet&issues=AZ5KN0CBRiXPssbquVx-&open=AZ5KN0CBRiXPssbquVx-&pullRequest=1358
if isinstance(e, FileNotFoundError):
logger.error("ffmpeg not found. Please install ffmpeg.")
elif isinstance(e, subprocess.CalledProcessError):
logger.error("Audio extraction failed: %s", e.stderr.decode())
else:
logger.error("Unexpected error during audio extraction: %s", e)

Check failure on line 244 in src/summary/summary/core/file_service.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Use "logging.exception()" instead.

See more on https://sonarcloud.io/project/issues?id=suitenumerique_meet&issues=AZ5KN0CBRiXPssbquVx_&open=AZ5KN0CBRiXPssbquVx_&pullRequest=1358

if output_path.exists():
os.remove(output_path)
raise RuntimeError("Failed to extract audio from file") from e
Comment on lines +238 to +248
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in terms of python style I found the initial syntax with several except easier to scan/parse.
maybe, extract the cleanup in a function?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is to be reworked as a bigger clean of those file handles


return output_path


class FileServiceException(Exception):
"""Base exception for file service operations."""

Expand Down Expand Up @@ -141,8 +277,7 @@
self._bucket_name = settings.aws_storage_bucket_name
self._stream_chunk_size = 32 * 1024

self._allowed_extensions = settings.recording_allowed_extensions
self._max_duration = settings.recording_max_duration
self._max_duration_seconds = settings.recording_max_duration

def _download_from_minio(self, remote_object_key) -> Path:
"""Download file from MinIO to local temporary file.
Expand All @@ -158,10 +293,6 @@

extension = Path(remote_object_key).suffix.lower()

if extension not in self._allowed_extensions:
logger.warning("Invalid file extension '%s'", extension)
raise ValueError(f"Invalid file extension '{extension}'")

response = None

try:
Expand Down Expand Up @@ -204,11 +335,6 @@
raise ValueError("Invalid cloud_storage_url")

extension = Path(urlparse(cloud_storage_url).path).suffix.lower()
if extension not in self._allowed_extensions:
logger.warning(
"Invalid file extension '%s' from cloud_storage_url", extension
)
raise ValueError(f"Invalid file extension '{extension}'")

try:
with requests.get(
Expand Down Expand Up @@ -244,63 +370,22 @@
"Unexpected error while downloading object from cloud_storage_url."
) from e

def _validate_duration(self, local_path: Path) -> float:
def _validate_duration(self, duration_seconds: float) -> None:
Comment thread
FloChehab marked this conversation as resolved.
"""Validate audio file duration against configured maximum."""
duration = get_media_duration(local_path)

logger.info(
"Recording file duration: %.2f seconds",
duration,
duration_seconds,
)

if self._max_duration is not None and duration > self._max_duration:
if (
self._max_duration_seconds is not None
and duration_seconds > self._max_duration_seconds
):
error_msg = "Recording too long. Limit is %.2fs seconds" % (
self._max_duration,
self._max_duration_seconds,
)
logger.error(error_msg)
raise ValueError(error_msg)

return duration

def _extract_audio_from_video(self, video_path: Path) -> Path:
"""Extract audio from video file (e.g., MP4) and save as audio file."""
logger.info("Extracting audio from video file: %s", video_path)

with tempfile.NamedTemporaryFile(
suffix=".m4a", delete=False, prefix="audio_extract_"
) as tmp:
output_path = Path(tmp.name)

try:
command = [
"ffmpeg",
"-i",
str(video_path),
"-vn", # No video
"-acodec",
"copy",
"-y", # Overwrite output file if exists
str(output_path),
]

# ruff: noqa: S603
subprocess.run(
command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True
)

logger.info("Audio successfully extracted to: %s", output_path)
return output_path

except FileNotFoundError as e:
logger.error("ffmpeg not found. Please install ffmpeg.")
if output_path.exists():
os.remove(output_path)
raise RuntimeError("ffmpeg is not installed or not in PATH") from e
except subprocess.CalledProcessError as e:
logger.error("Audio extraction failed: %s", e.stderr.decode())
if output_path.exists():
os.remove(output_path)
raise RuntimeError("Failed to extract audio.") from e
raise MediaDurationTooLongError(error_msg)

def read_json(self, object_name: str) -> dict:
"""Read and parse a JSON file from MinIO storage."""
Expand Down Expand Up @@ -336,8 +421,8 @@
and yields an open file handle with metadata. Automatically cleans up
temporary files when the context exits.
"""
downloaded_path = None
processed_path = None
downloaded_path: Path | None = None
processed_path: Path | None = None
file_handle = None

try:
Expand All @@ -356,18 +441,19 @@
else:
downloaded_path = self._download_from_minio(remote_object_key)

duration = self._validate_duration(downloaded_path)
media_info = get_media_info(downloaded_path)

extension = downloaded_path.suffix.lower()
if not media_info.has_audio:
raise NoAudioInFileError("Media file does not contain audio")
self._validate_duration(media_info.audio_duration_seconds)

if extension in settings.recording_video_extensions:
if media_info.has_video:
logger.info("Video file detected, extracting audio...")
extracted_audio_path = self._extract_audio_from_video(downloaded_path)
processed_path = extracted_audio_path
processed_path = extract_audio_from_video(media_info)
else:
processed_path = downloaded_path

metadata = {"duration": duration, "extension": extension}
metadata = {"duration": media_info.audio_duration_seconds}

file_handle = open(processed_path, "rb")
yield file_handle, metadata
Expand Down
Loading
Loading