suitenumerique · lebaudantoine · May 30, 2026 · May 21, 2026 · lebaudantoine · May 28, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,7 +15,8 @@ and this project adheres to
 - ✨(frontend) make reaction toolbar responsive on small viewports
 - ✨(frontend) enable reactions on mobile devices
 - ✨(frontend) introduce picture-in-picture meeting
-- ✨(backend) add core.recording.event.parsers.S3Parser 
+- ✨(backend) add core.recording.event.parsers.S3Parser
+- ✨(summary) extended support for all video / audio files #1358
 
 ### Changed 
 

diff --git a/src/summary/summary/core/celery_worker.py b/src/summary/summary/core/celery_worker.py
@@ -14,7 +14,11 @@
 
 from summary.core.analytics import MetadataManager, get_analytics
 from summary.core.config import get_settings
-from summary.core.file_service import FileService, FileServiceException
+from summary.core.file_service import (
+    FileService,
+    FileServiceException,
+    TranscribeError,
+)
 from summary.core.llm_service import LLMException, LLMObservability, LLMService
 from summary.core.locales import get_locale
 from summary.core.models import (
@@ -540,14 +544,24 @@ def process_audio_transcribe_v2_task(
 
     job_id = self.request.id
 
-    transcription_res = WhisperXResponse(
-        **transcribe_audio(  # type: ignore
-            task_id=job_id,
-            cloud_storage_url=payload.cloud_storage_url,
-            language=payload.language,
-            raises=True,
-        ).model_dump()
-    )
+    try:
+        transcription_res = WhisperXResponse(
+            **transcribe_audio(  # type: ignore
+                task_id=job_id,
+                cloud_storage_url=payload.cloud_storage_url,
+                language=payload.language,
+                raises=True,
+            ).model_dump()
+        )
+    except TranscribeError as e:
+        failure_payload = TranscribeWebhookFailurePayload(
+            job_id=job_id,
+            error_code=e.error_code,
+        )
+        call_webhook_v2_task.apply_async(
+            args=[failure_payload.model_dump(), payload.tenant_id]
+        )
+        return failure_payload.model_dump()
 
     file_service.store_transcript(
         transcript=transcription_res,

diff --git a/src/summary/summary/core/config.py b/src/summary/summary/core/config.py
@@ -56,16 +56,16 @@ class Settings(BaseSettings):
 
     # Audio recordings
     recording_max_duration: Optional[int] = None
-    recording_allowed_extensions: Set[str] = {
-        ".ogg",
-        ".mp4",
-        ".m4a",
-        ".webm",
-        ".ogv",
-        ".opus",
-        ".wav",
-    }
-    recording_video_extensions: Set[str] = {".mp4"}
+    codec_to_extension: dict[str, str] = Field(
+        default_factory=lambda: {
+            "aac": ".m4a",
+            "alac": ".m4a",
+            "mp3": ".mp3",
+            "opus": ".opus",
+            "vorbis": ".ogg",
+            "flac": ".flac",
+        }
+    )
 
     # Celery settings
     celery_broker_url: str = "redis://redis/0"

diff --git a/src/summary/summary/core/file_service.py b/src/summary/summary/core/file_service.py
@@ -7,6 +7,7 @@
 import subprocess
 import tempfile
 from contextlib import contextmanager
+from dataclasses import dataclass
 from datetime import timedelta
 from pathlib import Path
 from urllib.parse import urlparse
@@ -23,6 +24,24 @@
 logger = logging.getLogger(__name__)
 
 
+class TranscribeError(ValueError):
+    """Base class for transcribe-related errors."""
+
+    error_code: str = "unknown_error"
+
+
+class MediaDurationTooLongError(TranscribeError):
+    """Raised when the duration of a media file exceeds the allowed limit."""
+
+    error_code = "media_duration_too_long"
+
+
+class NoAudioInFileError(TranscribeError):
+    """Raised when a media file does not contain any audio."""
+
+    error_code = "no_audio_in_file"
+
+
 def _get_duration_from_packets(local_path: Path) -> float:
     """Estimate duration from audio packet timestamps."""
     # Run ffprobe to inspect the first audio stream in the file.
@@ -84,7 +103,7 @@
     return max(packet_ends)
 
 
-def get_media_duration(local_path: Path):
+def get_media_duration_seconds(local_path: Path) -> float:
     """Get media (audio or video) file duration in seconds."""
     # ruff: noqa: S607 Hard to know the ffprobe path, it depends on the deployment
     result = subprocess.run(
@@ -114,6 +133,123 @@
     return _get_duration_from_packets(local_path)
 
 
+@dataclass(frozen=True)
+class MediaInfo:
+    """Object containing information about the media file."""
+
+    path: Path
+    has_audio: bool
+    has_video: bool
+    audio_duration_seconds: float | None
+    audio_codec_name: str | None
+
+
+def get_media_info(local_path: Path) -> MediaInfo:
+    """Determines if a media file contains an audio and / or a video stream.
+
+    This function checks if the given media file contains at least one
+    audio / video stream. It utilizes ffmpeg to analyze the media and parse
+    its metadata to detect the presence of audio / video content.
+    If appropriate, it also retrieves the codec name for the audio stream
+    and its duration.
+    """
+    # ruff: noqa: S607 Hard to know the ffprobe path, it depends on the deployment
+    # ruff: noqa: S603 Input can be trusted
+    result = subprocess.run(
+        [
+            "ffprobe",
+            "-v",
+            "error",
+            "-show_entries",
+            "stream=codec_type,codec_name",
+            "-print_format",
+            "json",
+            str(local_path),
+        ],
+        check=False,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+    )
+    data = json.loads(result.stdout)
+
+    streams = data.get("streams", [])
+    has_audio = any(el["codec_type"] == "audio" for el in streams)
+    has_video = any(el["codec_type"] == "video" for el in streams)
+    audio_codec_name = next(
+        (
+            stream.get("codec_name")
+            for stream in streams
+            if stream.get("codec_type") == "audio"
+        ),
+        None,
+    )
+    audio_duration_seconds = None
+    if has_audio:
+        audio_duration_seconds = get_media_duration_seconds(local_path)
+    return MediaInfo(
+        path=local_path,
+        has_audio=has_audio,
+        has_video=has_video,
+        audio_duration_seconds=audio_duration_seconds,
+        audio_codec_name=audio_codec_name,
+    )
+
+
+def extract_audio_from_video(media_info: MediaInfo) -> Path:
+    """Extracts the audio track from a video file and saves it as a separate audio file.
+
+    Based on the provided audio codec,
+    it determines an appropriate audio file extension.
+    It then runs
+    `ffmpeg` to extract the audio track without re-encoding it, ensuring quality is
+    preserved and speed.
+
+    Raises:
+        CalledProcessError: Raised if the `ffmpeg` command execution fails.
+    """
+    if media_info.audio_codec_name is None:
+        raise ValueError("Media file must have codec name must be provided")
+
+    # .mka can contain a bunch of audio codecs, using it as a default
+    # to avoid potential issues with the ffmpeg copy later on
+    suffix = settings.codec_to_extension.get(media_info.audio_codec_name, ".mka")
+
+    with tempfile.NamedTemporaryFile(
+        suffix=suffix,
+        delete=False,
+        prefix="audio_extract_",
+    ) as tmp:
+        output_path = Path(tmp.name)
+
+    extract_command = [
+        "ffmpeg",
+        "-i",
+        str(media_info.path),
+        "-vn",
+        "-acodec",
+        "copy",
+        "-y",
+        str(output_path),
+    ]
+
+    try:
+        subprocess.run(extract_command, check=True)
+    except BaseException as e:
+        if isinstance(e, FileNotFoundError):
+            logger.error("ffmpeg not found. Please install ffmpeg.")
+        elif isinstance(e, subprocess.CalledProcessError):
+            logger.error("Audio extraction failed: %s", e.stderr.decode())
+        else:
+            logger.error("Unexpected error during audio extraction: %s", e)
+
+        if output_path.exists():
+            os.remove(output_path)
+        raise RuntimeError("Failed to extract audio from file") from e
+
+    return output_path
+
+
 class FileServiceException(Exception):
     """Base exception for file service operations."""
 
@@ -141,8 +277,7 @@
         self._bucket_name = settings.aws_storage_bucket_name
         self._stream_chunk_size = 32 * 1024
 
-        self._allowed_extensions = settings.recording_allowed_extensions
-        self._max_duration = settings.recording_max_duration
+        self._max_duration_seconds = settings.recording_max_duration
 
     def _download_from_minio(self, remote_object_key) -> Path:
         """Download file from MinIO to local temporary file.
@@ -158,10 +293,6 @@
 
         extension = Path(remote_object_key).suffix.lower()
 
-        if extension not in self._allowed_extensions:
-            logger.warning("Invalid file extension '%s'", extension)
-            raise ValueError(f"Invalid file extension '{extension}'")
-
         response = None
 
         try:
@@ -204,11 +335,6 @@
             raise ValueError("Invalid cloud_storage_url")
 
         extension = Path(urlparse(cloud_storage_url).path).suffix.lower()
-        if extension not in self._allowed_extensions:
-            logger.warning(
-                "Invalid file extension '%s' from cloud_storage_url", extension
-            )
-            raise ValueError(f"Invalid file extension '{extension}'")
 
         try:
             with requests.get(
@@ -244,63 +370,22 @@
                 "Unexpected error while downloading object from cloud_storage_url."
             ) from e
 
-    def _validate_duration(self, local_path: Path) -> float:
+    def _validate_duration(self, duration_seconds: float) -> None:
         """Validate audio file duration against configured maximum."""
-        duration = get_media_duration(local_path)
-
         logger.info(
             "Recording file duration: %.2f seconds",
-            duration,
+            duration_seconds,
         )
 
-        if self._max_duration is not None and duration > self._max_duration:
+        if (
+            self._max_duration_seconds is not None
+            and duration_seconds > self._max_duration_seconds
+        ):
             error_msg = "Recording too long. Limit is %.2fs seconds" % (
-                self._max_duration,
+                self._max_duration_seconds,
             )
             logger.error(error_msg)
-            raise ValueError(error_msg)
-
-        return duration
-
-    def _extract_audio_from_video(self, video_path: Path) -> Path:
-        """Extract audio from video file (e.g., MP4) and save as audio file."""
-        logger.info("Extracting audio from video file: %s", video_path)
-
-        with tempfile.NamedTemporaryFile(
-            suffix=".m4a", delete=False, prefix="audio_extract_"
-        ) as tmp:
-            output_path = Path(tmp.name)
-
-        try:
-            command = [
-                "ffmpeg",
-                "-i",
-                str(video_path),
-                "-vn",  # No video
-                "-acodec",
-                "copy",
-                "-y",  # Overwrite output file if exists
-                str(output_path),
-            ]
-
-            # ruff: noqa: S603
-            subprocess.run(
-                command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True
-            )
-
-            logger.info("Audio successfully extracted to: %s", output_path)
-            return output_path
-
-        except FileNotFoundError as e:
-            logger.error("ffmpeg not found. Please install ffmpeg.")
-            if output_path.exists():
-                os.remove(output_path)
-            raise RuntimeError("ffmpeg is not installed or not in PATH") from e
-        except subprocess.CalledProcessError as e:
-            logger.error("Audio extraction failed: %s", e.stderr.decode())
-            if output_path.exists():
-                os.remove(output_path)
-            raise RuntimeError("Failed to extract audio.") from e
+            raise MediaDurationTooLongError(error_msg)
 
     def read_json(self, object_name: str) -> dict:
         """Read and parse a JSON file from MinIO storage."""
@@ -336,8 +421,8 @@
         and yields an open file handle with metadata. Automatically cleans up
         temporary files when the context exits.
         """
-        downloaded_path = None
-        processed_path = None
+        downloaded_path: Path | None = None
+        processed_path: Path | None = None
         file_handle = None
 
         try:
@@ -356,18 +441,19 @@
             else:
                 downloaded_path = self._download_from_minio(remote_object_key)
 
-            duration = self._validate_duration(downloaded_path)
+            media_info = get_media_info(downloaded_path)
 
-            extension = downloaded_path.suffix.lower()
+            if not media_info.has_audio:
+                raise NoAudioInFileError("Media file does not contain audio")
+            self._validate_duration(media_info.audio_duration_seconds)
 
-            if extension in settings.recording_video_extensions:
+            if media_info.has_video:
                 logger.info("Video file detected, extracting audio...")
-                extracted_audio_path = self._extract_audio_from_video(downloaded_path)
-                processed_path = extracted_audio_path
+                processed_path = extract_audio_from_video(media_info)
             else:
                 processed_path = downloaded_path
 
-            metadata = {"duration": duration, "extension": extension}
+            metadata = {"duration": media_info.audio_duration_seconds}
 
             file_handle = open(processed_path, "rb")
             yield file_handle, metadata