Estka-ai · ValenColm · Apr 7, 2026
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,45 @@
+#  Virtual environments
+venv/
+.venv/
+env/
+
+# Python cache
+__pycache__/
+*.py[cod]
+*$py.class
+
+#  Build / packaging
+build/
+dist/
+*.egg-info/
+
+# Environment variables
+.env
+
+#  Logs
+*.log
+
+# Audio files (do not commit large media files)
+*.wav
+*.mp3
+*.m4a
+*.mp4
+
+# Outputs / test results (NOTE: this ignores every .json file, including config and fixture files)
+*.json
+
+# Temporary files
+tmp/
+temp/
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# IDE / Editor
+.vscode/
+.idea/
+
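+# Whisper / model cache (optional but recommended). NOTE: git does not expand "~", so "~/.cache/" only matches a directory literally named "~" inside the repo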
+~/.cache/
+.cache/
diff --git a/README.md b/README.md
@@ -38,3 +38,49 @@ A continuación, la división del core para inteligencia artificial:
 ## Reglas de Contribución
 
 Todo el equipo se acoge a las directrices delineadas en el archivo principal de `skill-git.md`. Puntualmente para el worker de AI, tener en consideración que todas las tareas en desarrollo, fix y chore de IA deben desarrollarse de manera aislada en ramas `feature/`.
+
+
+## Audio Transcription Service (Whisper)
+
+This module implements the audio transcription service using Whisper.
+
+It is responsible for converting audio input into text, which will later be used for:
+
+- Filler word detection (muletillas)
+- Speech speed analysis
+- Vocabulary evaluation
+
+
+📂 Location
+`app/services/audio_service.py`
+
+
+⚙️ Additional Requirements
+In addition to the base setup, this service requires ffmpeg. Install it with:
+`sudo apt install ffmpeg -y`
+
+🎧 Audio Preparation
+
+Convert audio files to .wav format before processing:
+
+    ffmpeg -i audio_prueba.mp4 -ar 16000 -ac 1 -c:a pcm_s16le tests/sample.wav
+
+🧪 Local Test
+
+Run the transcription test with:
+
+    PYTHONPATH=. python tests/test_audio.py
+
+
+## Project Structure
+    app/
+    │
+    └── services/
+        └── audio_service.py   # Whisper transcription logic
+
+    tests/
+    │
+    └── test_audio.py          # Local test script
+
+    requirements.txt
+    README.md
diff --git a/app/services/audio_service.py b/app/services/audio_service.py
@@ -0,0 +1,33 @@
+import whisper
+import os
+
+# Load the Whisper "base" model once, at import time, so every
+# transcribe_audio() call reuses it instead of reloading it per request.
+model = whisper.load_model("base")
+
+def transcribe_audio(audio_path: str) -> str:
+    """
+    Transcribes an audio file using OpenAI Whisper.
+
+    This service is designed to support:
+    - filler word detection
+    - speech speed analysis
+    - vocabulary analysis
+    """
+
+    # Validate that the audio file exists before processing
+    if not os.path.exists(audio_path):
+        raise FileNotFoundError(f"Audio file not found: {audio_path}")
+
+
+    try:
+        # Perform transcription using Whisper
+        result = model.transcribe(audio_path)
+
+         # Return clean text output (remove leading/trailing spaces)
+        return result["text"].strip()
+
+ # Return clean text output (remove leading/trailing spaces)
+    except Exception as e:
+        print(f"Error transcribing audio: {e}")
+        return ""
diff --git a/requirements.txt b/requirements.txt
@@ -4,3 +4,4 @@ mediapipe
 openai-whisper
 pika
 python-multipart
+torch
diff --git a/tests/test_audio.py b/tests/test_audio.py
@@ -0,0 +1,21 @@
+import sys
+import os
+
+
+# Add project root to Python path to allow imports like:
+# from app.services.audio_service import transcribe_audio
+sys.path.append(os.path.abspath("."))
+
+from app.services.audio_service import transcribe_audio
+
+if __name__ == "__main__":
+
+    # Path to the test audio file
+    # Make sure this file exists before running the script
+    audio_file = "tests/sample.wav"
+
+     # Call the transcription service
+    text = transcribe_audio(audio_file)
+
+    print("Transcription:")
+    print(text)
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,3 +4,4 @@ mediapipe @@
     openai-whisper
     pika
     python-multipart
+    torch