-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
63 lines (49 loc) · 1.88 KB
/
app.py
File metadata and controls
63 lines (49 loc) · 1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from flask import Flask, abort, request
from tempfile import NamedTemporaryFile
import whisper, torch, librosa, os
import soundfile as sf
from pydub import AudioSegment
import io
from rich.console import Console
console = Console()
# Select the compute device: use the GPU when CUDA is available, else CPU.
# (The original issued a bare `torch.cuda.is_available()` call first and
# discarded the result — that dead statement is removed here.)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# https://huggingface.co/spaces/openai/whisper/discussions/30
# Load the Whisper "base" model once at startup so every request reuses it:
# https://github.com/openai/whisper?tab=readme-ov-file#available-models-and-languages
model = whisper.load_model("base", device=DEVICE)
app = Flask(__name__)
@app.route("/")
def hello():
    """Health-check endpoint: confirms the service is up and responding."""
    greeting = "Whisper Hello World!"
    return greeting
@app.route('/whisper', methods=['POST'])
def handler():
    """Transcribe every audio file uploaded in the POST request.

    Each file in ``request.files`` is decoded (any format pydub/ffmpeg
    understands: .m4a, .mp3, ...), peak-normalized when too quiet, and
    transcribed with the module-level Whisper model.

    Returns:
        dict: ``{'results': [{'filename': ..., 'transcript': ...}, ...]}``
        (Flask serializes it to JSON).

    Raises:
        A 400 response via ``abort`` when the request carries no files.
    """
    if not request.files:
        abort(400)

    results = []
    for filename, handle in request.files.items():
        console.log(f"Processing file: {filename}")

        # Decode the upload and re-export it to WAV entirely in memory.
        audio = AudioSegment.from_file(handle.stream)
        wav_io = io.BytesIO()
        audio.export(wav_io, format='wav')
        wav_io.seek(0)

        # Load as a mono float array at the native sample rate for
        # volume analysis / normalization.
        audio_data, sr = librosa.load(wav_io, sr=None, mono=True)

        # Boost quiet recordings up to MIN_AUDIO_PEAK. Guard against
        # empty or fully silent input: the original crashed on an empty
        # array (max() of empty) and divided by zero when peak == 0.
        peak = float(abs(audio_data).max()) if len(audio_data) else 0.0
        min_peak = float(os.environ.get("MIN_AUDIO_PEAK", 0.8))
        if 0.0 < peak < min_peak:
            audio_data *= (min_peak / peak)

        # Whisper's transcribe() needs a real file path, so write the
        # samples to a temp WAV. delete=False is required on Windows so
        # sf.write can reopen the path, so we must remove it ourselves —
        # the original never did, leaking one file per upload.
        with NamedTemporaryFile(suffix=".wav", delete=False) as temp:
            temp_path = temp.name
        try:
            sf.write(temp_path, audio_data, sr)
            result = model.transcribe(temp_path)
        finally:
            os.remove(temp_path)

        results.append({
            'filename': filename,
            'transcript': result['text'],
        })

    return {'results': results}