-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
77 lines (59 loc) · 1.84 KB
/
app.py
File metadata and controls
77 lines (59 loc) · 1.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from flask import Flask, request, send_file, render_template, jsonify
from flask_cors import CORS
import openai
import io
from .util import *
from openai import OpenAI
from pydub import AudioSegment
import base64
import time
# Load environment variables first. The OpenAI client resolves its API key
# from the environment when it is constructed, so building it before
# load_dotenv() would miss a key that is only defined in a .env file.
# NOTE(review): load_dotenv, os and ConversationBufferMemory are not imported
# explicitly in this file; presumably they arrive via `from .util import *`.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI()

# Flask application with CORS enabled for its routes.
app = Flask(__name__)
CORS(app)

# Module-level conversation state shared by every request handled by this
# process: the chat memory and the time of the most recent /transcribe call.
memory = ConversationBufferMemory(return_messages=True)
last_interaction_time = time.time()
@app.route("/transcribe", methods=["POST"])
def transcribe():
    """Turn an uploaded voice clip into a transcribed, spoken assistant reply.

    Expects a POST whose ``audio`` file field holds a recording that is decoded
    as WebM. The clip is converted to 16 kHz mono 16-bit PCM WAV, transcribed
    with ``whisper-1``, answered by ``generate_response`` using the shared
    conversation memory, and the answer is synthesised with ``tts-1``.

    Returns:
        On success, a JSON response with the keys ``transcription``,
        ``response_text`` and ``audio_base64`` (the synthesised speech,
        base64-encoded). If the ``audio`` field is missing or the upload
        cannot be decoded, a JSON body with an ``error`` message and HTTP
        status 400.
    """
    global last_interaction_time, memory

    # Imported here so the handler does not depend on edits to the file's
    # top-level import block.
    from pydub.exceptions import CouldntDecodeError

    now = time.time()
    # Reset memory if silent for more than 60 seconds
    if now - last_interaction_time > 60:
        memory.clear()
    last_interaction_time = now

    # Reject requests without an upload with a JSON error instead of the
    # framework's default HTML error page.
    upload = request.files.get("audio")
    if upload is None:
        return jsonify({"error": "Missing 'audio' file in request."}), 400

    # Read audio and convert; a corrupt or empty clip is a client error,
    # not a server failure.
    webm_io = io.BytesIO(upload.read())
    try:
        audio = AudioSegment.from_file(webm_io, format="webm")
    except CouldntDecodeError:
        return jsonify({"error": "Could not decode the uploaded audio."}), 400

    wav_io = io.BytesIO()
    audio.export(
        wav_io,
        format="wav",
        codec="pcm_s16le",
        parameters=["-ar", "16000", "-ac", "1"],
    )
    wav_io.seek(0)
    # Give the in-memory buffer a filename; presumably the API client uses the
    # extension to identify the audio format - TODO confirm.
    wav_io.name = "audio.wav"

    # Transcribe
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=wav_io,
        response_format="text",
    )
    user_text = transcription.strip()

    # Generate assistant response (uses memory)
    response_text = generate_response(user_text, memory)

    # Generate TTS
    tts_response = client.audio.speech.create(
        model="tts-1",
        voice="ash",
        input=response_text,
    )
    audio_base64 = base64.b64encode(tts_response.content).decode("utf-8")

    return jsonify({
        "transcription": user_text,
        "response_text": response_text,
        "audio_base64": audio_base64,
    })
@app.route("/")
def index():
    """Serve the root URL by rendering the ``voices.html`` template."""
    page = render_template("voices.html")
    return page
# Start the Flask server only when this module is executed directly;
# importing the module does not start a server.
if __name__ == "__main__":
    run_options = {"debug": True}
    app.run(**run_options)