From bb1168fb0ce95b0be70085b464f78940c66f3071 Mon Sep 17 00:00:00 2001
From: Qi Sun <qisu@adobe.com>
Date: Tue, 8 Jan 2019 10:16:58 -0800
Subject: [PATCH 1/5] tweak parameters a bit

---
 google_speech.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/google_speech.py b/google_speech.py
index b120884..82ff681 100644
--- a/google_speech.py
+++ b/google_speech.py
@@ -25,7 +25,9 @@
 client = texttospeech.TextToSpeechClient()
 voice = texttospeech.types.VoiceSelectionParams(
     language_code='en-US',
-    ssml_gender=texttospeech.enums.SsmlVoiceGender.NEUTRAL)
+    #ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE,
+    name='en-US-Wavenet-F')
+
 audio_config = texttospeech.types.AudioConfig(
     audio_encoding=texttospeech.enums.AudioEncoding.MP3)   
 
@@ -41,13 +43,7 @@
     synthesis_input = texttospeech.types.SynthesisInput(text=audio_script)
     response = client.synthesize_speech(synthesis_input, voice, audio_config)
 
-    # The response's audio_content is binary.
     with open(file_tag+'.mp3', 'wb') as out:
-        # Write the response to the output file.
         out.write(response.audio_content)
     
-    #output_audio_file = file_tag + '.wav'
-    #command = 'cscript ' + speech_config_file + ' ' + output_script_file + ' ' + output_audio_file
-    #os.system(command)
-
     os.remove(output_script_file)

From c49f925cd82e4f3e7a405950f62910e773f3ecf6 Mon Sep 17 00:00:00 2001
From: qisun0 <qisun0@gmail.com>
Date: Mon, 28 Dec 2020 21:42:34 -0800
Subject: [PATCH 2/5] synthesis code

---
 google_speech.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/google_speech.py b/google_speech.py
index 82ff681..ebd75a6 100644
--- a/google_speech.py
+++ b/google_speech.py
@@ -23,13 +23,13 @@
 audios = re.findall('\\\\audio\s*\[(.*?)\]\s*{(.*?)}', lines, re.DOTALL)
 
 client = texttospeech.TextToSpeechClient()
-voice = texttospeech.types.VoiceSelectionParams(
+voice = texttospeech.VoiceSelectionParams(
     language_code='en-US',
     #ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE,
     name='en-US-Wavenet-F')
 
-audio_config = texttospeech.types.AudioConfig(
-    audio_encoding=texttospeech.enums.AudioEncoding.MP3)   
+audio_config = texttospeech.AudioConfig(
+    audio_encoding=texttospeech.AudioEncoding.MP3)   
 
 for audio in audios:
     file_tag = audio[0]
@@ -40,8 +40,8 @@
     fout.write(audio_script)
     fout.close()
 
-    synthesis_input = texttospeech.types.SynthesisInput(text=audio_script)
-    response = client.synthesize_speech(synthesis_input, voice, audio_config)
+    synthesis_input = texttospeech.SynthesisInput(text=audio_script)
+    response = client.synthesize_speech(input=synthesis_input, voice=voice, audio_config=audio_config)
 
     with open(file_tag+'.mp3', 'wb') as out:
         out.write(response.audio_content)

From 5911b9fa478c4e5478c5950e1e91f1db1a2e9ac0 Mon Sep 17 00:00:00 2001
From: Qi Sun <qi@qisun.me>
Date: Thu, 20 Jan 2022 16:37:46 -0500
Subject: [PATCH 3/5] Update README.md

---
 README.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f55b324..c147b8e 100644
--- a/README.md
+++ b/README.md
@@ -6,10 +6,16 @@ The code I used to dub my paper videos without requiring human help.
 
 Windows, preferably 7 or 10 for reasonable voice quality.
 
-## Usage ##
+## MS Speech Usage ##
 
 ```
 python speech.py speech.vbs example.tex
 
 ```
+## Google Speech Usage ##
+```
+python google_speech.py example.tex
+
+```
+
 The output will be in the .wav files.

From 888356d7ff5cc74bb0c8ca93a91dc52ab29c300f Mon Sep 17 00:00:00 2001
From: 405-not-found <98379785+405-not-found@users.noreply.github.com>
Date: Wed, 28 Feb 2024 09:27:35 -0500
Subject: [PATCH 4/5] Upgrade to OpenAI

---
 openAI_speech.py | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 openAI_speech.py

diff --git a/openAI_speech.py b/openAI_speech.py
new file mode 100644
index 0000000..6b8f0a9
--- /dev/null
+++ b/openAI_speech.py
@@ -0,0 +1,45 @@
+import sys
+import os
+import re
+from openai import OpenAI
+from pathlib import Path
+
+
+argc = len(sys.argv)
+
+if argc < 2:
+    error_message = "python input_video_script_file (tex)"
+    print(error_message)
+    raise Exception(error_message)
+
+input_video_script_file = sys.argv[1]
+
+fin = open(input_video_script_file, 'r')
+lines = fin.read()
+
+audios = re.findall('\\\\audio\s*\[(.*?)\]\s*{(.*?)}', lines, re.DOTALL)
+
+client = OpenAI()
+# Iterate over each text portion
+for i, text in enumerate(audios, start=1):
+    # Write the script text to <file_tag>.txt, then call the OpenAI Text-to-Speech API
+    file_tag = text[0]
+    audio_script = text[1]
+    output_script_file = file_tag + '.txt'
+    fout = open(output_script_file, 'w')
+    fout.write(audio_script)
+    fout.close()
+    response = client.audio.speech.create(
+        # Adjust the model and voice here.
+        model="tts-1",
+        voice="alloy",
+        input=audio_script
+    )
+
+    # Define the path for the output audio file
+    speech_file_path = Path(f"{file_tag}.mp3")
+
+    # Save the audio content to a file. The SDK warns that this function has a bug, but it works fine in my testing.
+    response.stream_to_file(str(speech_file_path))
+
+    print(f"Saved speech to {speech_file_path}")

From 4de328e9de6edb0cc352c3da789bcb2d7f26f168 Mon Sep 17 00:00:00 2001
From: 405-not-found <98379785+405-not-found@users.noreply.github.com>
Date: Wed, 28 Feb 2024 09:31:09 -0500
Subject: [PATCH 5/5] Update README

---
 README.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/README.md b/README.md
index c147b8e..76a55be 100644
--- a/README.md
+++ b/README.md
@@ -19,3 +19,13 @@ python google_speech.py example.tex
 ```
 
 The output will be in the .wav files.
+
+
+
+## OpenAI Speech Usage
+
+```
+python openAI_speech.py example.tex
+```
+
+The output will be in the .mp3 files.