forked from neuphonic/neutts
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbasic_streaming_example.py
More file actions
87 lines (77 loc) · 2.47 KB
/
basic_streaming_example.py
File metadata and controls
87 lines (77 loc) · 2.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import soundfile as sf
import torch
import numpy as np
from neuttsair.neutts import NeuTTSAir
import pyaudio
def main(input_text, ref_codes_path, ref_text, backbone):
    """Synthesize ``input_text`` with NeuTTSAir and play the audio as it streams.

    Args:
        input_text: Text to convert to speech.
        ref_codes_path: Path to a file of pre-encoded reference codes that is
            readable by ``torch.load``.
        ref_text: Either the reference transcript itself, or a path to a text
            file containing it. If the value names an existing path, the file
            is read and its stripped contents are used instead.
        backbone: Backbone repo id. Must be one of the supported GGUF
            checkpoints, because streaming is only supported by llama-cpp.

    Raises:
        ValueError: If ``backbone`` is not a supported GGUF checkpoint.
        FileNotFoundError: If ``ref_codes_path`` is empty or does not exist.
    """
    supported_backbones = (
        "neuphonic/neutts-air-q4-gguf",
        "neuphonic/neutts-air-q8-gguf",
    )
    # Raise explicitly instead of using ``assert``: asserts are stripped under
    # ``python -O`` and the check would silently disappear.
    if backbone not in supported_backbones:
        raise ValueError(
            "Must be a GGUF ckpt as streaming is only currently supported by llama-cpp."
        )

    # Validate the cheap inputs before the (expensive) model load so that a
    # bad path fails fast with a clear error instead of an unbound variable.
    if not ref_codes_path or not os.path.exists(ref_codes_path):
        raise FileNotFoundError(
            f"Reference codes file not found: {ref_codes_path!r}"
        )

    # Check if ref_text is a path; if it is, read it, otherwise use the string as-is
    if ref_text and os.path.exists(ref_text):
        with open(ref_text, "r", encoding="utf-8") as f:
            ref_text = f.read().strip()

    # NOTE: torch.load unpickles the file; only load reference codes that come
    # from a trusted source.
    ref_codes = torch.load(ref_codes_path)

    # Initialize NeuTTSAir with the desired model and codec
    tts = NeuTTSAir(
        backbone_repo=backbone,
        backbone_device="cpu",
        codec_repo="neuphonic/neucodec-onnx-decoder",
        codec_device="cpu"
    )

    print(f"Generating audio for input text: {input_text}")

    p = pyaudio.PyAudio()
    try:
        # 16-bit mono output at 24 kHz.
        stream = p.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=24_000,
            output=True
        )
        try:
            print("Streaming...")
            for chunk in tts.infer_stream(input_text, ref_codes, ref_text):
                # Assumes each chunk is a float NumPy array nominally in
                # [-1, 1] (TODO confirm against NeuTTSAir.infer_stream).
                # Clip before scaling so out-of-range samples saturate rather
                # than wrap around when cast to int16.
                audio = (np.clip(chunk, -1.0, 1.0) * 32767).astype(np.int16)
                print(audio.shape)
                stream.write(audio.tobytes())
            stream.stop_stream()
        finally:
            # Always release the audio stream, even if inference or playback
            # fails part-way through.
            stream.close()
    finally:
        p.terminate()
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    import argparse

    parser = argparse.ArgumentParser(description="NeuTTSAir Example")
    parser.add_argument(
        "--input_text",
        type=str,
        required=True,
        help="Input text to be converted to speech"
    )
    parser.add_argument(
        "--ref_codes",
        type=str,
        default="./samples/dave.pt",
        help="Path to pre-encoded reference audio"
    )
    parser.add_argument(
        "--ref_text",
        type=str,
        default="./samples/dave.txt",
        help="Reference text corresponding to the reference audio",
    )
    # This option is parsed but never passed to main(): the streaming example
    # plays audio directly and does not write a file. The flag is kept so that
    # existing command lines that pass it keep working; the help text now says
    # so instead of promising a saved file.
    parser.add_argument(
        "--output_path",
        type=str,
        default="output.wav",
        help=(
            "Unused by this streaming example (audio is played, not saved); "
            "accepted for backward compatibility"
        )
    )
    parser.add_argument(
        "--backbone",
        type=str,
        default="neuphonic/neutts-air-q8-gguf",
        help="Huggingface repo containing the backbone checkpoint. Must be GGUF."
    )
    args = parser.parse_args()

    main(
        input_text=args.input_text,
        ref_codes_path=args.ref_codes,
        ref_text=args.ref_text,
        backbone=args.backbone,
    )