Skip to content

Commit 4e88ab3

Browse files
authored
Merge pull request #11 from smallest-inc/exceptions
Long text handled by the SDK and README updated for better parameter explanations.
2 parents 9dc2343 + b1a7f11 commit 4e88ab3

7 files changed

Lines changed: 84 additions & 98 deletions

File tree

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ if __name__ == "__main__":
7777
- `transliterate`: Enable text transliteration (default: False)
7878
- `remove_extra_silence`: Remove additional silence (default: True)
7979

80-
These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts kwargs, allowing you to override these parameters for a specific synthesis request.
80+
These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override these parameters for a specific synthesis request.
8181

8282
For example, you can modify the speech speed and sample rate just for a particular synthesis call:
8383
```py
@@ -120,9 +120,9 @@ if __name__ == "__main__":
120120
- `speed`: Speech speed multiplier (default: 1.0)
121121
- `add_wav_header`: Include WAV header in output (default: True)
122122
- `transliterate`: Enable text transliteration (default: False)
123-
- `remove_extra_silence`: Remove additional silence (default: True)
123+
- `remove_extra_silence`: Remove additional silence (default: True)
124124

125-
These parameters are part of the `AsyncSmallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts kwargs, allowing you to override any of these parameters on a per-request basis.
125+
These parameters are part of the `AsyncSmallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override any of these parameters on a per-request basis.
126126

127127
For example, you can modify the speech speed and sample rate just for a particular synthesis request:
128128
```py

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "smallestai"
3-
version = "1.3.0"
3+
version = "1.3.2"
44
description = "Official Python client for the Smallest AI API"
55
authors = [
66
{name = "Smallest", email = "info@smallest.ai"},

smallest/async_tts.py

Lines changed: 3 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from .models import TTSModels, TTSVoices
88
from .exceptions import TTSError, APIError
9-
from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header,
9+
from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
1010
get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
1111

1212

@@ -47,7 +47,7 @@ def __init__(
4747
"""
4848
self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
4949
if not self.api_key:
50-
raise TTSError("API key is required")
50+
raise TTSError()
5151
self.chunk_size = 250
5252

5353
self.opts = TTSOptions(
@@ -71,47 +71,6 @@ async def __aexit__(self, exc_type, exc_val, exc_tb):
7171
if self.session:
7272
await self.session.close()
7373

74-
def _split_into_chunks(self, text: str) -> List[str]:
75-
"""
76-
Splits the input text into chunks based on sentence boundaries and the maximum chunk size.
77-
"""
78-
chunks = []
79-
current_chunk = ""
80-
last_break_index = 0
81-
82-
i = 0
83-
while i < len(text):
84-
current_chunk += text[i]
85-
86-
if text[i] in ".,":
87-
last_break_index = i
88-
89-
if len(current_chunk) >= self.chunk_size:
90-
if last_break_index > 0:
91-
chunk = text[:last_break_index + 1].strip()
92-
chunk = chunk.replace("—", " ")
93-
chunks.append(chunk)
94-
95-
text = text[last_break_index + 1:]
96-
i = -1
97-
current_chunk = ""
98-
last_break_index = 0
99-
else:
100-
# No break point found, split at max length
101-
current_chunk = current_chunk.replace("—", " ")
102-
chunks.append(current_chunk.strip())
103-
text = text[self.chunk_size:]
104-
i = -1
105-
current_chunk = ""
106-
107-
i += 1
108-
109-
if text:
110-
text = text.replace("—", " ")
111-
chunks.append(text.strip())
112-
113-
return chunks
114-
11574

11675
def get_languages(self) -> List[str]:
11776
"""Returns a list of available languages."""
@@ -153,7 +112,7 @@ async def synthesize(
153112
setattr(opts, key, value)
154113

155114
validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
156-
chunks = self._split_into_chunks(text)
115+
chunks = split_into_chunks(text)
157116
audio_content = b""
158117

159118
for chunk in chunks:

smallest/exceptions.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
class TTSError(Exception):
22
"""Base exception for TTS SDK"""
3-
pass
3+
default_message = "API key is required. Please set the `SMALLEST_API_KEY` environment variable or visit https://waves.smallest.ai/ to obtain your API key."
4+
5+
def __init__(self, message=None):
6+
super().__init__(message or self.default_message)
47

58
class APIError(TTSError):
69
"""Raised when the API returns an error"""

smallest/stream_tts.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,20 +46,40 @@ def __init__(
4646

4747
async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> None:
4848
"""
49-
Streams the LLM output, splitting it into sentences and adding each to the queue.
49+
Streams the LLM output, splitting it into sentences based on the regex
50+
and chunk size, and adding each chunk to the queue.
5051
5152
Parameters:
5253
- llm_output (AsyncGenerator[str, None]): An async generator yielding LLM output.
5354
"""
5455
buffer = ""
56+
last_break_index = 0
57+
5558
async for chunk in llm_output:
5659
buffer += chunk
57-
if self.sentence_end_regex.match(buffer) or len(buffer) > self.buffer_size:
58-
self.queue.put(buffer)
59-
buffer = ""
60+
i = 0
61+
62+
while i < len(buffer):
63+
current_chunk = buffer[:i + 1]
64+
if self.sentence_end_regex.match(current_chunk):
65+
last_break_index = i
66+
67+
if len(current_chunk) >= self.buffer_size:
68+
if last_break_index > 0:
69+
self.queue.put(buffer[:last_break_index + 1].replace("—", " ").strip())
70+
buffer = buffer[last_break_index + 1:]
71+
else:
72+
# No sentence boundary, split at max chunk size
73+
self.queue.put(buffer[:self.buffer_size].replace("—", " ").strip())
74+
buffer = buffer[self.buffer_size:]
75+
76+
last_break_index = 0
77+
i = -1
78+
79+
i += 1
6080

6181
if buffer:
62-
self.queue.put(buffer)
82+
self.queue.put(buffer.replace("—", " ").strip())
6383

6484
self.stop_flag = True # completion flag when LLM output ends
6585

smallest/tts.py

Lines changed: 3 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from .models import TTSModels, TTSVoices
88
from .exceptions import TTSError, APIError
9-
from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header,
9+
from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
1010
get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
1111

1212
class Smallest:
@@ -45,7 +45,7 @@ def __init__(
4545
"""
4646
self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
4747
if not self.api_key:
48-
raise TTSError("API key is required")
48+
raise TTSError()
4949

5050
self.chunk_size = 250
5151

@@ -59,47 +59,6 @@ def __init__(
5959
transliterate=transliterate,
6060
remove_extra_silence=remove_extra_silence
6161
)
62-
63-
def _split_into_chunks(self, text: str) -> List[str]:
64-
"""
65-
Splits the input text into chunks based on sentence boundaries and the maximum chunk size.
66-
"""
67-
chunks = []
68-
current_chunk = ""
69-
last_break_index = 0
70-
71-
i = 0
72-
while i < len(text):
73-
current_chunk += text[i]
74-
75-
if text[i] in ".,":
76-
last_break_index = i
77-
78-
if len(current_chunk) >= self.chunk_size:
79-
if last_break_index > 0:
80-
chunk = text[:last_break_index + 1].strip()
81-
chunk = chunk.replace("—", " ")
82-
chunks.append(chunk)
83-
84-
text = text[last_break_index + 1:]
85-
i = -1
86-
current_chunk = ""
87-
last_break_index = 0
88-
else:
89-
# No break point found, split at max length
90-
current_chunk = current_chunk.replace("—", " ")
91-
chunks.append(current_chunk.strip())
92-
text = text[self.chunk_size:]
93-
i = -1
94-
current_chunk = ""
95-
96-
i += 1
97-
98-
if text:
99-
text = text.replace("—", " ")
100-
chunks.append(text.strip())
101-
102-
return chunks
10362

10463

10564
def get_languages(self) -> List[str]:
@@ -143,7 +102,7 @@ def synthesize(
143102

144103
validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
145104

146-
chunks = self._split_into_chunks(text)
105+
chunks = split_into_chunks(text)
147106
audio_content = b""
148107

149108
for chunk in chunks:

smallest/utils.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
1414
SENTENCE_END_REGEX = re.compile(r'.*[-.—!?;:…\n]$')
15+
CHUNK_SIZE = 250
1516
SAMPLE_WIDTH = 2
1617
CHANNELS = 1
1718

@@ -59,6 +60,50 @@ def preprocess_text(text: str) -> str:
5960
text = mpn.normalize(text)
6061
return text.strip()
6162

63+
def split_into_chunks(text: str) -> List[str]:
64+
"""
65+
Splits the input text into chunks based on sentence boundaries
66+
defined by SENTENCE_END_REGEX and the maximum chunk size.
67+
"""
68+
chunks = []
69+
current_chunk = ""
70+
last_break_index = 0
71+
72+
i = 0
73+
while i < len(text):
74+
current_chunk += text[i]
75+
76+
# Check for sentence boundary using regex
77+
if SENTENCE_END_REGEX.match(current_chunk):
78+
last_break_index = i
79+
80+
if len(current_chunk) >= CHUNK_SIZE:
81+
if last_break_index > 0:
82+
# Split at the last valid sentence boundary
83+
chunk = text[:last_break_index + 1].strip()
84+
chunk = chunk.replace("—", " ")
85+
chunks.append(chunk)
86+
87+
text = text[last_break_index + 1:]
88+
i = -1 # Reset index to process the remaining text
89+
current_chunk = ""
90+
last_break_index = 0
91+
else:
92+
# No sentence boundary found, split at max length
93+
current_chunk = current_chunk.replace("—", " ")
94+
chunks.append(current_chunk.strip())
95+
text = text[CHUNK_SIZE:]
96+
i = -1 # Reset index to process the remaining text
97+
current_chunk = ""
98+
99+
i += 1
100+
101+
if text:
102+
text = text.replace("—", " ")
103+
chunks.append(text.strip())
104+
105+
return chunks
106+
62107

63108
def get_smallest_languages() -> List[str]:
64109
return list(TTSLanguages.__args__)

0 commit comments

Comments
 (0)