Merge pull request #11 from smallest-inc/exceptions

radix132 · web-flow · commit 4e88ab3ca979 · 2024-12-05T11:02:27.000+05:30
Long text is now handled by the SDK, and the README is updated with better parameter explanations.
diff --git a/README.md b/README.md
@@ -77,7 +77,7 @@ if __name__ == "__main__":
 - `transliterate`: Enable text transliteration (default: False)
 - `remove_extra_silence`: Remove additional silence (default: True)  
 
-These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts kwargs, allowing you to override these parameters for a specific synthesis request.
+These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override these parameters for a specific synthesis request.
 
 For example, you can modify the speech speed and sample rate just for a particular synthesis call:  
 ```py
@@ -120,9 +120,9 @@ if __name__ == "__main__":
 - `speed`: Speech speed multiplier (default: 1.0)
 - `add_wav_header`: Include WAV header in output (default: True)
 - `transliterate`: Enable text transliteration (default: False)
-- `remove_extra_silence`: Remove additional silence (default: True)  
+- `remove_extra_silence`: Remove additional silence (default: True)   
 
-These parameters are part of the `AsyncSmallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts kwargs, allowing you to override any of these parameters on a per-request basis.
+These parameters are part of the `AsyncSmallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override any of these parameters on a per-request basis.  
 
 For example, you can modify the speech speed and sample rate just for a particular synthesis request:  
 ```py
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "smallestai"
-version = "1.3.0"
+version = "1.3.2"
 description = "Official Python client for the Smallest AI API"
 authors = [
     {name = "Smallest", email = "info@smallest.ai"},
diff --git a/smallest/async_tts.py b/smallest/async_tts.py
@@ -6,7 +6,7 @@
 
 from .models import TTSModels, TTSVoices
 from .exceptions import TTSError, APIError
-from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header,
+from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
                      get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
 
 
@@ -47,7 +47,7 @@ def __init__(
         """
         self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
         if not self.api_key:
-            raise TTSError("API key is required")
+            raise TTSError()
         self.chunk_size = 250
         
         self.opts = TTSOptions(
@@ -71,47 +71,6 @@ async def __aexit__(self, exc_type, exc_val, exc_tb):
         if self.session:
             await self.session.close()
 
-    def _split_into_chunks(self, text: str) -> List[str]:
-        """
-        Splits the input text into chunks based on sentence boundaries and the maximum chunk size.
-        """
-        chunks = []
-        current_chunk = ""
-        last_break_index = 0
-
-        i = 0
-        while i < len(text):
-            current_chunk += text[i]
-
-            if text[i] in ".,":
-                last_break_index = i
-
-            if len(current_chunk) >= self.chunk_size:
-                if last_break_index > 0:
-                    chunk = text[:last_break_index + 1].strip()
-                    chunk = chunk.replace("—", " ")
-                    chunks.append(chunk)
-                
-                    text = text[last_break_index + 1:]
-                    i = -1
-                    current_chunk = ""
-                    last_break_index = 0
-                else:
-                    # No break point found, split at max length
-                    current_chunk = current_chunk.replace("—", " ")
-                    chunks.append(current_chunk.strip())
-                    text = text[self.chunk_size:]
-                    i = -1
-                    current_chunk = ""
-
-            i += 1
-
-        if text:
-            text = text.replace("—", " ")
-            chunks.append(text.strip())
-
-        return chunks
-
 
     def get_languages(self) -> List[str]:
         """Returns a list of available languages."""
@@ -153,7 +112,7 @@ async def synthesize(
             setattr(opts, key, value)
 
         validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
-        chunks = self._split_into_chunks(text)
+        chunks = split_into_chunks(text)
         audio_content = b""
 
         for chunk in chunks:
diff --git a/smallest/exceptions.py b/smallest/exceptions.py
@@ -1,6 +1,9 @@
 class TTSError(Exception):
     """Base exception for TTS SDK"""
-    pass
+    default_message = "API key is required. Please set the `SMALLEST_API_KEY` environment variable or visit https://waves.smallest.ai/ to obtain your API key."
+    # NOTE(review): subclasses such as APIError inherit this API-key wording as their fallback unless they override `default_message` or `__init__`.
+    def __init__(self, message=None):
+        super().__init__(message or self.default_message)  # any falsy message (None or "") falls back to default_message
 
 class APIError(TTSError):
     """Raised when the API returns an error"""
diff --git a/smallest/stream_tts.py b/smallest/stream_tts.py
@@ -46,20 +46,40 @@ def __init__(
 
     async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> None:
         """
-        Streams the LLM output, splitting it into sentences and adding each to the queue.
+        Streams the LLM output into a buffer. Whenever the buffer reaches buffer_size,
+        the text up to the last regex-matched sentence end (else buffer_size characters) is queued.
 
         Parameters:
         - llm_output (AsyncGenerator[str, None]): An async generator yielding LLM output.
         """
         buffer = ""
+        last_break_index = 0  # index of the latest sentence end seen in buffer; 0 doubles as "none found"
+
         async for chunk in llm_output:
             buffer += chunk
-            if self.sentence_end_regex.match(buffer) or len(buffer) > self.buffer_size:
-                self.queue.put(buffer)
-                buffer = ""
+            i = 0  # rescan the whole buffer from its start for every incoming piece
+
+            while i < len(buffer):
+                current_chunk = buffer[:i + 1]  # prefix of buffer ending at position i
+                if self.sentence_end_regex.match(current_chunk):
+                    last_break_index = i
+
+                if len(current_chunk) >= self.buffer_size:  # NOTE(review): apart from the final flush, nothing is queued before the buffer holds buffer_size characters
+                    if last_break_index > 0:
+                        self.queue.put(buffer[:last_break_index + 1].replace("—", " ").strip())
+                        buffer = buffer[last_break_index + 1:]  # keep the text after the sentence end
+                    else:
+                        # No sentence boundary, split at max chunk size
+                        self.queue.put(buffer[:self.buffer_size].replace("—", " ").strip())
+                        buffer = buffer[self.buffer_size:]  # keep the text after the hard cut
+
+                    last_break_index = 0
+                    i = -1  # buffer was shortened; the i += 1 below restarts the scan at 0
+
+                i += 1
 
         if buffer:
-            self.queue.put(buffer)
+            self.queue.put(buffer.replace("—", " ").strip())  # flush the remainder once the generator is exhausted
 
         self.stop_flag = True  # completion flag when LLM output ends
 
diff --git a/smallest/tts.py b/smallest/tts.py
@@ -6,7 +6,7 @@
 
 from .models import TTSModels, TTSVoices
 from .exceptions import TTSError, APIError
-from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, 
+from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
 get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
 
 class Smallest:
@@ -45,7 +45,7 @@ def __init__(
         """
         self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
         if not self.api_key:
-            raise TTSError("API key is required")
+            raise TTSError()
         
         self.chunk_size = 250
         
@@ -59,47 +59,6 @@ def __init__(
             transliterate=transliterate,
             remove_extra_silence=remove_extra_silence
         )
-
-    def _split_into_chunks(self, text: str) -> List[str]:
-        """
-        Splits the input text into chunks based on sentence boundaries and the maximum chunk size.
-        """
-        chunks = []
-        current_chunk = ""
-        last_break_index = 0
-
-        i = 0
-        while i < len(text):
-            current_chunk += text[i]
-
-            if text[i] in ".,":
-                last_break_index = i
-
-            if len(current_chunk) >= self.chunk_size:
-                if last_break_index > 0:
-                    chunk = text[:last_break_index + 1].strip()
-                    chunk = chunk.replace("—", " ")
-                    chunks.append(chunk)
-                
-                    text = text[last_break_index + 1:]
-                    i = -1
-                    current_chunk = ""
-                    last_break_index = 0
-                else:
-                    # No break point found, split at max length
-                    current_chunk = current_chunk.replace("—", " ")
-                    chunks.append(current_chunk.strip())
-                    text = text[self.chunk_size:]
-                    i = -1
-                    current_chunk = ""
-
-            i += 1
-
-        if text:
-            text = text.replace("—", " ")
-            chunks.append(text.strip())
-
-        return chunks
     
         
     def get_languages(self) -> List[str]:
@@ -143,7 +102,7 @@ def synthesize(
 
         validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
 
-        chunks = self._split_into_chunks(text)
+        chunks = split_into_chunks(text)
         audio_content = b""
 
         for chunk in chunks:
diff --git a/smallest/utils.py b/smallest/utils.py
@@ -12,6 +12,7 @@
 
 API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
 SENTENCE_END_REGEX = re.compile(r'.*[-.—!?;:…\n]$')
+CHUNK_SIZE = 250
 SAMPLE_WIDTH = 2
 CHANNELS = 1
 
@@ -59,6 +60,50 @@ def preprocess_text(text: str) -> str:
     text = mpn.normalize(text)
     return text.strip()
 
+def split_into_chunks(text: str, chunk_size: int = CHUNK_SIZE) -> List[str]:
+    """
+    Split ``text`` into chunks of at most ``chunk_size`` characters.
+
+    Each full-size window is cut just after its last sentence-boundary
+    character (per ``SENTENCE_END_REGEX``), or at ``chunk_size`` characters
+    when it holds no usable boundary. Em dashes become spaces, chunks are
+    stripped, and chunks that end up empty are dropped.
+
+    Parameters:
+    - text (str): The text to split.
+    - chunk_size (int): Maximum chunk length; defaults to ``CHUNK_SIZE``.
+
+    Returns:
+    - List[str]: The non-empty chunks, in their original order.
+
+    Raises:
+    - ValueError: If ``chunk_size`` is smaller than 1.
+    """
+    if chunk_size < 1:
+        raise ValueError("chunk_size must be at least 1")
+
+    chunks = []
+    start = 0
+    while len(text) - start >= chunk_size:
+        window = text[start:start + chunk_size]
+        # Test single characters: the pattern's leading `.*` cannot cross a
+        # newline, so matching the whole prefix would miss every boundary
+        # after an embedded newline. Offset 0 is never used as a cut point.
+        cut = chunk_size
+        for offset in range(chunk_size - 1, 0, -1):
+            if SENTENCE_END_REGEX.match(window[offset]):
+                cut = offset + 1
+                break
+        chunk = window[:cut].replace("—", " ").strip()
+        if chunk:
+            chunks.append(chunk)
+        start += cut
+
+    tail = text[start:].replace("—", " ").strip()
+    if tail:
+        chunks.append(tail)
+    return chunks
+
 
 def get_smallest_languages() -> List[str]:
     return list(TTSLanguages.__args__)