sugarlabs · Soniyakmt · May 12, 2026 · May 13, 2026
diff --git a/.github/workflows/pronunciation-tests.yml b/.github/workflows/pronunciation-tests.yml
@@ -0,0 +1,248 @@
+name: Pronunciation Tests
+
+# Runs the pronunciation test suite, lint checks and test-data validation
+# whenever tests, the kokoro directory or this workflow file change.
+on:
+  push:
+    branches: [main, develop]
+    paths:
+      - 'tests/**'
+      - 'kokoro/**'
+      - '.github/workflows/pronunciation-tests.yml'
+  pull_request:
+    branches: [main, develop]
+    paths:
+      - 'tests/**'
+      - 'kokoro/**'
+      # Also run when a PR edits this workflow, mirroring the push filter.
+      - '.github/workflows/pronunciation-tests.yml'
+  workflow_dispatch:
+
+# No job writes to the repository, so a read-only token is sufficient.
+permissions:
+  contents: read
+
+jobs:
+  test:
+    name: Run Pronunciation Tests
+    runs-on: ubuntu-latest
+    strategy:
+      # Let every Python version finish even if one of them fails.
+      fail-fast: false
+      matrix:
+        # Versions stay quoted so 3.10 is not read as the float 3.1.
+        python-version: ['3.8', '3.9', '3.10', '3.11']
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+        cache: 'pip'
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements.txt
+        pip install pytest pytest-cov pytest-xdist
+
+    - name: Lint test files
+      run: |
+        pip install flake8
+        # Advisory only here; the separate lint job is the one that gates.
+        flake8 tests/ --count --select=E9,F63,F7,F82 --show-source --statistics || true
+
+    # No continue-on-error on this step: a failing test must fail the job,
+    # otherwise the notify job can never see a "failure" result for it.
+    - name: Run pronunciation tests
+      run: |
+        pytest tests/ \
+          -v \
+          --cov=tests \
+          --cov-report=xml \
+          --cov-report=term-missing \
+          --junit-xml=junit/test-results.xml
+
+    # Focused re-run for a shorter log. The file is already collected by the
+    # gating run above, so this step stays informational.
+    - name: Run Hindi-specific tests
+      if: ${{ !cancelled() }}
+      run: |
+        pytest tests/test_hindi_pronunciation.py -v
+      continue-on-error: true
+
+    - name: Upload coverage to Codecov
+      uses: codecov/codecov-action@v3
+      if: always()
+      with:
+        file: ./coverage.xml
+        flags: unittests
+        name: codecov-umbrella
+        fail_ci_if_error: false
+
+    # v3 of the artifact actions has been retired. v4 requires unique
+    # artifact names per run, which the matrix suffix already guarantees.
+    - name: Upload test results
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+        name: test-results-${{ matrix.python-version }}
+        path: junit/
+
+  lint:
+    name: Lint and Format Check
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.10'
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8 black isort
+
+    # Formatting and import order are reported but never fail the build.
+    - name: Check file formatting with Black
+      run: |
+        black --check tests/ || true
+
+    - name: Check import sorting with isort
+      run: |
+        isort --check-only tests/ || true
+
+    # No continue-on-error: the first flake8 call is meant to stop the build.
+    - name: Lint with flake8
+      run: |
+        # Stop the build if there are Python syntax errors or undefined names
+        flake8 tests/ --count --select=E9,F63,F7,F82 --show-source --statistics
+        # Exit-code is 0 even if there are errors (warnings)
+        flake8 tests/ --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+
+  test-matrix:
+    name: Test Summary
+    needs: [test]
+    runs-on: ubuntu-latest
+    if: always()
+
+    steps:
+    - name: Download all artifacts
+      uses: actions/download-artifact@v4
+
+    - name: Prepare test report
+      run: |
+        {
+          echo "## Pronunciation Test Results"
+          echo ""
+          # Every matrix entry uploads its own test-results-<version> artifact.
+          found=0
+          for report in test-results-*/test-results.xml; do
+            [ -f "$report" ] || continue
+            found=1
+            echo "- ✓ Tests completed (${report%%/*})"
+          done
+          if [ "$found" -eq 0 ]; then
+            echo "⚠ See individual test results above"
+          fi
+        } >> "$GITHUB_STEP_SUMMARY"
+
+  check-json-validity:
+    name: Validate Test Data Files
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.10'
+
+    - name: Validate JSON test data files
+      run: |
+        python << 'EOF'
+        import json
+        import sys
+        from pathlib import Path
+
+        test_data_dir = Path('tests/language_data')
+        required = ['language', 'language_code', 'test_cases']
+        required_test_fields = ['text', 'expected_phonemes', 'tags']
+
+        # Sorted so the log order is stable between runs.
+        json_files = sorted(test_data_dir.glob('*.json'))
+        if not json_files:
+            # An empty glob would otherwise look like a successful validation.
+            print(f"::warning::No JSON test data files found in {test_data_dir}")
+
+        failed = False
+        for json_file in json_files:
+            print(f"Validating {json_file}...")
+            try:
+                with open(json_file, 'r', encoding='utf-8') as f:
+                    data = json.load(f)
+            except json.JSONDecodeError as e:
+                print(f"  ✗ Invalid JSON: {e}")
+                failed = True
+                continue
+
+            # Collect every problem instead of stopping at the first one, so a
+            # single run reports all broken files and fields.
+            problems = []
+            if not isinstance(data, dict):
+                problems.append("Top-level JSON value must be an object")
+            else:
+                for field in required:
+                    if field not in data:
+                        problems.append(f"Missing required field: {field}")
+                for i, test in enumerate(data.get('test_cases', [])):
+                    for field in required_test_fields:
+                        if field not in test:
+                            problems.append(f"Test {i} missing field: {field}")
+
+            if problems:
+                for problem in problems:
+                    print(f"  ✗ Validation error: {problem}")
+                failed = True
+            else:
+                print(f"  ✓ Valid ({len(data['test_cases'])} tests)")
+
+        if failed:
+            sys.exit(1)
+        print("\n✓ All test data files are valid!")
+        EOF
+
+  notify:
+    name: Notify Results
+    needs: [test, lint, check-json-validity]
+    runs-on: ubuntu-latest
+    if: always()
+
+    steps:
+    - name: Check job status
+      # Results go through env instead of being spliced into the script text.
+      env:
+        TEST_RESULT: ${{ needs.test.result }}
+        LINT_RESULT: ${{ needs.lint.result }}
+        JSON_RESULT: ${{ needs.check-json-validity.result }}
+      run: |
+        status=0
+        for result in "$TEST_RESULT" "$LINT_RESULT" "$JSON_RESULT"; do
+          # A cancelled job has not passed either, so only "success" counts.
+          if [ "$result" != "success" ]; then
+            status=1
+          fi
+        done
+        if [ "$status" -ne 0 ]; then
+          echo "❌ Some checks failed"
+          exit 1
+        else
+          echo "✅ All checks passed"
+        fi
diff --git a/GenAI/cache.py b/GenAI/cache.py
@@ -0,0 +1,163 @@
+import json
+import hashlib
+from pathlib import Path
+from collections import OrderedDict
+from typing import Optional
+
+
+class ResponseCache:
+    """LRU cache mapping normalized questions to generated responses.
+
+    Entries are keyed by the SHA-256 digest of the question after trimming,
+    lower-casing and collapsing whitespace. When ``cache_file`` is given, the
+    cache is loaded from that JSON file on construction and written back on
+    every ``set``.
+    """
+
+    def __init__(self, max_cache_size: int = 100, cache_file: Optional[str] = None):
+        """Create the cache and load persisted entries when a file is configured.
+
+        Args:
+            max_cache_size: Maximum number of entries kept; the least recently
+                used entry is evicted first.
+            cache_file: Optional path of the JSON file used for persistence.
+        """
+        self.max_cache_size = max_cache_size
+        self.cache_file = cache_file
+        self.cache = OrderedDict()  # Least recently used first, newest last
+        self.hits = 0
+        self.misses = 0
+
+        # Load existing cache from file if provided
+        if self.cache_file:
+            self._load_from_file()
+
+    def _normalize_question(self, question: str) -> str:
+        """Return ``question`` trimmed, lower-cased, with whitespace runs collapsed."""
+        return " ".join(question.strip().lower().split())
+
+    def _get_cache_key(self, question: str) -> str:
+        """Return the hex SHA-256 digest of the normalized question."""
+        normalized = self._normalize_question(question)
+        return hashlib.sha256(normalized.encode()).hexdigest()
+
+    def _evict_overflow(self) -> None:
+        """Drop least recently used entries until the size limit is respected."""
+        while self.cache and len(self.cache) > self.max_cache_size:
+            self.cache.popitem(last=False)
+
+    def get(self, question: str) -> Optional[str]:
+        """Return the cached response for ``question``, or ``None`` on a miss.
+
+        A hit marks the entry as most recently used. The hit or miss counter
+        is incremented accordingly.
+        """
+        cache_key = self._get_cache_key(question)
+
+        if cache_key in self.cache:
+            # Move to end (mark as most recently used)
+            self.cache.move_to_end(cache_key)
+            self.hits += 1
+            return self.cache[cache_key]
+
+        self.misses += 1
+        return None
+
+    def set(self, question: str, response: str) -> None:
+        """Store ``response`` for ``question`` and persist the cache if configured.
+
+        New and updated entries both become the most recently used one.
+        """
+        cache_key = self._get_cache_key(question)
+
+        # Assigning to an existing key keeps its old position, so the entry
+        # is moved to the end explicitly.
+        self.cache[cache_key] = response
+        self.cache.move_to_end(cache_key)
+        self._evict_overflow()
+
+        # Persist after every change. Updates of an existing key are included
+        # so the file never keeps a stale response.
+        if self.cache_file:
+            self._save_to_file()
+
+    def clear(self) -> None:
+        """Empty the cache, reset the counters and delete the cache file, if any."""
+        self.cache.clear()
+        self.hits = 0
+        self.misses = 0
+
+        if self.cache_file:
+            try:
+                Path(self.cache_file).unlink()
+            except FileNotFoundError:
+                # Nothing has been persisted yet, so there is nothing to remove.
+                pass
+
+    def _save_to_file(self) -> None:
+        """Write entries and counters to ``cache_file`` as JSON.
+
+        Failures are reported as a warning instead of being raised, so a
+        persistence problem never breaks the caller.
+        """
+        try:
+            cache_data = {
+                "metadata": {
+                    "hits": self.hits,
+                    "misses": self.misses,
+                    "size": len(self.cache)
+                },
+                "cache": dict(self.cache)
+            }
+
+            # Explicit encoding keeps the file independent of the platform default.
+            with open(self.cache_file, 'w', encoding='utf-8') as f:
+                json.dump(cache_data, f, indent=2)
+        except Exception as e:
+            print(f"Warning: Failed to save cache to {self.cache_file}: {e}")
+
+    def _load_from_file(self) -> None:
+        """Restore entries and counters from ``cache_file`` if it exists.
+
+        Failures (unreadable file, invalid JSON, unexpected structure) are
+        reported as a warning and leave the cache usable.
+        """
+        try:
+            cache_path = Path(self.cache_file)
+            if not cache_path.exists():
+                return
+
+            with open(cache_path, 'r', encoding='utf-8') as f:
+                cache_data = json.load(f)
+
+            # Restore cache
+            if "cache" in cache_data:
+                self.cache = OrderedDict(cache_data["cache"])
+                # The file may have been written with a larger size limit.
+                self._evict_overflow()
+
+            # Restore statistics
+            if "metadata" in cache_data:
+                self.hits = cache_data["metadata"].get("hits", 0)
+                self.misses = cache_data["metadata"].get("misses", 0)
+        except Exception as e:
+            print(f"Warning: Failed to load cache from {self.cache_file}: {e}")
+
+    def get_stats(self) -> dict:
+        """Return size, limit, hit/miss counts and hit rate (percent, 2 decimals)."""
+        total_requests = self.hits + self.misses
+        hit_rate = (self.hits / total_requests * 100) if total_requests > 0 else 0
+
+        return {
+            "cache_size": len(self.cache),
+            "max_cache_size": self.max_cache_size,
+            "hits": self.hits,
+            "misses": self.misses,
+            "total_requests": total_requests,
+            "hit_rate_percent": round(hit_rate, 2)
+        }
+
+
+def create_cache(max_size: int = 100, cache_file: Optional[str] = None) -> ResponseCache:
+    """Build a ResponseCache limited to ``max_size`` entries, optionally file-backed."""
+    return ResponseCache(cache_file=cache_file, max_cache_size=max_size)
diff --git a/GenAI/gguf_inference.py b/GenAI/gguf_inference.py
@@ -16,8 +16,9 @@
 
 import os
 import warnings
-from typing import Dict, List
+from typing import Dict, List, Optional
 from . import profainity_check
+from .cache import create_cache, ResponseCache
 
 try:
     from llama_cpp import Llama
@@ -33,7 +34,10 @@
 class GGUFInference:
     def __init__(self, model_path: str, max_context_tokens: int = 1500,
                  generation_mode: int = 1, n_threads: int = 1,
-                 verbose: bool = False):
+                 verbose: bool = False,
+                 response_cache: Optional[ResponseCache] = None,
+                 cache_max_items: int = 100,
+                 cache_file: Optional[str] = None):
         """ARGS:
         max_context_tokens: For the model used the actual max context window
                            is 2048, but reducing cause we use an approximation
@@ -52,6 +56,7 @@ def __init__(self, model_path: str, max_context_tokens: int = 1500,
         self.conversation_history: List[Dict[str, str]] = []
         self.generation_settings: dict = self._get_generation_settings(generation_mode)
         self.blacklisted_words = profainity_check.bad_word_list()
+        self.response_cache = response_cache or create_cache(max_size=cache_max_items, cache_file=cache_file)
 
         self.model = Llama(
             model_path=model_path,
@@ -216,13 +221,21 @@ def ask_question(self, question: str, maintain_conversation: bool = True) -> str
             blocked_response = "Looks like you have typed in a blacklisted word"
             if maintain_conversation:
                 self.conversation_history.append({"student": question, "teacher": blocked_response})
+            self.response_cache.set(question, blocked_response)
             return blocked_response
 
         if maintain_conversation:
             instruction = self._truncate_history_if_needed(new_student_input=question)
         else:
             instruction = f"Student: {question}\nTeacher:"
-
+
+        # Return cached response for repeated questions in the same session
+        cached_response = self.response_cache.get(question)
+        if cached_response is not None:
+            if maintain_conversation:
+                self.conversation_history.append({"student": question, "teacher": cached_response})
+            return cached_response
+
         try:
             # Generate response
             response = self.model(instruction, **self.generation_params)
@@ -237,12 +250,14 @@ def ask_question(self, question: str, maintain_conversation: bool = True) -> str
                 blocked_response = "Sorry, I cant answer this, can we talk about something else"
                 if maintain_conversation:
                     self.conversation_history.append({"student": question, "teacher": blocked_response})
+                self.response_cache.set(question, blocked_response)
                 return blocked_response
 
             # Add to conversation history if requested
             if maintain_conversation:
                 self.conversation_history.append({"student": question, "teacher": teacher_response})
 
+            self.response_cache.set(question, teacher_response)
             return teacher_response
 
         except Exception as e:

diff --git a/specs b/specs