Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 194 additions & 0 deletions .github/workflows/pronunciation-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
# CI for the pronunciation test-suite: runs pytest across a Python version
# matrix, lints the tests, validates the JSON test-data files and reports an
# overall status.
name: Pronunciation Tests

on:
  push:
    branches: [ main, develop ]
    paths:
      - 'tests/**'
      - 'kokoro/**'
      - '.github/workflows/pronunciation-tests.yml'
  pull_request:
    branches: [ main, develop ]
    paths:
      - 'tests/**'
      - 'kokoro/**'
      # Keep in sync with the push filter so edits to this workflow are
      # exercised on the pull request that makes them.
      - '.github/workflows/pronunciation-tests.yml'
  workflow_dispatch:

jobs:
  test:
    name: Run Pronunciation Tests
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [ '3.8', '3.9', '3.10', '3.11' ]
      # Let every Python version finish even if one of them fails.
      fail-fast: false

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install pytest pytest-cov pytest-xdist

      - name: Lint test files
        run: |
          pip install flake8
          # Lint test files (but be lenient)
          flake8 tests/ --count --select=E9,F63,F7,F82 --show-source --statistics || true

      # NOTE(review): continue-on-error keeps this job green even when tests
      # fail, so `needs.test.result` in the notify job is never "failure" on
      # account of test failures. Confirm that non-blocking tests are intended.
      - name: Run pronunciation tests
        run: |
          pytest tests/ \
            -v \
            --cov=tests \
            --cov-report=xml \
            --cov-report=term-missing \
            --junit-xml=junit/test-results.xml
        continue-on-error: true

      - name: Run Hindi-specific tests
        run: |
          pytest tests/test_hindi_pronunciation.py -v
        continue-on-error: true

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        if: always()
        with:
          file: ./coverage.xml
          flags: unittests
          name: codecov-umbrella
          fail_ci_if_error: false

      # upload-artifact v4 requires unique artifact names per run; the matrix
      # suffix already guarantees that.
      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: test-results-${{ matrix.python-version }}
          path: junit/

  lint:
    name: Lint and Format Check
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install flake8 black isort

      - name: Check file formatting with Black
        run: |
          black --check tests/ || true

      - name: Check import sorting with isort
        run: |
          isort --check-only tests/ || true

      # NOTE(review): continue-on-error below means the first flake8 call
      # cannot actually stop the build, despite the comment inside the script.
      - name: Lint with flake8
        run: |
          # Stop the build if there are Python syntax errors or undefined names
          flake8 tests/ --count --select=E9,F63,F7,F82 --show-source --statistics
          # Exit-code is 0 even if there are errors (warnings)
          flake8 tests/ --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
        continue-on-error: true

  test-matrix:
    name: Test Summary
    needs: [ test ]
    runs-on: ubuntu-latest
    if: always()

    steps:
      # With no `name` input every artifact is downloaded into a directory
      # named after the artifact, e.g. test-results-3.10/.
      - name: Download all artifacts
        uses: actions/download-artifact@v4

      - name: Prepare test report
        run: |
          echo "## Pronunciation Test Results" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          if [ -f "test-results-3.10/test-results.xml" ]; then
            echo "✓ Tests completed" >> "$GITHUB_STEP_SUMMARY"
          else
            echo "⚠ See individual test results above" >> "$GITHUB_STEP_SUMMARY"
          fi

  check-json-validity:
    name: Validate Test Data Files
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Validate JSON test data files
        run: |
          python << 'EOF'
          import json
          import sys
          from pathlib import Path

          test_data_dir = Path('tests/language_data')

          for json_file in test_data_dir.glob('*.json'):
              print(f"Validating {json_file}...")
              try:
                  with open(json_file, 'r', encoding='utf-8') as f:
                      data = json.load(f)

                  # Check required fields
                  required = ['language', 'language_code', 'test_cases']
                  for field in required:
                      assert field in data, f"Missing required field: {field}"

                  # Check test cases
                  for i, test in enumerate(data['test_cases']):
                      required_test_fields = ['text', 'expected_phonemes', 'tags']
                      for field in required_test_fields:
                          assert field in test, f"Test {i} missing field: {field}"

                  print(f"  ✓ Valid ({len(data['test_cases'])} tests)")
              except json.JSONDecodeError as e:
                  print(f"  ✗ Invalid JSON: {e}")
                  sys.exit(1)
              except AssertionError as e:
                  print(f"  ✗ Validation error: {e}")
                  sys.exit(1)

          print("\n✓ All test data files are valid!")
          EOF

  notify:
    name: Notify Results
    needs: [ test, lint, check-json-validity ]
    runs-on: ubuntu-latest
    if: always()

    steps:
      - name: Check job status
        run: |
          if [ "${{ needs.test.result }}" = "failure" ] || [ "${{ needs.lint.result }}" = "failure" ] || [ "${{ needs.check-json-validity.result }}" = "failure" ]; then
            echo "❌ Some checks failed"
            exit 1
          else
            echo "✅ All checks passed"
          fi
122 changes: 122 additions & 0 deletions GenAI/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import json
import hashlib
from pathlib import Path
from collections import OrderedDict
from typing import Optional


class ResponseCache:
    """LRU cache of responses keyed by a normalized question.

    Questions are normalized (trimmed, lower-cased, whitespace collapsed) and
    hashed with SHA-256, so differently formatted spellings of the same
    question share one entry. When ``cache_file`` is given, the cache is
    loaded from that JSON file on construction and rewritten after every
    ``set`` call.
    """

    def __init__(self, max_cache_size: int = 100, cache_file: Optional[str] = None):
        """Create the cache and, if ``cache_file`` is set, load it from disk.

        Args:
            max_cache_size: Maximum number of entries kept; the least
                recently used entry is evicted once this is exceeded.
            cache_file: Optional path of a JSON file used for persistence.
        """
        self.max_cache_size = max_cache_size
        self.cache_file = cache_file
        self.cache = OrderedDict()  # insertion order doubles as LRU order
        self.hits = 0
        self.misses = 0

        # Load existing cache from file if provided
        if self.cache_file:
            self._load_from_file()

    def _normalize_question(self, question: str) -> str:
        """Return ``question`` trimmed, lower-cased, with whitespace runs collapsed."""
        return " ".join(question.strip().lower().split())

    def _get_cache_key(self, question: str) -> str:
        """Return the SHA-256 hex digest of the normalized question."""
        normalized = self._normalize_question(question)
        return hashlib.sha256(normalized.encode("utf-8")).hexdigest()

    def _evict_overflow(self) -> None:
        """Drop least recently used entries until the size limit is respected."""
        # The emptiness guard keeps a negative limit from popping an empty dict.
        while self.cache and len(self.cache) > self.max_cache_size:
            self.cache.popitem(last=False)

    def get(self, question: str) -> Optional[str]:
        """Return the cached response for ``question``, or ``None`` on a miss.

        A hit marks the entry as most recently used. Hit/miss counters are
        updated on every call.
        """
        cache_key = self._get_cache_key(question)

        if cache_key in self.cache:
            # Move to end (mark as most recently used)
            self.cache.move_to_end(cache_key)
            self.hits += 1
            return self.cache[cache_key]

        self.misses += 1
        return None

    def set(self, question: str, response: str) -> None:
        """Store ``response`` for ``question`` as the most recently used entry.

        Inserting a new key may evict the least recently used entry. The cache
        file, when configured, is rewritten for both new and updated entries
        so the on-disk copy never holds a stale response.
        """
        cache_key = self._get_cache_key(question)

        self.cache[cache_key] = response
        # Assigning to an existing key keeps its old position, so move it.
        self.cache.move_to_end(cache_key)
        self._evict_overflow()

        # Persist to file if configured
        if self.cache_file:
            self._save_to_file()

    def clear(self) -> None:
        """Empty the cache, reset the counters and delete the cache file."""
        self.cache.clear()
        self.hits = 0
        self.misses = 0

        if self.cache_file:
            try:
                Path(self.cache_file).unlink()
            except FileNotFoundError:
                # Nothing to delete; avoids an exists()/unlink() race.
                pass

    def _save_to_file(self) -> None:
        """Write the cache and its statistics to ``cache_file`` as JSON.

        Persistence is best-effort: failures are reported with a warning on
        stdout and never raised to the caller.
        """
        cache_data = {
            "metadata": {
                "hits": self.hits,
                "misses": self.misses,
                "size": len(self.cache),
            },
            "cache": dict(self.cache),
        }

        try:
            with open(self.cache_file, "w", encoding="utf-8") as f:
                json.dump(cache_data, f, indent=2)
        except (OSError, TypeError, ValueError) as e:
            print(f"Warning: Failed to save cache to {self.cache_file}: {e}")

    def _load_from_file(self) -> None:
        """Restore the cache and its statistics from ``cache_file``.

        A missing file is not an error. Unreadable or malformed files produce
        a warning on stdout and leave the cache empty. Entries beyond
        ``max_cache_size`` are dropped, oldest first.
        """
        try:
            # json.dump writes ASCII-only output by default, so files written
            # by earlier versions decode identically as UTF-8.
            with open(self.cache_file, "r", encoding="utf-8") as f:
                cache_data = json.load(f)
        except FileNotFoundError:
            return
        except (OSError, ValueError) as e:
            print(f"Warning: Failed to load cache from {self.cache_file}: {e}")
            return

        if not isinstance(cache_data, dict):
            print(
                f"Warning: Failed to load cache from {self.cache_file}: "
                "unexpected file format"
            )
            return

        # Restore cache
        stored_entries = cache_data.get("cache")
        if isinstance(stored_entries, dict):
            self.cache = OrderedDict(stored_entries)
            # The file may have been written with a larger size limit.
            self._evict_overflow()

        # Restore statistics
        metadata = cache_data.get("metadata")
        if isinstance(metadata, dict):
            self.hits = metadata.get("hits", 0)
            self.misses = metadata.get("misses", 0)

    def get_stats(self) -> dict:
        """Return size, limit, hit/miss counts and the hit rate in percent."""
        total_requests = self.hits + self.misses
        hit_rate = (self.hits / total_requests * 100) if total_requests > 0 else 0

        return {
            "cache_size": len(self.cache),
            "max_cache_size": self.max_cache_size,
            "hits": self.hits,
            "misses": self.misses,
            "total_requests": total_requests,
            "hit_rate_percent": round(hit_rate, 2),
        }


def create_cache(max_size: int = 100, cache_file: Optional[str] = None) -> ResponseCache:
    """Build a ``ResponseCache``.

    Args:
        max_size: Passed to ``ResponseCache`` as ``max_cache_size``.
        cache_file: Passed to ``ResponseCache`` as ``cache_file``.

    Returns:
        The newly constructed ``ResponseCache``.
    """
    new_cache = ResponseCache(max_cache_size=max_size, cache_file=cache_file)
    return new_cache
21 changes: 18 additions & 3 deletions GenAI/gguf_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@

import os
import warnings
from typing import Dict, List
from typing import Dict, List, Optional
from . import profainity_check
from .cache import create_cache, ResponseCache

try:
from llama_cpp import Llama
Expand All @@ -33,7 +34,10 @@
class GGUFInference:
def __init__(self, model_path: str, max_context_tokens: int = 1500,
generation_mode: int = 1, n_threads: int = 1,
verbose: bool = False):
verbose: bool = False,
response_cache: Optional[ResponseCache] = None,
cache_max_items: int = 100,
cache_file: Optional[str] = None):
"""ARGS:
max_context_tokens: For the model used the actual max context window
is 2048, but reducing cause we use an approximation
Expand All @@ -52,6 +56,7 @@ def __init__(self, model_path: str, max_context_tokens: int = 1500,
self.conversation_history: List[Dict[str, str]] = []
self.generation_settings: dict = self._get_generation_settings(generation_mode)
self.blacklisted_words = profainity_check.bad_word_list()
self.response_cache = response_cache or create_cache(max_size=cache_max_items, cache_file=cache_file)

self.model = Llama(
model_path=model_path,
Expand Down Expand Up @@ -216,13 +221,21 @@ def ask_question(self, question: str, maintain_conversation: bool = True) -> str
blocked_response = "Looks like you have typed in a blacklisted word"
if maintain_conversation:
self.conversation_history.append({"student": question, "teacher": blocked_response})
self.response_cache.set(question, blocked_response)
return blocked_response

if maintain_conversation:
instruction = self._truncate_history_if_needed(new_student_input=question)
else:
instruction = f"Student: {question}\nTeacher:"


# Return cached response for repeated questions in the same session
cached_response = self.response_cache.get(question)
if cached_response is not None:
if maintain_conversation:
self.conversation_history.append({"student": question, "teacher": cached_response})
return cached_response

try:
# Generate response
response = self.model(instruction, **self.generation_params)
Expand All @@ -237,12 +250,14 @@ def ask_question(self, question: str, maintain_conversation: bool = True) -> str
blocked_response = "Sorry, I cant answer this, can we talk about something else"
if maintain_conversation:
self.conversation_history.append({"student": question, "teacher": blocked_response})
self.response_cache.set(question, blocked_response)
return blocked_response

# Add to conversation history if requested
if maintain_conversation:
self.conversation_history.append({"student": question, "teacher": teacher_response})

self.response_cache.set(question, teacher_response)
return teacher_response

except Exception as e:
Expand Down
1 change: 1 addition & 0 deletions specs
Submodule specs added at bbf950
Loading