diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..432bc18 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,30 @@ +name: Build and Test + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + build: + + runs-on: ubuntu-latest + + permissions: + contents: read + + steps: + - uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + cache: 'npm' + + - name: Install dependencies + run: npm ci + + - name: Build + run: npm run build diff --git a/.github/workflows/jekyll-docker.yml b/.github/workflows/jekyll-docker.yml deleted file mode 100644 index 3d0eedb..0000000 --- a/.github/workflows/jekyll-docker.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: Jekyll site CI - -on: - push: - branches: [ "main" ] - pull_request: - branches: [ "main" ] - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Build the site in the jekyll/builder container - run: | - docker run \ - -v ${{ github.workspace }}:/srv/jekyll -v ${{ github.workspace }}/_site:/srv/jekyll/_site \ - jekyll/builder:latest /bin/bash -c "chmod -R 777 /srv/jekyll && jekyll build --future" diff --git a/broadcast-ai/README.md b/broadcast-ai/README.md new file mode 100644 index 0000000..f5f4c80 --- /dev/null +++ b/broadcast-ai/README.md @@ -0,0 +1,149 @@ +# Palestinian AI Voice Broadcasting Platform + +## What This Does + +This platform creates authentic Arabic speech for broadcasting, trained specifically on Palestinian dialect and Quranic pronunciation patterns. It combines multiple voice datasets to produce natural-sounding news announcements and can generate synchronized video content. 
+ +## Getting Started + +**Hardware needed:** +- Computer with Python 3.8 or newer +- At least 8 GB of memory (more is better for model training) +- Optional: an NVIDIA GPU makes training much faster + +**Installation steps:** + +Execute the setup automation: +```bash +./setup.sh +``` + +The script handles ffmpeg installation and Python package configuration automatically. + +## How To Use This System + +### Step 1: Collect Voice Data + +Your audio samples go into six specialized folders. Each needs a `metadata.csv` file mapping audio filenames to their Arabic transcriptions in pipe-delimited format: `audiofile.wav|النص العربي` + +See the `metadata.csv.example` file in each folder for the expected format. + +**The six dataset categories:** +- `dataset_quran` → Quranic verses with proper tajweed +- `dataset_speaker` → General broadcaster voice samples +- `dataset_speaker_news` → Formal news presentation style +- `dataset_speaker_palestinian` → Authentic Palestinian colloquial speech +- `dataset_speaker_realistic` → Natural conversational patterns +- `dataset_authority` → Official statement delivery style + +### Step 2: Build Your Model + +Run the training process: +```bash +python train.py +``` + +The trainer consolidates all your datasets and adapts the XTTS v2 foundation model. Output goes to `models/final_broadcast_model/`. Expect this to take time — possibly hours, depending on your dataset size and hardware. + +### Step 3: Create Audio Content + +Generate test audio: +```bash +python generate.py +``` + +The system produces `output/demo.wav` with professional audio treatment including frequency filtering and broadcast loudness standards (EBU R128 at -16 LUFS target). + +### Step 4: Launch Broadcasting (Optional) + +To enable video with lip synchronization, you need the Wav2Lip tool installed and an anchor presenter image at `input/anchor.jpg`. 
+ +Start the broadcast generator: +```bash +python run_tv_channel.py & +``` + +Start the web interface: +```bash +python tv_server.py +``` + +View the output at `http://localhost:3000`. + +## Programming Interface + +Import the voice generation function: + +```python +from generate import generate_voice + +generated_file = generate_voice( + text="نص عربي هنا", + style="news", + output_name="output_filename.wav" +) +``` + +The function returns the path to your generated audio file. + +## File Organization + +- `train.py` → Dataset merging and model fine-tuning +- `generate.py` → Speech synthesis with audio processing +- `run_tv_channel.py` → Automated segment generation loop +- `tv_server.py` → HTTP streaming endpoint (Flask-based) +- `requirements.txt` → Python dependencies list +- `setup.sh` → Automated environment configuration + +Generated content appears in `output/`, trained models in `models/`, source recordings in `dataset_*/wavs/`. + +## Audio Quality Tips + +**Recording standards:** +- Sample at 16000 Hz minimum +- Remove background noise before use +- Normalize volume across all samples +- Use a consistent microphone setup + +**Dataset composition:** +- Target 100+ recordings per category for good results +- Balance formal and informal speaking styles +- Mix different sentence structures and lengths +- Include Arabic diacritics for Quran dataset accuracy + +## Configuration Changes + +**Switch reference voice:** +Modify the `REFERENCE_WAV` path in `generate.py` to point at your preferred sample. + +**Adjust audio filters:** +Edit the `af_chain` variable in `generate.py` to customize frequency response and loudness targets. + +**Update news content:** +Modify the `NEWS_HEADLINES` array in `run_tv_channel.py` to change the pool of broadcast content. + +## Common Issues + +**Training fails with memory error:** +Either add more RAM to your system or reduce the batch size parameter. + +**Cannot find model:** +Complete the training step before attempting generation. 
+ +**Video generation skips lip sync:** +Verify Wav2Lip installation and checkpoint file presence at `models/wav2lip.pth`. + +**FFmpeg command not found:** +Install via your package manager (apt, brew, yum) or download from official site. + +## Web Server Endpoints + +- Root `/` serves the Arabic RTL player interface +- `/stream` provides direct media access +- `/health` returns JSON status for monitoring + +## Technical Notes + +The system merges diverse Arabic speech patterns to capture authentic Palestinian broadcasting style while maintaining clear pronunciation through Quranic conditioning. Audio post-processing applies industry-standard loudness normalization suitable for broadcast transmission. + +Model training adapts the multilingual XTTS v2 architecture with your custom Arabic datasets. The generator uses your reference voice sample to guide prosody and timbre during synthesis. diff --git a/broadcast-ai/generate.py b/broadcast-ai/generate.py index 8ecbfb8..6f4e475 100644 --- a/broadcast-ai/generate.py +++ b/broadcast-ai/generate.py @@ -85,16 +85,23 @@ def generate_voice( "loudnorm=I=-16:LRA=11:TP=-1.5" ) - subprocess.run( - [ - "ffmpeg", "-y", - "-i", raw_path, - "-af", af_chain, - final_path, - ], - check=True, - capture_output=True, - ) + try: + subprocess.run( + [ + "ffmpeg", "-y", + "-i", raw_path, + "-af", af_chain, + final_path, + ], + check=True, + capture_output=True, + ) + except (subprocess.CalledProcessError, FileNotFoundError) as e: + error_type = "not found" if isinstance(e, FileNotFoundError) else "failed" + print(f"[WARN] ffmpeg {error_type} - skipping audio post-processing") + print(f"[WARN] Using raw audio output instead") + # If ffmpeg fails or is not installed, use the raw audio file + final_path = raw_path print(f"[GEN] {style} → {final_path}") return final_path diff --git a/broadcast-ai/run_tv_channel.py b/broadcast-ai/run_tv_channel.py index 082f5dd..82627ed 100644 --- a/broadcast-ai/run_tv_channel.py +++ 
b/broadcast-ai/run_tv_channel.py @@ -56,20 +56,25 @@ def _ensure_anchor_video(): return os.makedirs("input", exist_ok=True) - subprocess.run( - [ - "ffmpeg", "-y", - "-loop", "1", - "-i", anchor_img, - "-c:v", "libx264", - "-t", "15", - "-pix_fmt", "yuv420p", - ANCHOR_VIDEO, - ], - check=True, - capture_output=True, - ) - print(f"[ANCHOR] Created {ANCHOR_VIDEO}") + try: + subprocess.run( + [ + "ffmpeg", "-y", + "-loop", "1", + "-i", anchor_img, + "-c:v", "libx264", + "-t", "15", + "-pix_fmt", "yuv420p", + ANCHOR_VIDEO, + ], + check=True, + capture_output=True, + ) + print(f"[ANCHOR] Created {ANCHOR_VIDEO}") + except subprocess.CalledProcessError as e: + print(f"[ERROR] Failed to create anchor video: {e}") + except FileNotFoundError: + print(f"[ERROR] ffmpeg not found - cannot create anchor video") def _generate_segment(headline: str) -> str | None: @@ -85,18 +90,26 @@ def _generate_segment(headline: str) -> str | None: print("[WARN] Wav2Lip checkpoint not found — skipping lip-sync") return audio_path - subprocess.run( - [ - "python", WAV2LIP_INFERENCE, - "--checkpoint_path", WAV2LIP_CHECKPOINT, - "--face", ANCHOR_VIDEO, - "--audio", audio_path, - "--outfile", VIDEO_OUTPUT, - ], - check=True, - ) - - return VIDEO_OUTPUT + try: + subprocess.run( + [ + "python", WAV2LIP_INFERENCE, + "--checkpoint_path", WAV2LIP_CHECKPOINT, + "--face", ANCHOR_VIDEO, + "--audio", audio_path, + "--outfile", VIDEO_OUTPUT, + ], + check=True, + ) + return VIDEO_OUTPUT + except subprocess.CalledProcessError as e: + print(f"[WARN] Wav2Lip processing failed: {e}") + print(f"[WARN] Falling back to audio only") + return audio_path + except FileNotFoundError: + print(f"[WARN] Python interpreter or Wav2Lip script not found") + print(f"[WARN] Falling back to audio only") + return audio_path # --------------------------------------------------------------------------- @@ -119,8 +132,12 @@ def run(): result = _generate_segment(headline) if result: print(f"[TV] Segment ready: {result}") + except 
KeyboardInterrupt: + print("[TV] Shutting down broadcast loop") + break except Exception as exc: - print(f"[TV] Segment failed: {exc}") + print(f"[TV] Unexpected error during segment generation ({type(exc).__name__}): {exc}") + print("[TV] Continuing to next segment...") time.sleep(LOOP_PAUSE_SECONDS) diff --git a/broadcast-ai/setup.sh b/broadcast-ai/setup.sh index d7fd0f5..e7ae96b 100755 --- a/broadcast-ai/setup.sh +++ b/broadcast-ai/setup.sh @@ -8,23 +8,45 @@ echo "============================================" echo " Palestinian AI Broadcast — Setup" echo "============================================" +# --- Python version check ---------------------------------------------------- +echo "[1/5] Checking Python version..." +if ! command -v python3 &>/dev/null; then + echo "[ERROR] Python 3 is required but not found" + exit 1 +fi +PYTHON_VERSION=$(python3 --version 2>&1 | awk '{print $2}' | cut -d'.' -f1,2) +echo "✓ Python $PYTHON_VERSION detected" + # --- System dependencies ---------------------------------------------------- -echo "[1/3] Installing system packages..." +echo "[2/5] Installing system packages..." if command -v apt-get &>/dev/null; then sudo apt-get update -qq sudo apt-get install -y -qq ffmpeg + echo "✓ ffmpeg installed via apt-get" elif command -v yum &>/dev/null; then sudo yum install -y ffmpeg + echo "✓ ffmpeg installed via yum" +elif command -v brew &>/dev/null; then + brew install ffmpeg + echo "✓ ffmpeg installed via brew" else echo "[WARN] Cannot detect package manager. Install ffmpeg manually." fi +# Verify ffmpeg installation +if command -v ffmpeg &>/dev/null; then + echo "✓ ffmpeg verified: $(ffmpeg -version | head -n1)" +else + echo "[WARN] ffmpeg not found in PATH. Some features may not work." +fi + # --- Python dependencies ---------------------------------------------------- -echo "[2/3] Installing Python dependencies..." -pip install -q -r requirements.txt +echo "[3/5] Installing Python dependencies..." 
+pip3 install -q -r requirements.txt +echo "✓ Python packages installed" # --- Directory structure ----------------------------------------------------- -echo "[3/3] Verifying directory structure..." +echo "[4/5] Creating directory structure..." dirs=( dataset_quran/wavs dataset_speaker/wavs @@ -40,17 +62,30 @@ dirs=( for d in "${dirs[@]}"; do mkdir -p "$d" done +echo "✓ Directory structure ready" + +# --- Create example CSV files ----------------------------------------------- +echo "[5/5] Creating example metadata files..." +for dataset in dataset_quran dataset_speaker dataset_speaker_news dataset_speaker_palestinian dataset_speaker_realistic dataset_authority; do + if [ ! -f "$dataset/metadata.csv.example" ]; then + cat > "$dataset/metadata.csv.example" << 'EOF' +audio_001.wav|النص العربي هنا +audio_002.wav|مثال آخر للنص العربي +EOF + echo "✓ Created $dataset/metadata.csv.example" + fi +done echo "" echo "============================================" -echo " Setup complete!" +echo " ✓ Setup complete!" echo "============================================" echo "" echo "Next steps:" echo " 1. Add WAV files and metadata.csv to each dataset_* directory" echo " (see *.csv.example files for the format)" echo " 2. Place an anchor image at input/anchor.jpg" -echo " 3. Run: python train.py" -echo " 4. Run: python run_tv_channel.py (Terminal 1)" -echo " 5. Run: python tv_server.py (Terminal 2)" +echo " 3. Run: python3 train.py" +echo " 4. Run: python3 run_tv_channel.py (Terminal 1)" +echo " 5. Run: python3 tv_server.py (Terminal 2)" echo "" diff --git a/qudscast-ai/setup.sh b/qudscast-ai/setup.sh index 4934d0b..656c253 100644 --- a/qudscast-ai/setup.sh +++ b/qudscast-ai/setup.sh @@ -1,34 +1,63 @@ #!/bin/bash # QudsCast AI - Setup Script +set -e echo "========================================" echo " QudsCast AI - Setup" echo "========================================" -# Install backend dependencies -echo "Installing backend dependencies..." 
-npm install +# Check Node.js version +echo "[1/7] Checking Node.js version..." +NODE_VERSION=$(node --version | cut -d'v' -f2 | cut -d'.' -f1) +if [ "$NODE_VERSION" -lt 18 ]; then + echo "ERROR: Node.js 18+ is required (found: $(node --version))" + exit 1 +fi +echo "✓ Node.js $(node --version) detected" -# Install frontend dependencies -echo "Installing frontend dependencies..." -cd frontend && npm install && cd .. +# Setup environment file +echo "[2/7] Setting up environment configuration..." +if [ ! -f .env ]; then + cp .env.example .env + echo "✓ Created .env from .env.example" +else + echo "✓ .env already exists" +fi + +# Create storage directories +echo "[3/7] Creating storage directories..." +mkdir -p storage/voices storage/jingles storage/audio storage/videos +echo "✓ Storage directories ready" + +# Install all dependencies +echo "[4/7] Installing dependencies..." +npm run install:all # Build frontend -echo "Building frontend..." +echo "[5/7] Building frontend..." cd frontend && npm run build && cd .. +echo "✓ Frontend build complete" # Install Python dependencies (optional) -echo "Installing Python TTS dependencies..." -pip install gtts 2>/dev/null || pip3 install gtts 2>/dev/null || echo "Python TTS installation skipped" +echo "[6/7] Installing Python TTS dependencies..." +if command -v pip3 &>/dev/null; then + pip3 install gtts 2>/dev/null && echo "✓ Python TTS installed" || echo "⚠ Python TTS installation skipped" +elif command -v pip &>/dev/null; then + pip install gtts 2>/dev/null && echo "✓ Python TTS installed" || echo "⚠ Python TTS installation skipped" +else + echo "⚠ pip not found - Python TTS installation skipped" +fi # Make scripts executable +echo "[7/7] Setting script permissions..." chmod +x backend/audio/generate_radio_mp3.sh chmod +x backend/video/generate_video.sh chmod +x backend/tts/generate_voice.py +echo "✓ Scripts are executable" echo "" echo "========================================" -echo " Setup Complete!" 
+echo " ✓ Setup Complete!" echo "========================================" echo "" echo "To start the application:"