From c5a7e84f0564bd6b63963dc928311c449a84c0f0 Mon Sep 17 00:00:00 2001
From: Michael Feng <z5690425@ad.unsw.edu.au>
Date: Sat, 30 May 2026 19:50:23 +1000
Subject: [PATCH] refactor: switch AI to OpenAI; drop Gemini /predict backend

- Default the Vercel AI SDK provider to OpenAI (openai/gpt-4o-mini) via
  lib/ai/provider.ts getModel(); add @ai-sdk/openai direct-key path
  (OPENAI_API_KEY), keep AI Gateway + google/ fallback. Make the persona
  client and /api/gemini route provider-agnostic (no hardcoded Gemini ids).
- Drop the Gemini-embedding /predict service entirely (it required a Gemini
  key and a model trained on Gemini embeddings): remove api.py,
  prediction_service.py, requirements_api.txt, Procfile, app/api/predict,
  and the orphaned app/sentiment-analyzer page.
- Make services/ml_api (provider-free TF-IDF engine, /analyze) the deployed
  backend: repoint render.yaml and dev:ml to services.ml_api.main:app.
- Update env.example and docs (README, RENDER_DEPLOY, API_README) accordingly.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 Procfile                          |   1 -
 README.md                         |  50 ++--
 RENDER_DEPLOY.md                  |  88 +++---
 api.py                            | 331 ----------------------
 app/api/gemini/route.ts           |   6 +-
 app/api/predict/route.ts          |  60 ----
 app/sentiment-analyzer/layout.tsx |  19 --
 app/sentiment-analyzer/page.tsx   | 453 ------------------------------
 docs/API_README.md                | 355 ++++++++++-------------
 env.example                       |  73 ++---
 lib/ai/provider.ts                |  47 ++--
 lib/google-ai/client.ts           |  30 +-
 package-lock.json                 |  17 ++
 package.json                      |   3 +-
 prediction_service.py             | 329 ----------------------
 render.yaml                       |  34 +--
 requirements_api.txt              |  30 --
 17 files changed, 311 insertions(+), 1615 deletions(-)
 delete mode 100644 Procfile
 delete mode 100644 api.py
 delete mode 100644 app/api/predict/route.ts
 delete mode 100644 app/sentiment-analyzer/layout.tsx
 delete mode 100644 app/sentiment-analyzer/page.tsx
 delete mode 100644 prediction_service.py
 delete mode 100644 requirements_api.txt

diff --git a/Procfile b/Procfile
deleted file mode 100644
index c8de277..0000000
--- a/Procfile
+++ /dev/null
@@ -1 +0,0 @@
-web: gunicorn api:app --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:$PORT --workers ${WEB_CONCURRENCY:-1} --timeout 120 --access-logfile - --error-logfile -
diff --git a/README.md b/README.md
index 410ee2f..2b6c274 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,14 @@ This project analyzes LinkedIn posts to classify them as generating positive or
 
 ## 🏗️ Architecture
 
+> **Note:** the sections below describe the original research/training workflow
+> in `attempt2.ipynb` (Gemini embeddings + XGBoost). The **deployed runtime
+> backend** is now `services/ml_api` — a provider-free TF-IDF recruiting-signal
+> engine that needs no AI provider key. See `docs/API_README.md` for its
+> endpoints (`/health`, `/analyze`, `/analyze/compare`, `/history`) and
+> `RENDER_DEPLOY.md` for deployment. The Next.js app reaches it via the
+> same-origin proxy `app/api/analyze` (`ML_API_URL`).
+
 ```
 LinkedIn Posts → Label Generation → Feature Engineering → Model Training → Prediction
      ↓              (VADER + Engagement)     ↓                    ↓
@@ -214,26 +222,30 @@ Supabase schema for logging requests/responses: `docs/supabase.sql` (table `anal
 
 ## AI provider abstraction (Vercel AI SDK)
 
-The Next.js AI layer uses the **Vercel AI SDK** (`ai` + `@ai-sdk/google` + `zod`),
-so the model provider is swappable. `lib/ai/provider.ts` exposes `getModel()`,
-which resolves a model from the `AI_MODEL` env var (default
-`google/gemini-2.0-flash`).
+The Next.js AI layer uses the **Vercel AI SDK** (`ai` + `@ai-sdk/openai` +
+`@ai-sdk/google` + `zod`), so the model provider is swappable. `lib/ai/provider.ts`
+exposes `getModel()`, which resolves a model from the `AI_MODEL` env var
+(default `openai/gpt-4o-mini`). The persona-critique / variant-eval client at
+`lib/google-ai/client.ts` (legacy dir name, now provider-agnostic) uses this
+resolver with `generateObject` + zod.
 
 How a model is resolved:
 - If `AI_GATEWAY_API_KEY` is set (or, on Vercel, OIDC enables the Gateway), the
   `provider/model` string is routed through the **Vercel AI Gateway**, which adds
   failover and cost tracking.
-- Otherwise it falls back to the `@ai-sdk/google` provider using a direct key.
-  The key is read from `GOOGLE_GENERATIVE_AI_API_KEY` (preferred), with
-  `GEMINI_API_KEY` as a fallback.
+- Otherwise it falls back to a direct provider key. For the default
+  `openai/...` model the key is read from `OPENAI_API_KEY` (via `@ai-sdk/openai`).
+  A `google/...` model still works via `@ai-sdk/google`, reading
+  `GOOGLE_GENERATIVE_AI_API_KEY` (preferred), with `GEMINI_API_KEY` as a fallback.
 
-**Switching providers** is a one-line change: set `AI_MODEL` (e.g.
-`openai/gpt-4o-mini`) and supply the relevant provider key or use the Gateway.
+**Switching providers/models** is a one-line change: set `AI_MODEL` and supply the
+matching provider key (or use the Gateway).
 
 ```
-AI_MODEL=google/gemini-2.0-flash      # default
-AI_GATEWAY_API_KEY=...                # optional: route via Vercel AI Gateway
-GOOGLE_GENERATIVE_AI_API_KEY=...       # direct Google key (GEMINI_API_KEY is a fallback)
+AI_MODEL=openai/gpt-4o-mini           # default
+OPENAI_API_KEY=...                    # direct OpenAI key (used by the default model)
+AI_GATEWAY_API_KEY=...                # optional: route any provider/model via Vercel AI Gateway
+GOOGLE_GENERATIVE_AI_API_KEY=...      # only if you switch AI_MODEL to google/... (GEMINI_API_KEY is a fallback)
 ```
 
 ## Rate limiting
@@ -241,8 +253,8 @@ GOOGLE_GENERATIVE_AI_API_KEY=...       # direct Google key (GEMINI_API_KEY is a
 Two independent limiters protect the public surface:
 
 **Next.js inbound limiter** (`lib/ratelimit.ts`) — an in-memory, per-client-IP
-limiter applied to all public POST routes: `/api/gemini`, `/api/predict`,
-`/api/analyze`, `/api/analyze-with-images`, `/api/ab-tests`, `/api/personas`,
+limiter applied to the public POST routes, including `/api/analyze`,
+`/api/gemini`, `/api/analyze-with-images`, `/api/ab-tests`, `/api/personas`, and
 `/api/drafts`. Over-limit requests get a `429` with a `Retry-After` header.
 Configure with:
 ```
@@ -256,10 +268,6 @@ There is also a separate **outbound** throttle on calls to the AI provider,
 configured with `GEMINI_RATE_LIMIT_MAX_REQUESTS` (default `15`) and
 `GEMINI_RATE_LIMIT_WINDOW_MS` (default `60000`).
 
-**FastAPI `/predict` limiter** (`api.py`) — a `slowapi` per-IP limit on
-`POST /predict`, returning `429` when exceeded. Configure with
-`RATE_LIMIT_PREDICT` (default `30/minute`).
-> Caveat: the default store is in-memory **per gunicorn worker**, so with N
-> workers the effective global limit is ~N× the configured value. Set
-> `RATELIMIT_STORAGE_URI` (e.g. `redis://host:6379/0`) for a consistent global
-> limit. See `RENDER_DEPLOY.md` for deployment details.
+The `services/ml_api` backend has no built-in limiter of its own. The app reaches it
+only through the Next.js proxy (`app/api/analyze`), so the inbound limiter above
+covers app traffic, not direct calls to the service URL. See `RENDER_DEPLOY.md` for deployment details.
diff --git a/RENDER_DEPLOY.md b/RENDER_DEPLOY.md
index 0677b2c..9d710bf 100644
--- a/RENDER_DEPLOY.md
+++ b/RENDER_DEPLOY.md
@@ -1,55 +1,58 @@
-# Deploying the PR Sentiment API to Render
+# Deploying the Lyra ML API to Render
 
-This is the FastAPI backend (`api.py` + `prediction_service.py`) that serves the
-trained LinkedIn PR sentiment classifier. The trained artifacts live in
-`output/` and are committed to the repo, so no external storage is needed.
+This is the FastAPI backend (`services/ml_api`) — a **provider-free TF-IDF
+recruiting-signal engine**. It needs no AI provider key and no external database:
+the trained model artifacts live in `output/models/` (committed to the repo) and
+request/response logging uses local sqlite.
 
 ## What ships
 
 | File | Purpose |
 |------|---------|
 | `render.yaml` | Render Blueprint — defines the web service, build/start commands, health check, env vars. |
-| `Procfile` | Same start command, for non-Blueprint / generic buildpack deploys. |
-| `runtime.txt` | Pins Python 3.12.3. |
-| `requirements_api.txt` | Python deps (scikit-learn pinned to **1.6.1** to match the pickled model). |
-| `output/*.pkl`, `output/*.npy` | Trained model + scaler + encoders. |
+| `services/ml_api/requirements.txt` | Python deps for the API. |
+| `output/models/*` | Trained TF-IDF models + metadata (`metadata.json`, `*.joblib`, `train_tfidf_matrix.npz`, …). |
+
+The Blueprint provisions a single web service named **`lyra-ml-api`** on the
+Render **free** plan, running:
+
+```
+gunicorn services.ml_api.main:app -k uvicorn.workers.UvicornWorker
+```
+
+(bound to `$PORT`, `${WEB_CONCURRENCY:-1}` workers, `/health` health check).
 
 ## One-time setup
 
-1. Push this branch to GitHub.
+1. Push this repo to GitHub.
 2. In Render: **New +** → **Blueprint** → select the repo. Render reads `render.yaml`.
-3. Set the **`GEMINI_API_KEY`** secret in the dashboard (it's `sync: false`, so it is
-   never stored in the repo). Get a key at https://aistudio.google.com/app/apikey.
-4. (Recommended) Set **`ALLOWED_ORIGINS`** to your frontend origin(s),
-   comma-separated, instead of `*`.
-5. Deploy. Render runs the health check against `/health`; the service only
-   reports healthy once the model has loaded.
+3. (Recommended) Set **`FRONTEND_ORIGIN`** to your deployed frontend origin so the
+   browser can reach the API directly if needed (the app normally calls it
+   server-side via `app/api/analyze`, so CORS rarely matters in production).
+4. Deploy. Render runs the health check against `/health`; the service only
+   reports healthy once the models have loaded.
+
+> No secrets are required — there is no AI provider key for this service.
 
 ## Environment variables
 
 | Var | Required | Default | Notes |
 |-----|----------|---------|-------|
-| `GEMINI_API_KEY` | ✅ | — | App refuses to start without it (fail-fast). |
-| `MODEL_DIR` | | `output` | Directory holding the `.pkl`/`.npy` artifacts. |
-| `ALLOWED_ORIGINS` | | `*` | Comma-separated origins. With `*`, credentials are disabled (CORS spec). |
-| `WEB_CONCURRENCY` | | `1` | gunicorn workers. Each worker loads the model — raise only after checking memory. |
-| `LOG_LEVEL` | | `INFO` | |
-| `RATE_LIMIT_PREDICT` | | `30/minute` | Per-IP limit on `POST /predict` (slowapi/limits syntax, e.g. `100/hour`, `5/second`). Over-limit requests get a 429. |
-| `RATELIMIT_STORAGE_URI` | | (in-memory) | Shared rate-limit store, e.g. `redis://host:6379/0`. Without it the store is per-process — see caveat below. |
+| `MODEL_DIR` | | `output/models` | Directory holding the trained TF-IDF artifacts. |
+| `FRONTEND_ORIGIN` | | `http://localhost:3000` | Frontend origin allowed by CORS. Mainly matters if the browser hits the API directly. |
+| `WEB_CONCURRENCY` | | `1` | gunicorn workers. Each worker loads the models — raise only after checking memory. |
 | `PORT` | (Render-injected) | `8000` | Bound automatically by the start command. |
-
-> **Rate-limit caveat:** the default store is in-memory **per gunicorn worker**, so
-> with `WEB_CONCURRENCY` = N the effective global limit is roughly N× the
-> configured `RATE_LIMIT_PREDICT`. Set `RATELIMIT_STORAGE_URI` to a shared Redis
-> instance for a single, consistent global limit across all workers and instances.
+| `PYTHON_VERSION` | | `3.12.3` | Pins the Python runtime for the build. |
 
 ## Verify after deploy
 
 ```bash
 curl https://<your-service>.onrender.com/health
-curl -X POST https://<your-service>.onrender.com/predict \
+# -> {"status":"ok","models_loaded":true}
+
+curl -X POST https://<your-service>.onrender.com/analyze \
   -H 'Content-Type: application/json' \
-  -d '{"text":"Excited to announce our new platform! #AI","has_media":1,"media_count":1}'
+  -d '{"post_text":"We are scaling our AI team fast. Expect late nights but huge impact."}'
 ```
 
 Interactive docs: `https://<your-service>.onrender.com/docs`
@@ -58,19 +61,19 @@ Interactive docs: `https://<your-service>.onrender.com/docs`
 
 ```bash
 python -m venv venv && source venv/bin/activate
-pip install -r requirements_api.txt
-export GEMINI_API_KEY=your-key
-python api.py            # dev server on :8000 (set PORT/RELOAD to override)
+pip install -r services/ml_api/requirements.txt
+# dev server on :8000 (reads PORT/MODEL_DIR from env)
+python -m services.ml_api.main
 # or, mirror production:
-gunicorn api:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000
+gunicorn services.ml_api.main:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000
 ```
 
 ## Wiring the Next.js frontend
 
 The frontend never calls the FastAPI service directly. Instead:
 
-- `app/sentiment-analyzer/page.tsx` POSTs to the same-origin route `/api/predict`.
-- `app/api/predict/route.ts` forwards the request to `${ML_API_URL}/predict`.
+- The app POSTs to the same-origin route `/api/analyze`.
+- `app/api/analyze` forwards the request to `${ML_API_URL}/analyze`.
 
 So you only set **one** env var on the Next.js host (e.g. Vercel):
 
@@ -79,16 +82,5 @@ ML_API_URL=https://<your-service>.onrender.com
 ```
 
 Locally, `ML_API_URL` defaults to `http://localhost:8000`. Run both together
-with `npm run dev` (starts `next dev` + `uvicorn api:app` via `concurrently`),
-which also needs `GEMINI_API_KEY` exported for the Python side.
-
-## Model caveat (read before demoing)
-
-The model currently in `output/` is the **full-embedding (768-dim) classifier**
-— the PCA/regularization fixes described in `FIXES_APPLIED.md` were *documented
-but never saved* (`pca_reducer.pkl` is absent, and the saved model reports 784
-input features = 768 embeddings + 16 metadata). It therefore still carries the
-documented overfitting (~84% train / ~45% test). The serving pipeline is
-correct and dimensionally consistent; if you re-run the notebook to actually
-apply PCA, save `pca_reducer.pkl` into `output/` and the service will pick it up
-automatically (it already branches on the file's presence).
+with `npm run dev` (starts `next dev` + `uvicorn services.ml_api.main:app` via
+`concurrently`).
diff --git a/api.py b/api.py
deleted file mode 100644
index 8d054de..0000000
--- a/api.py
+++ /dev/null
@@ -1,331 +0,0 @@
-"""
-FastAPI Server for LinkedIn PR Sentiment Classification
-
-This API provides endpoints for predicting whether a LinkedIn post will
-generate positive or negative PR using machine learning.
-"""
-
-import logging
-import os
-from contextlib import asynccontextmanager
-from datetime import datetime, timezone
-from typing import Dict, Optional
-
-import uvicorn
-from fastapi import FastAPI, HTTPException, Request, status
-from fastapi.concurrency import run_in_threadpool
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel, Field, field_validator
-from slowapi import Limiter, _rate_limit_exceeded_handler
-from slowapi.errors import RateLimitExceeded
-from slowapi.middleware import SlowAPIMiddleware
-from slowapi.util import get_remote_address
-
-from prediction_service import PRClassifierService
-
-logging.basicConfig(
-    level=os.getenv("LOG_LEVEL", "INFO").upper(),
-    format="%(asctime)s %(levelname)s %(name)s: %(message)s",
-)
-logger = logging.getLogger("pr_api")
-
-
-# --- Rate limiting (slowapi) -------------------------------------------------
-# Per-client inbound rate limiting keyed by remote IP address. Mainly guards
-# the expensive /predict endpoint (Gemini embedding + XGBoost inference).
-#
-# RATE_LIMIT_PREDICT controls the /predict limit and is env-configurable so it
-# can be tuned without a code change (slowapi/limits syntax, e.g. "30/minute",
-# "100/hour", "5/second").
-#
-# NOTE: the default storage is in-memory and therefore PER-PROCESS. Under
-# gunicorn with multiple UvicornWorkers (or multiple Render instances), each
-# worker keeps its own counter, so the effective limit is multiplied by the
-# worker/instance count. For a globally consistent limit across workers/
-# instances, point RATELIMIT_STORAGE_URI at a shared store such as Redis
-# (e.g. "redis://host:6379/0"), which is passed through to the Limiter below.
-RATE_LIMIT_PREDICT = os.getenv("RATE_LIMIT_PREDICT", "30/minute")
-_RATELIMIT_STORAGE_URI = os.getenv("RATELIMIT_STORAGE_URI")  # optional shared store
-
-limiter = Limiter(
-    key_func=get_remote_address,
-    storage_uri=_RATELIMIT_STORAGE_URI,  # None -> in-memory (per-process) default
-)
-
-
-# Global service instance (populated during the lifespan startup)
-prediction_service: Optional[PRClassifierService] = None
-
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Initialize the prediction service once, before the app serves traffic."""
-    global prediction_service
-
-    model_dir = os.getenv("MODEL_DIR", "output")
-    api_key = os.getenv("GEMINI_API_KEY")
-
-    if not api_key:
-        # Fail fast: without the key the service can never produce a prediction,
-        # so we don't want the platform to report a "healthy" deploy.
-        raise RuntimeError(
-            "GEMINI_API_KEY environment variable not set. "
-            "Set it in the Render dashboard (or your local environment) "
-            "before starting the server."
-        )
-
-    try:
-        prediction_service = PRClassifierService(model_dir=model_dir, api_key=api_key)
-        logger.info("Prediction service initialized with model from: %s", model_dir)
-    except Exception:
-        logger.exception("Failed to initialize prediction service")
-        raise
-
-    yield
-
-    prediction_service = None
-
-
-# Initialize FastAPI app
-app = FastAPI(
-    title="LinkedIn PR Sentiment Classifier API",
-    description="Predict PR sentiment (positive/negative) for LinkedIn posts using AI",
-    version="1.0.0",
-    docs_url="/docs",
-    redoc_url="/redoc",
-    lifespan=lifespan,
-)
-
-# CORS: configurable via ALLOWED_ORIGINS (comma-separated). Defaults to "*".
-# The CORS spec forbids combining a wildcard origin with credentials, so we only
-# enable credentials when explicit origins are listed.
-_origins_env = os.getenv("ALLOWED_ORIGINS", "*").strip()
-if _origins_env == "*":
-    _allow_origins = ["*"]
-    _allow_credentials = False
-else:
-    _allow_origins = [o.strip() for o in _origins_env.split(",") if o.strip()]
-    _allow_credentials = True
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=_allow_origins,
-    allow_credentials=_allow_credentials,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-# Wire up rate limiting: register the limiter on app state (slowapi looks it up
-# there), install the middleware, and return HTTP 429 when a limit is exceeded.
-app.state.limiter = limiter
-app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
-app.add_middleware(SlowAPIMiddleware)
-
-
-def _utc_now() -> str:
-    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
-
-
-# Pydantic models for request/response
-class PredictionRequest(BaseModel):
-    """Request model for PR prediction"""
-    text: str = Field(..., description="LinkedIn post text content", min_length=1, max_length=10000)
-
-    # Optional metadata fields
-    post_hour: Optional[int] = Field(12, ge=0, le=23, description="Hour of posting (0-23)")
-    post_day_of_week: Optional[int] = Field(2, ge=0, le=6, description="Day of week (0=Monday, 6=Sunday)")
-    post_month: Optional[int] = Field(1, ge=1, le=12, description="Month (1-12)")
-    has_media: Optional[int] = Field(0, ge=0, le=1, description="Has media (0 or 1)")
-    media_count: Optional[int] = Field(0, ge=0, description="Number of media items")
-    media_type: Optional[str] = Field("none", description="Media type (none/image/video)")
-    post_type: Optional[str] = Field("regular", description="Post type (regular/article)")
-    author_follower_count: Optional[int] = Field(1000, ge=0, description="Author follower count")
-    avg_sentiment: Optional[float] = Field(0.0, ge=-1.0, le=1.0, description="Average comment sentiment")
-    median_sentiment: Optional[float] = Field(0.0, ge=-1.0, le=1.0, description="Median comment sentiment")
-    num_comments_analyzed: Optional[int] = Field(0, ge=0, description="Number of comments analyzed")
-
-    @field_validator("text")
-    @classmethod
-    def text_not_empty(cls, v: str) -> str:
-        if not v or not v.strip():
-            raise ValueError("Text cannot be empty or whitespace only")
-        return v.strip()
-
-    model_config = {
-        "json_schema_extra": {
-            "example": {
-                "text": "Excited to announce our new AI-powered analytics platform! This will transform how businesses understand their customers. #AI #Innovation",
-                "post_hour": 14,
-                "post_day_of_week": 2,
-                "has_media": 1,
-                "media_count": 1,
-            }
-        }
-    }
-
-
-class PredictionResponse(BaseModel):
-    """Response model for PR prediction"""
-    prediction: str = Field(..., description="Predicted sentiment: 'positive' or 'negative'")
-    confidence: float = Field(..., description="Confidence score (0-1)")
-    probabilities: Dict[str, float] = Field(..., description="Probability for each class")
-    features_extracted: Dict = Field(..., description="Extracted features from input")
-    timestamp: str = Field(..., description="Prediction timestamp")
-
-    model_config = {
-        "json_schema_extra": {
-            "example": {
-                "prediction": "positive",
-                "confidence": 0.85,
-                "probabilities": {"negative": 0.15, "positive": 0.85},
-                "features_extracted": {
-                    "text_length": 152,
-                    "emoji_count": 0,
-                    "url_count": 0,
-                    "hashtag_count": 2,
-                    "mention_count": 0,
-                    "embedding_dimension": 768,
-                },
-                "timestamp": "2025-12-15T10:30:00Z",
-            }
-        }
-    }
-
-
-class HealthResponse(BaseModel):
-    """Health check response"""
-    status: str
-    message: str
-    model_loaded: bool
-    timestamp: str
-
-
-@app.get("/", tags=["General"])
-async def root():
-    """Root endpoint with API information"""
-    return {
-        "name": "LinkedIn PR Sentiment Classifier API",
-        "version": "1.0.0",
-        "description": "Predict PR sentiment for LinkedIn posts",
-        "endpoints": {
-            "health": "/health",
-            "predict": "/predict (POST)",
-            "docs": "/docs",
-        },
-    }
-
-
-@app.get("/health", response_model=HealthResponse, tags=["General"])
-async def health_check():
-    """Health check endpoint (used by Render's health checks)"""
-    model_loaded = prediction_service is not None
-
-    return HealthResponse(
-        status="healthy" if model_loaded else "unhealthy",
-        message="Service is running" if model_loaded else "Model not loaded",
-        model_loaded=model_loaded,
-        timestamp=_utc_now(),
-    )
-
-
-@app.post("/predict", response_model=PredictionResponse, tags=["Prediction"])
-@limiter.limit(RATE_LIMIT_PREDICT)
-async def predict_pr_sentiment(request: Request, payload: PredictionRequest):
-    """
-    Predict PR sentiment for a LinkedIn post.
-
-    Analyzes the provided post text and returns a prediction of whether it will
-    generate positive or negative PR, along with confidence scores and extracted
-    features.
-    """
-    if prediction_service is None:
-        raise HTTPException(
-            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
-            detail="Prediction service not initialized",
-        )
-
-    try:
-        # The prediction makes a synchronous network call to the Gemini
-        # embedding API; run it off the event loop so concurrent requests
-        # aren't blocked.
-        result = await run_in_threadpool(
-            prediction_service.predict,
-            text=payload.text,
-            post_hour=payload.post_hour,
-            post_day_of_week=payload.post_day_of_week,
-            post_month=payload.post_month,
-            has_media=payload.has_media,
-            media_count=payload.media_count,
-            media_type=payload.media_type,
-            post_type=payload.post_type,
-            author_follower_count=payload.author_follower_count,
-            avg_sentiment=payload.avg_sentiment,
-            median_sentiment=payload.median_sentiment,
-            num_comments_analyzed=payload.num_comments_analyzed,
-        )
-
-        result["timestamp"] = _utc_now()
-        return PredictionResponse(**result)
-
-    except ValueError as e:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail=f"Invalid input: {str(e)}",
-        )
-    except RuntimeError as e:
-        logger.error("Prediction failed: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=f"Prediction failed: {str(e)}",
-        )
-    except Exception as e:
-        logger.exception("Unexpected error during prediction")
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=f"Unexpected error: {str(e)}",
-        )
-
-
-@app.get("/model-info", tags=["General"])
-async def model_info():
-    """Get information about the loaded model"""
-    if prediction_service is None:
-        raise HTTPException(
-            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
-            detail="Prediction service not initialized",
-        )
-
-    embedding_dim = 30 if prediction_service.pca else 768
-    return {
-        "model_directory": prediction_service.model_dir,
-        "pca_enabled": prediction_service.pca is not None,
-        "embedding_model": "Gemini models/embedding-001",
-        "classifier_model": "XGBoost",
-        "features": {
-            "embedding_dimension": embedding_dim,
-            "metadata_features": len(prediction_service.metadata_features),
-            "total_features": embedding_dim + len(prediction_service.metadata_features),
-        },
-        "encoders": {
-            "post_types": list(prediction_service.post_type_encoder.classes_),
-            "media_types": list(prediction_service.media_type_encoder.classes_),
-        },
-    }
-
-
-# Local development entrypoint.
-# In production (Render) the app is served by gunicorn/uvicorn via the start
-# command, which binds to $PORT — see render.yaml.
-if __name__ == "__main__":
-    if not os.getenv("GEMINI_API_KEY"):
-        print("❌ Error: GEMINI_API_KEY environment variable not set")
-        print("   Please set it with: export GEMINI_API_KEY='your-api-key'")
-        raise SystemExit(1)
-
-    uvicorn.run(
-        "api:app",
-        host=os.getenv("HOST", "0.0.0.0"),
-        port=int(os.getenv("PORT", "8000")),
-        reload=os.getenv("RELOAD", "false").lower() == "true",
-        log_level=os.getenv("LOG_LEVEL", "info").lower(),
-    )
diff --git a/app/api/gemini/route.ts b/app/api/gemini/route.ts
index 9b054d3..cdab5af 100644
--- a/app/api/gemini/route.ts
+++ b/app/api/gemini/route.ts
@@ -1,6 +1,6 @@
 import { generateText } from 'ai'
 import { NextRequest, NextResponse } from 'next/server'
-import { getModel } from '@/lib/ai/provider'
+import { getModel, DEFAULT_AI_MODEL } from '@/lib/ai/provider'
 import { withRateLimit } from '@/lib/google-ai/rate-limiter'
 import { enforceRateLimit } from '@/lib/ratelimit'
 
@@ -30,7 +30,7 @@ export async function POST(request: NextRequest) {
     return NextResponse.json({
       success: true,
       response: text,
-      model: model || process.env.AI_MODEL || 'google/gemini-2.0-flash',
+      model: model || process.env.AI_MODEL || DEFAULT_AI_MODEL,
     })
   } catch (error: any) {
     console.error('Gemini API error:', error)
@@ -48,6 +48,6 @@ export async function GET() {
   return NextResponse.json({
     message: 'AI text generation endpoint (Vercel AI SDK)',
     usage: 'POST with { "prompt": "your prompt", "model": "optional provider/model string" }',
-    defaultModel: process.env.AI_MODEL || 'google/gemini-2.0-flash',
+    defaultModel: process.env.AI_MODEL || DEFAULT_AI_MODEL,
   })
 }
diff --git a/app/api/predict/route.ts b/app/api/predict/route.ts
deleted file mode 100644
index b2a2690..0000000
--- a/app/api/predict/route.ts
+++ /dev/null
@@ -1,60 +0,0 @@
-import { NextRequest, NextResponse } from 'next/server'
-import { enforceRateLimit } from '@/lib/ratelimit'
-
-// Server-side proxy to the PR-sentiment FastAPI service (api.py /predict).
-// Keeps the backend URL server-side (no CORS dependency from the browser) and
-// lets us swap localhost for the deployed Render URL via one env var.
-const ML_API_URL = process.env.ML_API_URL || 'http://localhost:8000'
-
-// The embedding step calls the Gemini API, so allow generous headroom.
-const DEFAULT_TIMEOUT_MS = 30000
-
-export async function POST(req: NextRequest) {
-  const limited = enforceRateLimit(req)
-  if (limited) return limited
-
-  let body: { text?: string } & Record<string, unknown>
-  try {
-    body = await req.json()
-  } catch {
-    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
-  }
-
-  if (!body?.text || !String(body.text).trim()) {
-    return NextResponse.json({ error: 'text is required' }, { status: 400 })
-  }
-
-  const controller = new AbortController()
-  const timeout = setTimeout(() => controller.abort(), DEFAULT_TIMEOUT_MS)
-
-  try {
-    const response = await fetch(`${ML_API_URL}/predict`, {
-      method: 'POST',
-      headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify(body),
-      signal: controller.signal,
-    })
-    clearTimeout(timeout)
-
-    const data = await response.json().catch(() => null)
-
-    if (!response.ok) {
-      return NextResponse.json(
-        {
-          error: 'Upstream ML API error',
-          detail: data?.detail ?? response.statusText,
-        },
-        { status: response.status === 422 ? 422 : 502 }
-      )
-    }
-
-    return NextResponse.json(data)
-  } catch (error: any) {
-    clearTimeout(timeout)
-    const isAbort = error?.name === 'AbortError'
-    return NextResponse.json(
-      { error: isAbort ? 'ML API timeout' : 'Failed to call ML API', detail: String(error) },
-      { status: 502 }
-    )
-  }
-}
diff --git a/app/sentiment-analyzer/layout.tsx b/app/sentiment-analyzer/layout.tsx
deleted file mode 100644
index 1dc6eaf..0000000
--- a/app/sentiment-analyzer/layout.tsx
+++ /dev/null
@@ -1,19 +0,0 @@
-import type { Metadata } from "next";
-
-export const metadata: Metadata = {
-  title: "LinkedIn PR Sentiment Analyzer - AI-Powered Post Analysis",
-  description: "Predict whether your LinkedIn post will generate positive or negative PR using AI-powered sentiment analysis with Gemini embeddings and XGBoost",
-  keywords: ["LinkedIn", "PR", "Sentiment Analysis", "AI", "Machine Learning", "XGBoost", "Gemini"],
-};
-
-export default function SentimentAnalyzerLayout({
-  children,
-}: {
-  children: React.ReactNode;
-}) {
-  return children;
-}
-
-
-
-
diff --git a/app/sentiment-analyzer/page.tsx b/app/sentiment-analyzer/page.tsx
deleted file mode 100644
index 18e4ff5..0000000
--- a/app/sentiment-analyzer/page.tsx
+++ /dev/null
@@ -1,453 +0,0 @@
-'use client';
-
-import { useState } from 'react';
-
-interface PredictionResult {
-  prediction: string;
-  confidence: number;
-  probabilities: {
-    positive: number;
-    negative: number;
-  };
-  features_extracted: {
-    text_length: number;
-    emoji_count: number;
-    url_count: number;
-    hashtag_count: number;
-    mention_count: number;
-    embedding_dimension: number;
-  };
-  timestamp: string;
-}
-
-interface FormData {
-  text: string;
-  post_hour: number;
-  post_day_of_week: number;
-  post_month: number;
-  has_media: number;
-  media_count: number;
-  media_type: string;
-  post_type: string;
-  author_follower_count: number;
-}
-
-// Calls the same-origin Next.js proxy (app/api/predict/route.ts), which forwards
-// to the FastAPI service at ML_API_URL. Avoids hardcoding the backend URL and
-// any browser CORS dependency.
-const PREDICT_ENDPOINT = '/api/predict';
-
-const examples = {
-  1: {
-    text: "Excited to announce our new AI-powered analytics platform! This will transform how businesses understand their customers. Join us at the launch event next week! 🚀 #AI #Innovation #TechNews",
-    has_media: 1,
-    media_count: 1,
-    media_type: "image"
-  },
-  2: {
-    text: "We deeply regret the service outage that affected our customers yesterday. We take full responsibility and are implementing measures to prevent this from happening again. Your trust is our priority.",
-    has_media: 0,
-    media_count: 0,
-    media_type: "none"
-  },
-  3: {
-    text: "Proud to share that our team won the Best Workplace Award 2024! This achievement reflects our commitment to creating an inclusive and innovative environment. Thank you to everyone who made this possible! 🏆✨",
-    has_media: 1,
-    media_count: 2,
-    media_type: "image"
-  }
-};
-
-export default function SentimentAnalyzer() {
-  const [formData, setFormData] = useState<FormData>({
-    text: '',
-    post_hour: 12,
-    post_day_of_week: 2,
-    post_month: 1,
-    has_media: 0,
-    media_count: 0,
-    media_type: 'none',
-    post_type: 'regular',
-    author_follower_count: 1000
-  });
-
-  const [result, setResult] = useState<PredictionResult | null>(null);
-  const [loading, setLoading] = useState(false);
-  const [error, setError] = useState<string | null>(null);
-  const [showAdvanced, setShowAdvanced] = useState(false);
-
-  const loadExample = (num: 1 | 2 | 3) => {
-    const example = examples[num];
-    setFormData(prev => ({
-      ...prev,
-      text: example.text,
-      has_media: example.has_media,
-      media_count: example.media_count,
-      media_type: example.media_type
-    }));
-  };
-
-  const handleSubmit = async (e: React.FormEvent) => {
-    e.preventDefault();
-    setLoading(true);
-    setError(null);
-    setResult(null);
-
-    try {
-      const response = await fetch(PREDICT_ENDPOINT, {
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-        },
-        body: JSON.stringify(formData)
-      });
-
-      if (!response.ok) {
-        const errorData = await response.json();
-        throw new Error(errorData.detail || 'Prediction failed');
-      }
-
-      const data: PredictionResult = await response.json();
-      setResult(data);
-    } catch (err) {
-      setError(err instanceof Error ? err.message : 'An error occurred');
-    } finally {
-      setLoading(false);
-    }
-  };
-
-  return (
-    <div className="min-h-screen bg-gradient-to-br from-[#0b1220] via-[#0c1628] to-[#0a0f1c] text-slate-100 py-8 px-4">
-      <div className="max-w-7xl mx-auto">
-        {/* Header */}
-        <div className="text-center text-slate-50 mb-12">
-          <h1 className="text-5xl font-bold mb-3">
-            LinkedIn PR Sentiment Analyzer
-          </h1>
-          <p className="text-xl opacity-80">
-            Predict whether your LinkedIn post will generate positive or negative PR using AI
-          </p>
-        </div>
-
-        {/* Main Content */}
-        <div className="grid grid-cols-1 lg:grid-cols-2 gap-8">
-          {/* Input Section */}
-          <div className="bg-white/5 backdrop-blur-xl border border-white/10 rounded-3xl shadow-2xl p-8">
-            <h2 className="text-2xl font-bold text-sky-200 mb-6 flex items-center gap-3">
-              <svg className="w-8 h-8" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
-                <path d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"></path>
-                <path d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"></path>
-              </svg>
-              Input Your LinkedIn Post
-            </h2>
-
-            <form onSubmit={handleSubmit} className="space-y-6">
-              {/* Text Area */}
-              <div>
-                <label htmlFor="text" className="block mb-2 font-semibold text-slate-200">
-                  Post Text *
-                </label>
-                <textarea
-                  id="text"
-                  value={formData.text}
-                  onChange={(e) => setFormData({ ...formData, text: e.target.value })}
-                  placeholder="Enter your LinkedIn post text here..."
-                  required
-                  className="w-full px-4 py-3 bg-white/5 border border-white/15 text-slate-50 placeholder:text-slate-400 rounded-xl focus:border-sky-400 focus:ring-4 focus:ring-sky-500/30 outline-none transition-all min-h-[150px] resize-y"
-                />
-              </div>
-
-              {/* Advanced Options Toggle */}
-              <div
-                onClick={() => setShowAdvanced(!showAdvanced)}
-                className="text-center text-sky-300 font-semibold cursor-pointer hover:underline"
-              >
-                ⚙️ Advanced Options (Optional) {showAdvanced ? '▲' : '▼'}
-              </div>
-
-              {/* Advanced Options */}
-              {showAdvanced && (
-                <div className="grid grid-cols-2 gap-4 border-t border-white/10 pt-6">
-                  <div>
-                    <label className="block mb-2 font-semibold text-slate-200 text-sm">
-                      Post Hour (0-23)
-                    </label>
-                    <input
-                      type="number"
-                      min="0"
-                      max="23"
-                      value={formData.post_hour}
-                      onChange={(e) => setFormData({ ...formData, post_hour: parseInt(e.target.value) })}
-                      className="w-full px-4 py-2 bg-white/5 border border-white/15 text-slate-50 rounded-lg focus:border-sky-400 focus:ring-2 focus:ring-sky-500/30 outline-none"
-                    />
-                  </div>
-
-                  <div>
-                    <label className="block mb-2 font-semibold text-slate-200 text-sm">
-                      Day of Week
-                    </label>
-                    <select
-                      value={formData.post_day_of_week}
-                      onChange={(e) => setFormData({ ...formData, post_day_of_week: parseInt(e.target.value) })}
-                      className="w-full px-4 py-2 bg-white/5 border border-white/15 text-slate-50 rounded-lg focus:border-sky-400 focus:ring-2 focus:ring-sky-500/30 outline-none"
-                    >
-                      <option value="0">Monday</option>
-                      <option value="1">Tuesday</option>
-                      <option value="2">Wednesday</option>
-                      <option value="3">Thursday</option>
-                      <option value="4">Friday</option>
-                      <option value="5">Saturday</option>
-                      <option value="6">Sunday</option>
-                    </select>
-                  </div>
-
-                  <div>
-                    <label className="block mb-2 font-semibold text-slate-200 text-sm">
-                      Has Media?
-                    </label>
-                    <select
-                      value={formData.has_media}
-                      onChange={(e) => setFormData({ ...formData, has_media: parseInt(e.target.value) })}
-                      className="w-full px-4 py-2 bg-white/5 border border-white/15 text-slate-50 rounded-lg focus:border-sky-400 focus:ring-2 focus:ring-sky-500/30 outline-none"
-                    >
-                      <option value="0">No</option>
-                      <option value="1">Yes</option>
-                    </select>
-                  </div>
-
-                  <div>
-                    <label className="block mb-2 font-semibold text-slate-200 text-sm">
-                      Media Count
-                    </label>
-                    <input
-                      type="number"
-                      min="0"
-                      value={formData.media_count}
-                      onChange={(e) => setFormData({ ...formData, media_count: parseInt(e.target.value) })}
-                      className="w-full px-4 py-2 bg-white/5 border border-white/15 text-slate-50 rounded-lg focus:border-sky-400 focus:ring-2 focus:ring-sky-500/30 outline-none"
-                    />
-                  </div>
-
-                  <div>
-                    <label className="block mb-2 font-semibold text-slate-200 text-sm">
-                      Media Type
-                    </label>
-                    <select
-                      value={formData.media_type}
-                      onChange={(e) => setFormData({ ...formData, media_type: e.target.value })}
-                      className="w-full px-4 py-2 bg-white/5 border border-white/15 text-slate-50 rounded-lg focus:border-sky-400 focus:ring-2 focus:ring-sky-500/30 outline-none"
-                    >
-                      <option value="none">None</option>
-                      <option value="image">Image</option>
-                      <option value="video">Video</option>
-                    </select>
-                  </div>
-
-                  <div>
-                    <label className="block mb-2 font-semibold text-slate-200 text-sm">
-                      Post Type
-                    </label>
-                    <select
-                      value={formData.post_type}
-                      onChange={(e) => setFormData({ ...formData, post_type: e.target.value })}
-                      className="w-full px-4 py-2 bg-white/5 border border-white/15 text-slate-50 rounded-lg focus:border-sky-400 focus:ring-2 focus:ring-sky-500/30 outline-none"
-                    >
-                      <option value="regular">Regular</option>
-                      <option value="article">Article</option>
-                    </select>
-                  </div>
-
-                  <div className="col-span-2">
-                    <label className="block mb-2 font-semibold text-slate-200 text-sm">
-                      Follower Count
-                    </label>
-                    <input
-                      type="number"
-                      min="0"
-                      value={formData.author_follower_count}
-                      onChange={(e) => setFormData({ ...formData, author_follower_count: parseInt(e.target.value) })}
-                      className="w-full px-4 py-2 bg-white/5 border border-white/15 text-slate-50 rounded-lg focus:border-sky-400 focus:ring-2 focus:ring-sky-500/30 outline-none"
-                    />
-                  </div>
-                </div>
-              )}
-
-              {/* Submit Button */}
-              <button
-                type="submit"
-                disabled={loading}
-                className="w-full py-4 bg-gradient-to-r from-sky-500 to-blue-600 text-white font-bold text-lg rounded-xl hover:shadow-lg hover:-translate-y-0.5 transition-all disabled:opacity-60 disabled:cursor-not-allowed"
-              >
-                {loading ? '🔄 Analyzing...' : '🚀 Analyze Sentiment'}
-              </button>
-            </form>
-
-            {/* Examples */}
-            <div className="mt-6 p-4 bg-white/5 border border-white/10 rounded-xl">
-              <h4 className="font-semibold text-sky-100 mb-3 text-sm">
-                📝 Try Example Posts:
-              </h4>
-              <div className="space-y-2">
-                <button
-                  type="button"
-                  onClick={() => loadExample(1)}
-                  className="w-full text-left px-3 py-2 bg-white/5 text-slate-100 hover:bg-white/10 rounded-lg text-sm transition-all hover:translate-x-1"
-                >
-                  Positive: Product Launch Announcement
-                </button>
-                <button
-                  type="button"
-                  onClick={() => loadExample(2)}
-                  className="w-full text-left px-3 py-2 bg-white/5 text-slate-100 hover:bg-white/10 rounded-lg text-sm transition-all hover:translate-x-1"
-                >
-                  Negative: Service Outage Apology
-                </button>
-                <button
-                  type="button"
-                  onClick={() => loadExample(3)}
-                  className="w-full text-left px-3 py-2 bg-white/5 text-slate-100 hover:bg-white/10 rounded-lg text-sm transition-all hover:translate-x-1"
-                >
-                  Positive: Award Achievement
-                </button>
-              </div>
-            </div>
-          </div>
-
-          {/* Output Section */}
-          <div className="bg-white/5 backdrop-blur-xl border border-white/10 rounded-3xl shadow-2xl p-8">
-            <h2 className="text-2xl font-bold text-sky-200 mb-6 flex items-center gap-3">
-              <svg className="w-8 h-8" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
-                <polyline points="9 11 12 14 22 4"></polyline>
-                <path d="M21 12v7a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h11"></path>
-              </svg>
-              Prediction Results
-            </h2>
-
-            {/* Loading State */}
-            {loading && (
-              <div className="text-center py-12">
-                <div className="inline-block w-12 h-12 border-4 border-white/20 border-t-sky-400 rounded-full animate-spin"></div>
-                <p className="mt-4 text-slate-300">Analyzing your post...</p>
-              </div>
-            )}
-
-            {/* Error State */}
-            {error && (
-              <div className="bg-red-500/10 border border-red-400/60 text-red-100 rounded-xl p-4">
-                <strong>Error:</strong> {error}
-              </div>
-            )}
-
-            {/* Results */}
-            {result && !loading && (
-              <div className="space-y-6 animate-fadeIn">
-                {/* Prediction Badge */}
-                <div>
-                  <span
-                    className={`inline-block px-6 py-3 rounded-full text-xl font-bold ${
-                      result.prediction === 'positive'
-                        ? 'bg-gradient-to-r from-emerald-500 to-green-400 text-white'
-                        : 'bg-gradient-to-r from-red-500 to-orange-500 text-white'
-                    }`}
-                  >
-                    {result.prediction === 'positive' ? '✅ Positive PR' : '⚠️ Negative PR'}
-                  </span>
-                </div>
-
-                {/* Confidence Bar */}
-                <div>
-                  <h3 className="font-bold text-slate-100 mb-2">Confidence Level</h3>
-                  <div className="w-full h-10 bg-white/10 rounded-full overflow-hidden">
-                    <div
-                      className="h-full bg-gradient-to-r from-sky-500 to-blue-600 flex items-center justify-end pr-4 text-white font-semibold transition-all duration-1000"
-                      style={{ width: `${result.confidence * 100}%` }}
-                    >
-                      {(result.confidence * 100).toFixed(1)}%
-                    </div>
-                  </div>
-                </div>
-
-                {/* Probabilities */}
-                <div>
-                  <h3 className="font-bold text-slate-100 mb-3">Probabilities</h3>
-                  <div className="grid grid-cols-2 gap-4">
-                    <div className="p-4 bg-gradient-to-br from-emerald-500/15 to-green-500/10 border border-emerald-300/40 rounded-xl text-center">
-                      <div className="text-sm text-emerald-100 mb-1">Positive PR</div>
-                      <div className="text-3xl font-bold text-emerald-200">
-                        {(result.probabilities.positive * 100).toFixed(1)}%
-                      </div>
-                    </div>
-                    <div className="p-4 bg-gradient-to-br from-red-500/15 to-orange-500/10 border border-orange-300/40 rounded-xl text-center">
-                      <div className="text-sm text-orange-100 mb-1">Negative PR</div>
-                      <div className="text-3xl font-bold text-orange-200">
-                        {(result.probabilities.negative * 100).toFixed(1)}%
-                      </div>
-                    </div>
-                  </div>
-                </div>
-
-                {/* Features */}
-                <div className="bg-white/5 border border-white/10 rounded-xl p-6">
-                  <h3 className="font-bold text-sky-200 mb-4">📊 Extracted Features</h3>
-                  <div className="grid grid-cols-2 gap-3">
-                    <div className="flex justify-between items-center bg-white/5 border border-white/10 rounded-lg p-3">
-                      <span className="text-sm text-slate-300">Text Length</span>
-                      <span className="font-semibold text-slate-50">{result.features_extracted.text_length}</span>
-                    </div>
-                    <div className="flex justify-between items-center bg-white/5 border border-white/10 rounded-lg p-3">
-                      <span className="text-sm text-slate-300">Emoji Count</span>
-                      <span className="font-semibold text-slate-50">{result.features_extracted.emoji_count}</span>
-                    </div>
-                    <div className="flex justify-between items-center bg-white/5 border border-white/10 rounded-lg p-3">
-                      <span className="text-sm text-slate-300">URL Count</span>
-                      <span className="font-semibold text-slate-50">{result.features_extracted.url_count}</span>
-                    </div>
-                    <div className="flex justify-between items-center bg-white/5 border border-white/10 rounded-lg p-3">
-                      <span className="text-sm text-slate-300">Hashtag Count</span>
-                      <span className="font-semibold text-slate-50">{result.features_extracted.hashtag_count}</span>
-                    </div>
-                    <div className="flex justify-between items-center bg-white/5 border border-white/10 rounded-lg p-3">
-                      <span className="text-sm text-slate-300">Mention Count</span>
-                      <span className="font-semibold text-slate-50">{result.features_extracted.mention_count}</span>
-                    </div>
-                    <div className="flex justify-between items-center bg-white/5 border border-white/10 rounded-lg p-3">
-                      <span className="text-sm text-slate-300">Embedding Dim</span>
-                      <span className="font-semibold text-slate-50">{result.features_extracted.embedding_dimension}</span>
-                    </div>
-                  </div>
-                </div>
-              </div>
-            )}
-
-            {/* Empty State */}
-            {!result && !loading && !error && (
-              <div className="text-center py-12 text-slate-400">
-                <svg className="w-20 h-20 mx-auto mb-4 opacity-50" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="1.5">
-                  <path d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z"></path>
-                </svg>
-                <p>Enter a post and click analyze to see results</p>
-              </div>
-            )}
-          </div>
-        </div>
-      </div>
-
-      <style jsx>{`
-        @keyframes fadeIn {
-          from {
-            opacity: 0;
-            transform: translateY(20px);
-          }
-          to {
-            opacity: 1;
-            transform: translateY(0);
-          }
-        }
-        .animate-fadeIn {
-          animation: fadeIn 0.5s ease-out;
-        }
-      `}</style>
-    </div>
-  );
-}
-
diff --git a/docs/API_README.md b/docs/API_README.md
index 352fec3..a70b3dd 100644
--- a/docs/API_README.md
+++ b/docs/API_README.md
@@ -1,51 +1,42 @@
-# LinkedIn PR Sentiment Classifier API
+# Lyra ML API
 
-A FastAPI-based REST API that predicts whether a LinkedIn post will generate positive or negative PR using machine learning (XGBoost + Gemini embeddings).
+A FastAPI service (`services/ml_api`) that scores recruiting/PR post text using a
+**provider-free TF-IDF engine**. It predicts an audience/role distribution,
+narrative flags, and a Helpful/Harmless/Harmful risk class, with n-gram evidence
+for each. No AI provider key is required and logging uses local sqlite.
 
 ## 🚀 Quick Start
 
 ### Prerequisites
 
-- Python 3.8+
-- Gemini API key from [Google AI Studio](https://makersuite.google.com/app/apikey)
-- Trained model files in the `output/` directory
+- Python 3.12
+- Trained model files in the `output/models/` directory (`metadata.json`,
+  `role_model.joblib`, `narrative_model.joblib`, `risk_model.joblib`, …)
 
 ### Installation
 
 1. **Install dependencies:**
 ```bash
-pip install -r requirements_api.txt
+pip install -r services/ml_api/requirements.txt
 ```
 
-2. **Set up environment variables:**
+2. **(Optional) Configure environment variables:**
 ```bash
-# Copy the example environment file
-cp env.example .env
-
-# Edit .env and add your Gemini API key
-export GEMINI_API_KEY='your_gemini_api_key_here'
-```
-
-3. **Verify model files exist:**
-```bash
-ls output/
-# Should contain:
-# - pr_classifier_model.pkl
-# - feature_scaler.pkl
-# - post_type_encoder.pkl
-# - media_type_encoder.pkl
+# Override the model directory or CORS origin if needed.
+export MODEL_DIR=output/models
+export FRONTEND_ORIGIN=http://localhost:3000
 ```
 
 ### Running the API
 
 **Development mode:**
 ```bash
-python api.py
+python -m services.ml_api.main
 ```
 
 **Production mode with Gunicorn:**
 ```bash
-gunicorn -w 4 -k uvicorn.workers.UvicornWorker api:app --bind 0.0.0.0:8000
+gunicorn services.ml_api.main:app -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:8000
 ```
 
 The API will be available at `http://localhost:8000`
@@ -68,104 +59,128 @@ GET /health
 **Response:**
 ```json
 {
-  "status": "healthy",
-  "message": "Service is running",
-  "model_loaded": true,
-  "timestamp": "2025-12-15T10:30:00Z"
+  "status": "ok",
+  "models_loaded": true
 }
 ```
 
-#### 2. Make Prediction
+#### 2. Analyze a Post
 ```http
-POST /predict
+POST /analyze
 Content-Type: application/json
 ```
 
+Optional query param: `?save=false` to skip logging the run to sqlite (defaults
+to `true`).
+
 **Request Body (Minimal):**
 ```json
 {
-  "text": "Excited to announce our new AI-powered analytics platform! #AI #Innovation"
+  "post_text": "We are scaling our AI team fast. Expect late nights but huge impact."
 }
 ```
 
 **Request Body (Full):**
 ```json
 {
-  "text": "Excited to announce our new AI-powered analytics platform!",
-  "post_hour": 14,
-  "post_day_of_week": 2,
-  "post_month": 12,
-  "has_media": 1,
-  "media_count": 1,
-  "media_type": "image",
-  "post_type": "regular",
-  "author_follower_count": 5000,
-  "avg_sentiment": 0.5,
-  "median_sentiment": 0.6,
-  "num_comments_analyzed": 10
+  "post_text": "We are scaling our AI team fast. Expect late nights but huge impact.",
+  "company_hint": "meta",
+  "variant_id": "A",
+  "user_id": "user-123"
 }
 ```
 
-**Response:**
+| Field | Type | Required | Notes |
+|-------|------|----------|-------|
+| `post_text` | string | ✅ | Post text (trimmed; must be non-empty). |
+| `company_hint` | string | | Optional audience/company hint; echoed back as `audience`. |
+| `variant_id` | string | | Optional `"A"` or `"B"`. |
+| `user_id` | string | | Optional identifier for logging. |
+
+**Response (shape):**
 ```json
 {
-  "prediction": "positive",
-  "confidence": 0.85,
-  "probabilities": {
-    "negative": 0.15,
-    "positive": 0.85
+  "input_text": "We are scaling our AI team fast. Expect late nights but huge impact.",
+  "audience": "meta",
+  "role_distribution_top5": [{ "role": "engineering", "pct": 42.13 }],
+  "role_distribution_all": [{ "role": "engineering", "pct": 42.13 }],
+  "confidence_entropy": 2.31,
+  "risk": {
+    "risk_class": "Harmless",
+    "risk_probs": { "Helpful": 0.21, "Harmless": 0.62, "Harmful": 0.17 },
+    "risk_level": "Medium",
+    "primary_risk_reason": "Top harmful driver: late nights (+0.140)"
   },
-  "features_extracted": {
-    "text_length": 152,
-    "emoji_count": 0,
-    "url_count": 0,
-    "hashtag_count": 2,
-    "mention_count": 0,
-    "embedding_dimension": 30
+  "narratives": {
+    "narrative_probs": { "burnout": 0.31 },
+    "narrative_flags": { "burnout": true }
   },
-  "timestamp": "2025-12-15T10:30:00Z"
+  "evidence": {
+    "risk_top_ngrams": [{ "ngram": "late nights", "weight": 0.14 }],
+    "narrative_top_ngrams": { "burnout": [{ "ngram": "late nights", "weight": 0.22 }] },
+    "role_top_ngrams": { "engineering": [{ "ngram": "ai team", "weight": 0.18 }] }
+  },
+  "meta": {
+    "model_dir_used": "/app/output/models",
+    "timestamp_iso": "2026-05-30T10:30:00Z",
+    "latency_ms": 42,
+    "request_id": "1f0c..."
+  }
 }
 ```
 
-#### 3. Model Information
+`risk_level` is derived from the max risk probability: `High` ≥ 0.75,
+`Medium` ≥ 0.55, otherwise `Low`.
+
+#### 3. Compare Two Variants
 ```http
-GET /model-info
+POST /analyze/compare
+Content-Type: application/json
 ```
 
-**Response:**
+Optional query param: `?save=false` (defaults to `true`).
+
+**Request Body:**
 ```json
 {
-  "model_directory": "output",
-  "pca_enabled": false,
-  "embedding_model": "Gemini models/embedding-001",
-  "classifier_model": "XGBoost",
-  "features": {
-    "embedding_dimension": 768,
-    "metadata_features": 16,
-    "total_features": 784
-  },
-  "encoders": {
-    "post_types": ["regular", "article"],
-    "media_types": ["none", "image", "video"]
+  "baseline_text": "We are hiring engineers.",
+  "variant_text": "We are hiring engineers for 24/7 on-call roles."
+}
+```
+
+**Response (shape):** the full `/analyze` result for each side plus a `delta`:
+```json
+{
+  "baseline": { "...": "full /analyze response" },
+  "variant": { "...": "full /analyze response" },
+  "delta": {
+    "role_pct_delta": { "engineering": -3.21 },
+    "risk_prob_delta": { "Helpful": -0.04, "Harmless": -0.06, "Harmful": 0.10 },
+    "changed_top_phrases": [{ "ngram": "on-call", "weight": 0.19 }]
   }
 }
 ```
 
-## 🧪 Testing
+#### 4. History
+```http
+GET /history?limit=50
+```
 
-Run the test suite:
-```bash
-# Make sure the API is running in another terminal
-python test_api.py
+`limit` defaults to `50` (range 1–200). Returns previously logged runs:
+```json
+{
+  "rows": [{ "...": "logged run" }],
+  "count": 1
+}
 ```
 
-Test with curl:
+## 🧪 Testing
+
+Test with curl (see also `services/ml_api/curl_examples.md`):
 ```bash
-curl -X POST "http://localhost:8000/predict" \
+curl -X POST "http://localhost:8000/analyze" \
   -H "Content-Type: application/json" \
-  -d '{
-    "text": "Excited to announce our new product launch! #Innovation"
-  }'
+  -d '{"post_text":"Hiring for cloud infra SREs."}'
 ```
 
 Test with Python:
@@ -173,176 +188,88 @@ Test with Python:
 import requests
 
 response = requests.post(
-    "http://localhost:8000/predict",
-    json={"text": "Great news! We just won an industry award!"}
+    "http://localhost:8000/analyze",
+    json={"post_text": "We are scaling our AI team fast."},
 )
 
 result = response.json()
-print(f"Prediction: {result['prediction']}")
-print(f"Confidence: {result['confidence']:.2%}")
+print(f"Risk class: {result['risk']['risk_class']}")
+print(f"Risk level: {result['risk']['risk_level']}")
 ```
 
-## 📊 Request Parameters
-
-### Required
-- **text** (string): LinkedIn post text content (1-10,000 characters)
-
-### Optional Metadata
-- **post_hour** (int): Hour of posting (0-23), default: 12
-- **post_day_of_week** (int): Day of week (0=Monday, 6=Sunday), default: 2
-- **post_month** (int): Month (1-12), default: 1
-- **has_media** (int): Has media (0 or 1), default: 0
-- **media_count** (int): Number of media items, default: 0
-- **media_type** (string): Media type (none/image/video), default: "none"
-- **post_type** (string): Post type (regular/article), default: "regular"
-- **author_follower_count** (int): Author follower count, default: 1000
-- **avg_sentiment** (float): Average comment sentiment (-1 to 1), default: 0.0
-- **median_sentiment** (float): Median comment sentiment (-1 to 1), default: 0.0
-- **num_comments_analyzed** (int): Number of comments, default: 0
-
 ## 🔧 Configuration
 
-Environment variables (see `env.example`):
-
 | Variable | Description | Default | Required |
 |----------|-------------|---------|----------|
-| `GEMINI_API_KEY` | Google Gemini API key | - | Yes |
-| `MODEL_DIR` | Directory with model files | `output` | No |
-| `API_HOST` | API server host | `0.0.0.0` | No |
-| `API_PORT` | API server port | `8000` | No |
+| `MODEL_DIR` | Directory with trained model files | `output/models` | No |
+| `FRONTEND_ORIGIN` | Frontend origin allowed by CORS | `http://localhost:3000` | No |
+| `WEB_CONCURRENCY` | gunicorn worker count | `1` | No |
+| `PORT` | API server port | `8000` | No |
 
 ## 🏗️ Architecture
 
 ```
-User Request (POST /predict)
+User Request (POST /analyze)
     ↓
-FastAPI Endpoint
+FastAPI Endpoint (services/ml_api/main.py)
     ↓
-PRClassifierService
-    ├── Feature Extraction
-    │   ├── Gemini Embedding (768-dim)
-    │   ├── Text Features (emoji, URL, hashtag counts)
-    │   ├── Temporal Features (hour, day, month)
-    │   └── Media Features
-    ├── Optional PCA (768 → 30 dimensions)
-    ├── Feature Scaling (StandardScaler)
-    └── XGBoost Prediction
+Predictor (services/ml_api/predictor.py)
+    ├── TF-IDF vectorization
+    ├── Role distribution model
+    ├── Narrative multi-label model
+    ├── Risk model (Helpful / Harmless / Harmful)
+    └── N-gram evidence (top contributing phrases)
+        ↓
+JSON Response (distribution + risk + narratives + evidence)
         ↓
-JSON Response (prediction + confidence)
+(optional) sqlite logging
 ```
 
 ## 📁 Project Structure
 
 ```
-.
-├── api.py                    # FastAPI server
-├── prediction_service.py     # Core prediction logic
-├── requirements_api.txt      # Python dependencies
-├── env.example              # Environment template
-├── test_api.py              # Test suite
-├── API_README.md            # This file
-└── output/                  # Model files
-    ├── pr_classifier_model.pkl
-    ├── feature_scaler.pkl
-    ├── post_type_encoder.pkl
-    └── media_type_encoder.pkl
+services/ml_api/
+├── main.py            # FastAPI app + endpoints
+├── predictor.py       # Core scoring logic
+├── schemas.py         # Request/response models
+├── explain.py         # N-gram contribution helpers
+├── db.py              # sqlite logging
+├── requirements.txt   # Python dependencies
+└── curl_examples.md   # Ready-to-run curl examples
+output/models/         # Trained TF-IDF artifacts
 ```
 
 ## 🐛 Troubleshooting
 
-### Server won't start
+### Server won't start / `/health` returns 500
 
-**Error:** `GEMINI_API_KEY environment variable not set`
+**Error:** `Models not loaded` (or `Model directory not found`)
 ```bash
-# Solution: Set the API key
-export GEMINI_API_KEY='your_key_here'
-```
-
-**Error:** `Failed to load model files`
-```bash
-# Solution: Verify model files exist
-ls output/*.pkl
-
-# If missing, run the training notebook first
-# (attempt2.ipynb)
-```
-
-### Predictions fail
-
-**Error:** `Failed to generate embedding from Gemini API`
-- Check API key is valid
-- Check internet connection
-- Verify Gemini API quota/limits
+# Solution: verify the model artifacts exist
+ls output/models/
 
-**Error:** `Prediction service not initialized`
-- Restart the server
-- Check server logs for startup errors
-
-### Performance issues
-
-If predictions are slow:
-1. Use a faster server (more CPU/RAM)
-2. Consider caching embeddings for repeated texts
-3. Deploy with multiple Gunicorn workers
-
-## 📝 Example Use Cases
-
-### 1. Pre-post Review
-```python
-# Before posting on LinkedIn, check PR sentiment
-text = "Our company is restructuring operations..."
-response = requests.post(url, json={"text": text})
-
-if response.json()["prediction"] == "negative":
-    print("⚠️ Warning: This post may generate negative PR")
+# If MODEL_DIR points elsewhere, set it explicitly
+export MODEL_DIR=output/models
 ```
 
-### 2. Batch Analysis
-```python
-# Analyze multiple draft posts
-posts = [
-    "Announcing our Q4 results...",
-    "Proud to share our sustainability report...",
-    "Updates on recent challenges..."
-]
-
-for post in posts:
-    result = requests.post(url, json={"text": post}).json()
-    print(f"{post[:50]}: {result['prediction']} ({result['confidence']:.0%})")
-```
+### Requests fail
 
-### 3. Integration with Content Calendar
-```python
-# Filter posts by predicted sentiment
-scheduled_posts = load_content_calendar()
-
-safe_posts = []
-for post in scheduled_posts:
-    result = predict(post['text'])
-    if result['prediction'] == 'positive' and result['confidence'] > 0.7:
-        safe_posts.append(post)
-```
+**Error:** `Prediction failed` / `Compare failed`
+- Check the server logs for the underlying traceback.
+- Confirm the model artifacts in `MODEL_DIR` match the deployed code.
 
 ## 🔒 Security Considerations
 
-- **API Key:** Never commit `.env` file or expose API keys
-- **Rate Limiting:** Consider adding rate limiting for production
-- **CORS:** Configure `allow_origins` appropriately for your domain
-- **Authentication:** Add authentication middleware for production use
-- **Input Validation:** API validates input length and format
+- **No provider key:** this service holds no AI provider credentials.
+- **Rate limiting:** the API has no built-in limiter; it is protected at the
+  Next.js proxy layer (`app/api/analyze`, see `lib/ratelimit.ts`).
+- **CORS:** set `FRONTEND_ORIGIN` to your deployed frontend origin.
+- **Input validation:** `post_text` is trimmed and required to be non-empty.
 
 ## 📄 License
 
 This API is part of the Lyra Hackathon project.
 
-## 🤝 Support
-
-For issues or questions:
-1. Check the interactive docs at `/docs`
-2. Review the test suite in `test_api.py`
-3. Check server logs for error details
-
 ---
 
-Built with FastAPI, XGBoost, and Google Gemini AI
-
+Built with FastAPI and scikit-learn (TF-IDF).
diff --git a/env.example b/env.example
index 6d29c59..29426c1 100644
--- a/env.example
+++ b/env.example
@@ -1,65 +1,48 @@
-# LinkedIn PR Sentiment Classifier API - Environment Configuration
+# Lyra — Environment Configuration
 
 # =====================================================================
-# Python FastAPI service (set these on Render)
+# Lyra ML API — services/ml_api (set these on Render)
+# Provider-free TF-IDF recruiting-signal engine. No AI provider key needed.
 # =====================================================================
 
-# Google Gemini API Key (REQUIRED)
-# Get your API key from: https://makersuite.google.com/app/apikey
-GEMINI_API_KEY=your_gemini_api_key_here
+# Directory containing the trained TF-IDF model artifacts.
+# Default: output/models
+MODEL_DIR=output/models
 
-# Model Directory (OPTIONAL)
-# Directory containing the trained model files
-# Default: output
-MODEL_DIR=output
+# Frontend origin allowed by the ML API's CORS (OPTIONAL).
+# Default: http://localhost:3000. The app normally calls the API server-side
+# (via app/api/analyze), so this only matters for direct browser calls.
+FRONTEND_ORIGIN=http://localhost:3000
 
-# API Server Configuration (OPTIONAL)
-# Host/port for the FastAPI server. On Render, PORT is injected automatically.
-# Defaults: HOST=0.0.0.0, PORT=8000
-HOST=0.0.0.0
+# Port for the ML API. On Render, PORT is injected automatically. Default: 8000.
 PORT=8000
 
-# CORS allowed origins for the FastAPI service (OPTIONAL)
-# Comma-separated list, or "*" (default). With "*", credentials are disabled.
-# In production set this to your frontend origin(s).
-ALLOWED_ORIGINS=*
-
-# Logging Level (OPTIONAL)
-# Options: DEBUG, INFO, WARNING, ERROR, CRITICAL
-# Default: INFO
-LOG_LEVEL=INFO
-
-# Rate limit for the FastAPI /predict endpoint (OPTIONAL)
-# slowapi/limits syntax, e.g. "100/hour", "5/second". Default: "30/minute".
-RATE_LIMIT_PREDICT=30/minute
-
-# Shared rate-limit storage backend for slowapi (OPTIONAL)
-# Default is in-memory per-process; set e.g. redis://host:6379/0 for a store
-# shared across gunicorn workers/instances (a consistent global limit).
-RATELIMIT_STORAGE_URI=
-
 # =====================================================================
 # Next.js frontend (set these on Vercel)
 # =====================================================================
 
-# Next.js -> FastAPI backend URL (used by app/api/predict proxy route)
+# Next.js -> ML API URL (used by the app/api/analyze proxy route)
 # Local default: http://localhost:8000. In production set to the Render URL,
-# e.g. https://pr-sentiment-api.onrender.com
+# e.g. https://lyra-ml-api.onrender.com
 ML_API_URL=http://localhost:8000
 
-# AI provider/model selector (OPTIONAL)
-# Single knob to switch AI provider/model. Default: google/gemini-2.0-flash.
-# e.g. openai/gpt-4o-mini (provide the relevant key/gateway when switching).
-AI_MODEL=google/gemini-2.0-flash
+# --- AI provider for the LLM features (persona critique, generation, vision) ---
+
+# OpenAI API key. REQUIRED for the AI features listed above when AI_MODEL is an
+# openai/... model and no AI Gateway is configured (direct-key path, @ai-sdk/openai).
+OPENAI_API_KEY=your_openai_api_key_here
+
+# AI provider/model selector — the single knob to switch provider/model.
+# Default: openai/gpt-4o-mini. e.g. openai/gpt-4o, anthropic/claude-..., google/...
+# (provide the matching provider key or an AI Gateway key when switching).
+AI_MODEL=openai/gpt-4o-mini
 
 # Vercel AI Gateway key (OPTIONAL)
 # Routes provider/model strings through the Gateway (failover, cost tracking).
 # On Vercel, OIDC can enable the Gateway without setting this.
 AI_GATEWAY_API_KEY=
 
-# Direct Google key read by @ai-sdk/google (OPTIONAL)
-# Preferred name for the direct Google key; GEMINI_API_KEY is used as a
-# fallback if this is unset.
+# Optional alternate direct provider key (for AI_MODEL=google/...)
 GOOGLE_GENERATIVE_AI_API_KEY=
 
 # Inbound rate limiter for public Next.js POST routes (OPTIONAL)
@@ -70,13 +53,9 @@ RATE_LIMIT_WINDOW_MS=60000
 
 # Outbound throttle on calls to the AI provider (OPTIONAL)
 # Max requests per window and the window length in ms.
-# Defaults: GEMINI_RATE_LIMIT_MAX_REQUESTS=15, GEMINI_RATE_LIMIT_WINDOW_MS=60000.
+# Defaults: 15 requests / 60000 ms. (The GEMINI_ prefix is historical; the throttle applies to any provider.)
 GEMINI_RATE_LIMIT_MAX_REQUESTS=15
 GEMINI_RATE_LIMIT_WINDOW_MS=60000
 
-# Neon Postgres connection string (pooled)
+# Neon Postgres connection string (pooled) — used by the Next.js app (drizzle).
 DATABASE_URL=postgresql://USER:PASSWORD@HOST/DB?sslmode=require
-
-
-
-
diff --git a/lib/ai/provider.ts b/lib/ai/provider.ts
index c2ee0b6..4c54ece 100644
--- a/lib/ai/provider.ts
+++ b/lib/ai/provider.ts
@@ -6,27 +6,30 @@
  *
  * How model resolution works (per Vercel guidance, AI Gateway first):
  *
- * 1. By DEFAULT we pass the `AI_MODEL` string (e.g. "google/gemini-2.0-flash")
+ * 1. By DEFAULT we pass the `AI_MODEL` string (e.g. "openai/gpt-4o-mini")
  *    straight to the AI SDK. When an AI Gateway credential is present
  *    (`AI_GATEWAY_API_KEY`, or Vercel OIDC token in production) the SDK routes
  *    "provider/model" strings through the Gateway automatically. This gives you
  *    provider failover, cost tracking, and unified billing for free.
  *
  * 2. As a concrete DIRECT-KEY fallback (so this works locally with no Gateway),
- *    if a Google key is set (`GEMINI_API_KEY` / `GOOGLE_GENERATIVE_AI_API_KEY`)
- *    and NO Gateway key is present, we resolve the model through the
- *    `@ai-sdk/google` provider directly. The leading "google/" prefix is
- *    stripped before handing the bare model id to the provider.
+ *    if a provider key is set and NO Gateway key is present, we resolve the
+ *    model through that provider's `@ai-sdk/*` package directly. The leading
+ *    "<provider>/" prefix is stripped before handing the bare model id over.
+ *    Currently OpenAI (`OPENAI_API_KEY`) and Google (`GOOGLE_GENERATIVE_AI_API_KEY`
+ *    / `GEMINI_API_KEY`) are wired; add another by installing its `@ai-sdk/*`
+ *    package and extending the fallback branch below.
  *
- * To swap to, say, OpenAI via the Gateway: set `AI_MODEL="openai/gpt-4o-mini"`
- * and provide `AI_GATEWAY_API_KEY`. To swap the direct provider you'd add the
- * matching `@ai-sdk/<provider>` package and extend the fallback branch below.
+ * To swap providers via the Gateway: set `AI_MODEL` (e.g. "anthropic/claude-...")
+ * and provide `AI_GATEWAY_API_KEY`. To swap the direct provider, set `AI_MODEL`
+ * and the matching provider key (e.g. `OPENAI_API_KEY`).
  */
+import { createOpenAI } from '@ai-sdk/openai'
 import { createGoogleGenerativeAI } from '@ai-sdk/google'
 import type { LanguageModel } from 'ai'
 
 /** Default model string. Gateway-style "provider/model" form. */
-export const DEFAULT_AI_MODEL = 'google/gemini-2.0-flash'
+export const DEFAULT_AI_MODEL = 'openai/gpt-4o-mini'
 
 /**
  * Resolve the configured model for use with the AI SDK
@@ -39,25 +42,29 @@ export function getModel(override?: string): LanguageModel {
   const modelId = override || process.env.AI_MODEL || DEFAULT_AI_MODEL
 
   const hasGatewayKey = Boolean(process.env.AI_GATEWAY_API_KEY)
-  const googleApiKey =
-    process.env.GOOGLE_GENERATIVE_AI_API_KEY || process.env.GEMINI_API_KEY
 
   // Prefer the AI Gateway: pass the "provider/model" string through unchanged.
-  // (On Vercel, OIDC also enables the Gateway even without AI_GATEWAY_API_KEY,
-  //  in which case the string form below still routes correctly.)
-  if (hasGatewayKey || !googleApiKey) {
+  // (On Vercel, OIDC can enable the Gateway without this key; that case reaches it via the string fallback below when no direct key matches.)
+  if (hasGatewayKey) {
     return modelId
   }
 
-  // Direct-key fallback: route Google models through @ai-sdk/google so the app
-  // works with only a Gemini key and no Gateway configured.
-  if (modelId.startsWith('google/')) {
+  // Direct-key fallbacks so the app works with just a provider key and no Gateway.
+  const openaiApiKey = process.env.OPENAI_API_KEY
+  if (modelId.startsWith('openai/') && openaiApiKey) {
+    const openai = createOpenAI({ apiKey: openaiApiKey })
+    return openai(modelId.slice('openai/'.length))
+  }
+
+  const googleApiKey =
+    process.env.GOOGLE_GENERATIVE_AI_API_KEY || process.env.GEMINI_API_KEY
+  if (modelId.startsWith('google/') && googleApiKey) {
     const google = createGoogleGenerativeAI({ apiKey: googleApiKey })
     return google(modelId.slice('google/'.length))
   }
 
-  // Non-Google model requested but only a Google key is available: fall back to
-  // the string form and let the SDK/Gateway figure it out (will error clearly
-  // if no credential matches, which is the correct, non-silent behavior).
+  // No matching direct key: fall back to the string form and let the
+  // SDK/Gateway resolve it (errors clearly if no credential matches — the
+  // correct, non-silent behavior).
   return modelId
 }
diff --git a/lib/google-ai/client.ts b/lib/google-ai/client.ts
index 0b01084..1af02a9 100644
--- a/lib/google-ai/client.ts
+++ b/lib/google-ai/client.ts
@@ -8,9 +8,13 @@
  * The exported function signatures and return types are unchanged so existing
  * consumers (council-processor, ab-test-engine) keep working untouched.
  *
- * Note: a default `model` argument is still accepted for backward compatibility,
- * but the resolved model now comes from `getModel(model)` — passing a Gemini
- * model id like "gemini-2.0-flash-exp" still works (it is normalized below).
+ * The optional `model` argument overrides the provider/model for a single call;
+ * when omitted, the resolved model comes from `getModel()` (i.e. the `AI_MODEL`
+ * env var, default openai/gpt-4o-mini). Overrides must be Gateway-style
+ * "provider/model" strings (e.g. "openai/gpt-4o"); bare ids are no longer prefixed.
+ *
+ * NOTE: this directory is named `google-ai/` for historical reasons; it is now
+ * provider-agnostic (OpenAI by default).
  */
 import { generateObject } from 'ai'
 import { z } from 'zod'
@@ -35,18 +39,6 @@ export interface MultimodalContent {
   imageUrls?: string[]
 }
 
-// Default kept for signature compatibility. When a bare Gemini model id is
-// passed we normalize it to the Gateway "google/<model>" form so getModel()
-// can route it correctly.
-const DEFAULT_MODEL = 'gemini-2.0-flash-exp'
-
-function normalizeModel(model: string): string {
-  // Already provider-prefixed (e.g. "google/...", "openai/...") — leave as-is.
-  if (model.includes('/')) return model
-  // Bare Gemini id from legacy callers — assume Google.
-  return `google/${model}`
-}
-
 // Zod schemas drive structured generation (no more manual JSON parsing).
 const critiqueSchema = z.object({
   cringe_score: z
@@ -102,7 +94,7 @@ function buildContentParts(
 export async function getPersonaCritique(
   systemPrompt: string,
   content: string | MultimodalContent,
-  model: string = DEFAULT_MODEL
+  model?: string
 ): Promise<CritiqueResponse> {
   const multimodalContent: MultimodalContent =
     typeof content === 'string' ? { text: content } : content
@@ -128,7 +120,7 @@ Provide a cringe_score (0-100), an excitement_score (0-100), a detailed critique
     const { object } = await withRateLimit(
       () =>
         generateObject({
-          model: getModel(normalizeModel(model)),
+          model: getModel(model),
           schema: critiqueSchema,
           messages: [
             {
@@ -161,7 +153,7 @@ Provide a cringe_score (0-100), an excitement_score (0-100), a detailed critique
 export async function getPersonaVariantEvaluation(
   systemPrompt: string,
   variantContent: string | MultimodalContent,
-  model: string = DEFAULT_MODEL
+  model?: string
 ): Promise<VariantEvaluationResponse> {
   const multimodalContent: MultimodalContent =
     typeof variantContent === 'string' ? { text: variantContent } : variantContent
@@ -187,7 +179,7 @@ Provide a score (0-100) and optional brief feedback about this variant.`
     const { object } = await withRateLimit(
       () =>
         generateObject({
-          model: getModel(normalizeModel(model)),
+          model: getModel(model),
           schema: variantEvaluationSchema,
           messages: [
             {
diff --git a/package-lock.json b/package-lock.json
index 725119e..a7a6c2c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,6 +10,7 @@
       "license": "ISC",
       "dependencies": {
         "@ai-sdk/google": "^3.0.80",
+        "@ai-sdk/openai": "^3.0.67",
         "@google/genai": "^1.33.0",
         "@neondatabase/serverless": "^1.1.0",
         "@supabase/ssr": "^0.8.0",
@@ -69,6 +70,22 @@
         "zod": "^3.25.76 || ^4.1.8"
       }
     },
+    "node_modules/@ai-sdk/openai": {
+      "version": "3.0.67",
+      "resolved": "https://registry.npmjs.org/@ai-sdk/openai/-/openai-3.0.67.tgz",
+      "integrity": "sha512-oAiGC9eWG7IgtdsdS74bOCnAAHarAfTJhWN9x5INwnWPekL802AvF+0I5DvLzIF1MIRmNw4N8mPSL/GUVbX9Mw==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "@ai-sdk/provider": "3.0.10",
+        "@ai-sdk/provider-utils": "4.0.27"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "peerDependencies": {
+        "zod": "^3.25.76 || ^4.1.8"
+      }
+    },
     "node_modules/@ai-sdk/provider": {
       "version": "3.0.10",
       "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-3.0.10.tgz",
diff --git a/package.json b/package.json
index 3e11f98..cccf41d 100644
--- a/package.json
+++ b/package.json
@@ -6,7 +6,7 @@
   "scripts": {
     "dev": "concurrently \"npm run dev:web\" \"npm run dev:ml\"",
     "dev:web": "next dev",
-    "dev:ml": "uvicorn api:app --reload --host 0.0.0.0 --port 8000",
+    "dev:ml": "uvicorn services.ml_api.main:app --reload --host 0.0.0.0 --port 8000",
     "build": "next build",
     "start": "next start",
     "lint": "eslint .",
@@ -25,6 +25,7 @@
   "homepage": "https://github.com/f1shyfang/lyra_hackathon#readme",
   "dependencies": {
     "@ai-sdk/google": "^3.0.80",
+    "@ai-sdk/openai": "^3.0.67",
     "@google/genai": "^1.33.0",
     "@neondatabase/serverless": "^1.1.0",
     "@supabase/ssr": "^0.8.0",
diff --git a/prediction_service.py b/prediction_service.py
deleted file mode 100644
index d5bc72b..0000000
--- a/prediction_service.py
+++ /dev/null
@@ -1,329 +0,0 @@
-"""
-LinkedIn PR Sentiment Prediction Service
-
-This module provides feature extraction and prediction functionality for
-classifying LinkedIn posts as positive or negative PR using Gemini embeddings
-and XGBoost classifier.
-"""
-
-import logging
-import os
-import re
-import numpy as np
-import pandas as pd
-from datetime import datetime
-from typing import Dict, Optional, List
-import google.generativeai as genai
-import joblib
-
-logger = logging.getLogger("pr_api.prediction_service")
-
-
-class PRClassifierService:
-    """Service for predicting PR sentiment of LinkedIn posts"""
-    
-    def __init__(self, model_dir: str = "output", api_key: Optional[str] = None):
-        """
-        Initialize the PR classifier service
-        
-        Args:
-            model_dir: Directory containing the model files
-            api_key: Gemini API key (if not provided, uses GEMINI_API_KEY env var)
-        """
-        self.model_dir = model_dir
-        self.api_key = api_key or os.getenv("GEMINI_API_KEY")
-        
-        if not self.api_key:
-            raise ValueError("GEMINI_API_KEY must be provided or set as environment variable")
-        
-        # Configure Gemini API
-        genai.configure(api_key=self.api_key)
-        
-        # Load model and preprocessors
-        self._load_models()
-        
-        # Define metadata features (must match training order)
-        self.metadata_features = [
-            'text_length', 'emoji_count', 'url_count', 'hashtag_count', 'mention_count',
-            'post_hour', 'post_day_of_week', 'post_month',
-            'has_media', 'media_count', 'media_type_encoded', 'post_type_encoded',
-            'author_follower_count',
-            'avg_sentiment', 'median_sentiment', 'num_comments_analyzed'
-        ]
-    
-    def _load_models(self):
-        """Load trained model and preprocessing objects"""
-        try:
-            self.model = joblib.load(os.path.join(self.model_dir, "pr_classifier_model.pkl"))
-            self.scaler = joblib.load(os.path.join(self.model_dir, "feature_scaler.pkl"))
-            self.post_type_encoder = joblib.load(os.path.join(self.model_dir, "post_type_encoder.pkl"))
-            self.media_type_encoder = joblib.load(os.path.join(self.model_dir, "media_type_encoder.pkl"))
-            
-            # Try to load PCA if it exists (optional)
-            pca_path = os.path.join(self.model_dir, "pca_reducer.pkl")
-            if os.path.exists(pca_path):
-                self.pca = joblib.load(pca_path)
-                logger.info("PCA reducer loaded")
-            else:
-                self.pca = None
-                logger.info("No PCA reducer found (using full embeddings)")
-
-            logger.info("Model and preprocessors loaded successfully")
-
-        except Exception as e:
-            raise RuntimeError(f"Failed to load model files from {self.model_dir}: {e}")
-    
-    def get_gemini_embedding(self, text: str, task_type: str = "RETRIEVAL_DOCUMENT") -> Optional[List[float]]:
-        """
-        Generate embedding for text using Gemini API
-        
-        Args:
-            text: Input text to embed
-            task_type: Type of embedding task
-            
-        Returns:
-            768-dimensional embedding vector or None if failed
-        """
-        try:
-            if not text or pd.isna(text):
-                return None
-            
-            result = genai.embed_content(
-                model="models/embedding-001",
-                content=str(text),
-                task_type=task_type
-            )
-            return result['embedding']
-
-        except Exception as e:
-            logger.error("Error generating embedding: %s", e)
-            return None
-    
-    def count_emojis(self, text: str) -> int:
-        """Count emoji characters in text"""
-        if pd.isna(text) or not text:
-            return 0
-        
-        emoji_pattern = re.compile("["
-            u"\U0001F600-\U0001F64F"  # emoticons
-            u"\U0001F300-\U0001F5FF"  # symbols & pictographs
-            u"\U0001F680-\U0001F6FF"  # transport & map symbols
-            u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
-            u"\U00002702-\U000027B0"
-            u"\U000024C2-\U0001F251"
-            "]+", flags=re.UNICODE)
-        
-        return len(emoji_pattern.findall(text))
-    
-    def count_urls(self, text: str) -> int:
-        """Count URLs in text"""
-        if pd.isna(text) or not text:
-            return 0
-        
-        url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
-        return len(url_pattern.findall(str(text)))
-    
-    def count_hashtags(self, text: str) -> int:
-        """Count hashtags in text"""
-        if pd.isna(text) or not text:
-            return 0
-        
-        return len(re.findall(r'#\w+', str(text)))
-    
-    def count_mentions(self, text: str) -> int:
-        """Count @ mentions in text"""
-        if pd.isna(text) or not text:
-            return 0
-        
-        return len(re.findall(r'@\w+', str(text)))
-    
-    def extract_metadata_features(
-        self,
-        text: str,
-        post_hour: int = 12,
-        post_day_of_week: int = 2,
-        post_month: int = 1,
-        has_media: int = 0,
-        media_count: int = 0,
-        media_type: str = "none",
-        post_type: str = "regular",
-        author_follower_count: int = 1000,
-        avg_sentiment: float = 0.0,
-        median_sentiment: float = 0.0,
-        num_comments_analyzed: int = 0
-    ) -> np.ndarray:
-        """
-        Extract metadata features from post
-        
-        Args:
-            text: Post text content
-            post_hour: Hour of posting (0-23), default 12
-            post_day_of_week: Day of week (0=Mon, 6=Sun), default 2 (Wednesday)
-            post_month: Month (1-12), default 1
-            has_media: Binary flag for media presence, default 0
-            media_count: Number of media items, default 0
-            media_type: Type of media (none/image/video/etc), default "none"
-            post_type: Type of post (regular/article/etc), default "regular"
-            author_follower_count: Follower count, default 1000
-            avg_sentiment: Average comment sentiment, default 0
-            median_sentiment: Median comment sentiment, default 0
-            num_comments_analyzed: Number of comments, default 0
-            
-        Returns:
-            Array of 16 metadata features
-        """
-        # Text-based features
-        text_length = len(str(text)) if text else 0
-        emoji_count = self.count_emojis(text)
-        url_count = self.count_urls(text)
-        hashtag_count = self.count_hashtags(text)
-        mention_count = self.count_mentions(text)
-        
-        # Encode categorical features
-        try:
-            # Handle unknown categories gracefully
-            if post_type not in self.post_type_encoder.classes_:
-                post_type = "regular"
-            post_type_encoded = self.post_type_encoder.transform([post_type])[0]
-        except:
-            post_type_encoded = 0
-        
-        try:
-            if media_type not in self.media_type_encoder.classes_:
-                media_type = "none"
-            media_type_encoded = self.media_type_encoder.transform([media_type])[0]
-        except:
-            media_type_encoded = 0
-        
-        # Combine all features in correct order
-        features = np.array([
-            text_length,
-            emoji_count,
-            url_count,
-            hashtag_count,
-            mention_count,
-            post_hour,
-            post_day_of_week,
-            post_month,
-            has_media,
-            media_count,
-            media_type_encoded,
-            post_type_encoded,
-            author_follower_count,
-            avg_sentiment,
-            median_sentiment,
-            num_comments_analyzed
-        ], dtype=float)
-        
-        return features
-    
-    def predict(
-        self,
-        text: str,
-        post_hour: int = 12,
-        post_day_of_week: int = 2,
-        post_month: int = 1,
-        has_media: int = 0,
-        media_count: int = 0,
-        media_type: str = "none",
-        post_type: str = "regular",
-        author_follower_count: int = 1000,
-        avg_sentiment: float = 0.0,
-        median_sentiment: float = 0.0,
-        num_comments_analyzed: int = 0
-    ) -> Dict:
-        """
-        Predict PR sentiment for a LinkedIn post
-        
-        Args:
-            text: Post text content (required)
-            Other args: Optional metadata features (see extract_metadata_features)
-            
-        Returns:
-            Dictionary containing:
-                - prediction: "positive" or "negative"
-                - confidence: Confidence score (0-1)
-                - probabilities: Dict with positive/negative probabilities
-                - features_extracted: Dict with extracted feature counts
-        """
-        if not text:
-            raise ValueError("Text cannot be empty")
-        
-        # 1. Generate embedding
-        embedding = self.get_gemini_embedding(text)
-        if embedding is None:
-            raise RuntimeError("Failed to generate embedding from Gemini API")
-        
-        embedding_array = np.array(embedding).reshape(1, -1)
-        
-        # 2. Apply PCA if available
-        if self.pca is not None:
-            embedding_array = self.pca.transform(embedding_array)
-        
-        # 3. Extract metadata features
-        metadata_features = self.extract_metadata_features(
-            text=text,
-            post_hour=post_hour,
-            post_day_of_week=post_day_of_week,
-            post_month=post_month,
-            has_media=has_media,
-            media_count=media_count,
-            media_type=media_type,
-            post_type=post_type,
-            author_follower_count=author_follower_count,
-            avg_sentiment=avg_sentiment,
-            median_sentiment=median_sentiment,
-            num_comments_analyzed=num_comments_analyzed
-        ).reshape(1, -1)
-        
-        # 4. Combine features (embeddings first, then metadata)
-        combined_features = np.concatenate([embedding_array, metadata_features], axis=1)
-        
-        # 5. Scale features
-        scaled_features = self.scaler.transform(combined_features)
-        
-        # 6. Make prediction
-        prediction = self.model.predict(scaled_features)[0]
-        probabilities = self.model.predict_proba(scaled_features)[0]
-        
-        # 7. Format response
-        result = {
-            "prediction": "positive" if prediction == 1 else "negative",
-            "confidence": float(max(probabilities)),
-            "probabilities": {
-                "negative": float(probabilities[0]),
-                "positive": float(probabilities[1])
-            },
-            "features_extracted": {
-                "text_length": int(metadata_features[0, 0]),
-                "emoji_count": int(metadata_features[0, 1]),
-                "url_count": int(metadata_features[0, 2]),
-                "hashtag_count": int(metadata_features[0, 3]),
-                "mention_count": int(metadata_features[0, 4]),
-                "embedding_dimension": embedding_array.shape[1]
-            }
-        }
-        
-        return result
-
-
-# Example usage
-if __name__ == "__main__":
-    # Test the service
-    service = PRClassifierService()
-    
-    test_text = """
-    Exciting news! We're launching our new product that will revolutionize 
-    the industry. Join us for the launch event! #Innovation #TechNews
-    """
-    
-    result = service.predict(test_text)
-    print("\nPrediction Result:")
-    print(f"  Prediction: {result['prediction']}")
-    print(f"  Confidence: {result['confidence']:.2%}")
-    print(f"  Probabilities: {result['probabilities']}")
-    print(f"  Features: {result['features_extracted']}")
-
-
-
-
diff --git a/render.yaml b/render.yaml
index 23d8ca9..f7a3298 100644
--- a/render.yaml
+++ b/render.yaml
@@ -1,24 +1,24 @@
-# Render Blueprint for the LinkedIn PR Sentiment Classifier API
+# Render Blueprint for the Lyra ML API (services/ml_api).
 # Docs: https://render.com/docs/blueprint-spec
 #
 # Deploy: push this repo to GitHub, then in the Render dashboard choose
 # "New +" -> "Blueprint" and point it at the repo. Render reads this file.
 #
-# The trained model artifacts in output/ are committed to the repo, so no
-# external object storage is required at deploy time.
+# This is the provider-free TF-IDF recruiting-signal engine. Trained model
+# artifacts in output/models/ are committed to the repo, so no external object
+# storage or AI provider key is required at deploy time.
 
 services:
   - type: web
-    name: pr-sentiment-api
+    name: lyra-ml-api
     runtime: python
     region: oregon
     plan: free               # always-free instance; sleeps after ~15 min idle (≈50s cold start on next request)
     branch: main
-    buildCommand: pip install --no-cache-dir -r requirements_api.txt
+    buildCommand: pip install --no-cache-dir -r services/ml_api/requirements.txt
     # gunicorn manages the process; UvicornWorker runs the ASGI app.
-    # --timeout 120 leaves headroom for the synchronous Gemini embedding call.
     startCommand: >-
-      gunicorn api:app
+      gunicorn services.ml_api.main:app
       --worker-class uvicorn.workers.UvicornWorker
       --bind 0.0.0.0:$PORT
       --workers ${WEB_CONCURRENCY:-1}
@@ -28,20 +28,16 @@ services:
     healthCheckPath: /health
     autoDeploy: true
     envVars:
-      # Required — set the value in the Render dashboard (sync: false keeps the
-      # secret out of this file and out of git).
-      - key: GEMINI_API_KEY
-        sync: false
+      # Directory holding the trained TF-IDF model artifacts (output/models).
       - key: MODEL_DIR
-        value: output
-      # Lock to a single worker: the model + ML libs are memory-heavy and one
-      # worker fits comfortably on small instances. Raise once you size memory.
+        value: output/models
+      # The TF-IDF models load into each worker; one worker is plenty here.
       - key: WEB_CONCURRENCY
         value: "1"
-      # CORS: replace "*" with your frontend origin(s), comma-separated, in prod.
-      - key: ALLOWED_ORIGINS
-        value: "*"
-      - key: LOG_LEVEL
-        value: INFO
+      # CORS: the Next.js frontend origin allowed to call this API directly;
+      # replace the localhost placeholder below with your deployed origin. (The app
+      #  normally calls it server-side via app/api/analyze, so this mainly matters if the browser hits it directly.)
+      - key: FRONTEND_ORIGIN
+        value: "http://localhost:3000"
       - key: PYTHON_VERSION
         value: "3.12.3"
diff --git a/requirements_api.txt b/requirements_api.txt
deleted file mode 100644
index 94aaf8a..0000000
--- a/requirements_api.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-# FastAPI PR Sentiment Classifier - Dependencies
-
-# Web Framework
-fastapi==0.109.0
-uvicorn[standard]==0.27.0
-pydantic==2.5.3
-
-# Machine Learning
-# NOTE: scikit-learn must match the version used to pickle the artifacts in
-# output/ (saved with 1.6.1). A mismatch makes sklearn silently risk
-# "invalid results" on unpickle, so this pin is load-bearing.
-xgboost==2.0.3
-scikit-learn==1.6.1
-numpy==1.26.3
-pandas==2.2.0
-
-# Google Gemini API
-google-generativeai==0.3.2
-
-# Utilities
-python-dotenv==1.0.0
-joblib==1.3.2
-
-# Production server (Render)
-gunicorn==21.2.0
-python-multipart==0.0.6
-
-# Rate limiting (per-client IP). Built on starlette + the `limits` library;
-# compatible with the pinned fastapi==0.109.0 / starlette 0.35.x above.
-slowapi==0.1.9