From c5a7e84f0564bd6b63963dc928311c449a84c0f0 Mon Sep 17 00:00:00 2001 From: Michael Feng Date: Sat, 30 May 2026 19:50:23 +1000 Subject: [PATCH] refactor: switch AI to OpenAI; drop Gemini /predict backend - Default the Vercel AI SDK provider to OpenAI (openai/gpt-4o-mini) via lib/ai/provider.ts getModel(); add @ai-sdk/openai direct-key path (OPENAI_API_KEY), keep AI Gateway + google/ fallback. Make the persona client and /api/gemini route provider-agnostic (no hardcoded Gemini ids). - Drop the Gemini-embedding /predict service entirely (it required a Gemini key and a model trained on Gemini embeddings): remove api.py, prediction_service.py, requirements_api.txt, Procfile, app/api/predict, and the orphaned app/sentiment-analyzer page. - Make services/ml_api (provider-free TF-IDF engine, /analyze) the deployed backend: repoint render.yaml and dev:ml to services.ml_api.main:app. - Update env.example and docs (README, RENDER_DEPLOY, API_README) accordingly. Co-Authored-By: Claude Opus 4.8 (1M context) --- Procfile | 1 - README.md | 50 ++-- RENDER_DEPLOY.md | 88 +++--- api.py | 331 ---------------------- app/api/gemini/route.ts | 6 +- app/api/predict/route.ts | 60 ---- app/sentiment-analyzer/layout.tsx | 19 -- app/sentiment-analyzer/page.tsx | 453 ------------------------------ docs/API_README.md | 355 ++++++++++------------- env.example | 73 ++--- lib/ai/provider.ts | 47 ++-- lib/google-ai/client.ts | 30 +- package-lock.json | 17 ++ package.json | 3 +- prediction_service.py | 329 ---------------------- render.yaml | 34 +-- requirements_api.txt | 30 -- 17 files changed, 311 insertions(+), 1615 deletions(-) delete mode 100644 Procfile delete mode 100644 api.py delete mode 100644 app/api/predict/route.ts delete mode 100644 app/sentiment-analyzer/layout.tsx delete mode 100644 app/sentiment-analyzer/page.tsx delete mode 100644 prediction_service.py delete mode 100644 requirements_api.txt diff --git a/Procfile b/Procfile deleted file mode 100644 index c8de277..0000000 --- 
a/Procfile +++ /dev/null @@ -1 +0,0 @@ -web: gunicorn api:app --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:$PORT --workers ${WEB_CONCURRENCY:-1} --timeout 120 --access-logfile - --error-logfile - diff --git a/README.md b/README.md index 410ee2f..2b6c274 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,14 @@ This project analyzes LinkedIn posts to classify them as generating positive or ## 🏗️ Architecture +> **Note:** the sections below describe the original research/training workflow +> in `attempt2.ipynb` (Gemini embeddings + XGBoost). The **deployed runtime +> backend** is now `services/ml_api` — a provider-free TF-IDF recruiting-signal +> engine that needs no AI provider key. See `docs/API_README.md` for its +> endpoints (`/health`, `/analyze`, `/analyze/compare`, `/history`) and +> `RENDER_DEPLOY.md` for deployment. The Next.js app reaches it via the +> same-origin proxy `app/api/analyze` (`ML_API_URL`). + ``` LinkedIn Posts → Label Generation → Feature Engineering → Model Training → Prediction ↓ (VADER + Engagement) ↓ ↓ @@ -214,26 +222,30 @@ Supabase schema for logging requests/responses: `docs/supabase.sql` (table `anal ## AI provider abstraction (Vercel AI SDK) -The Next.js AI layer uses the **Vercel AI SDK** (`ai` + `@ai-sdk/google` + `zod`), -so the model provider is swappable. `lib/ai/provider.ts` exposes `getModel()`, -which resolves a model from the `AI_MODEL` env var (default -`google/gemini-2.0-flash`). +The Next.js AI layer uses the **Vercel AI SDK** (`ai` + `@ai-sdk/openai` + +`@ai-sdk/google` + `zod`), so the model provider is swappable. `lib/ai/provider.ts` +exposes `getModel()`, which resolves a model from the `AI_MODEL` env var +(default `openai/gpt-4o-mini`). The persona-critique / variant-eval client at +`lib/google-ai/client.ts` (legacy dir name, now provider-agnostic) uses this +resolver with `generateObject` + zod.
How a model is resolved: - If `AI_GATEWAY_API_KEY` is set (or, on Vercel, OIDC enables the Gateway), the `provider/model` string is routed through the **Vercel AI Gateway**, which adds failover and cost tracking. -- Otherwise it falls back to the `@ai-sdk/google` provider using a direct key. - The key is read from `GOOGLE_GENERATIVE_AI_API_KEY` (preferred), with - `GEMINI_API_KEY` as a fallback. +- Otherwise it falls back to a direct provider key. For the default + `openai/...` model the key is read from `OPENAI_API_KEY` (via `@ai-sdk/openai`). + A `google/...` model still works via `@ai-sdk/google`, reading + `GOOGLE_GENERATIVE_AI_API_KEY` (preferred), with `GEMINI_API_KEY` as a fallback. -**Switching providers** is a one-line change: set `AI_MODEL` (e.g. -`openai/gpt-4o-mini`) and supply the relevant provider key or use the Gateway. +**Switching providers/models** is a one-line change: set `AI_MODEL` and supply the +matching provider key (or use the Gateway). ``` -AI_MODEL=google/gemini-2.0-flash # default -AI_GATEWAY_API_KEY=... # optional: route via Vercel AI Gateway -GOOGLE_GENERATIVE_AI_API_KEY=... # direct Google key (GEMINI_API_KEY is a fallback) +AI_MODEL=openai/gpt-4o-mini # default +OPENAI_API_KEY=... # direct OpenAI key (used by the default model) +AI_GATEWAY_API_KEY=... # optional: route any provider/model via Vercel AI Gateway +GOOGLE_GENERATIVE_AI_API_KEY=... # only if you switch AI_MODEL to google/... (GEMINI_API_KEY is a fallback) ``` ## Rate limiting @@ -241,8 +253,8 @@ GOOGLE_GENERATIVE_AI_API_KEY=... 
# direct Google key (GEMINI_API_KEY is a Two independent limiters protect the public surface: **Next.js inbound limiter** (`lib/ratelimit.ts`) — an in-memory, per-client-IP -limiter applied to all public POST routes: `/api/gemini`, `/api/predict`, -`/api/analyze`, `/api/analyze-with-images`, `/api/ab-tests`, `/api/personas`, +limiter applied to the public POST routes, including `/api/analyze`, +`/api/gemini`, `/api/analyze-with-images`, `/api/ab-tests`, `/api/personas`, and `/api/drafts`. Over-limit requests get a `429` with a `Retry-After` header. Configure with: ``` @@ -256,10 +268,6 @@ There is also a separate **outbound** throttle on calls to the AI provider, configured with `GEMINI_RATE_LIMIT_MAX_REQUESTS` (default `15`) and `GEMINI_RATE_LIMIT_WINDOW_MS` (default `60000`). -**FastAPI `/predict` limiter** (`api.py`) — a `slowapi` per-IP limit on -`POST /predict`, returning `429` when exceeded. Configure with -`RATE_LIMIT_PREDICT` (default `30/minute`). -> Caveat: the default store is in-memory **per gunicorn worker**, so with N -> workers the effective global limit is ~N× the configured value. Set -> `RATELIMIT_STORAGE_URI` (e.g. `redis://host:6379/0`) for a consistent global -> limit. See `RENDER_DEPLOY.md` for deployment details. +The `services/ml_api` backend has no built-in limiter of its own; it is reached +only through the Next.js proxy (`app/api/analyze`), so the inbound limiter above +covers it. See `RENDER_DEPLOY.md` for deployment details. diff --git a/RENDER_DEPLOY.md b/RENDER_DEPLOY.md index 0677b2c..9d710bf 100644 --- a/RENDER_DEPLOY.md +++ b/RENDER_DEPLOY.md @@ -1,55 +1,58 @@ -# Deploying the PR Sentiment API to Render +# Deploying the Lyra ML API to Render -This is the FastAPI backend (`api.py` + `prediction_service.py`) that serves the -trained LinkedIn PR sentiment classifier. The trained artifacts live in -`output/` and are committed to the repo, so no external storage is needed.
+This is the FastAPI backend (`services/ml_api`) — a **provider-free TF-IDF +recruiting-signal engine**. It needs no AI provider key and no external database: +the trained model artifacts live in `output/models/` (committed to the repo) and +request/response logging uses local sqlite. ## What ships | File | Purpose | |------|---------| | `render.yaml` | Render Blueprint — defines the web service, build/start commands, health check, env vars. | -| `Procfile` | Same start command, for non-Blueprint / generic buildpack deploys. | -| `runtime.txt` | Pins Python 3.12.3. | -| `requirements_api.txt` | Python deps (scikit-learn pinned to **1.6.1** to match the pickled model). | -| `output/*.pkl`, `output/*.npy` | Trained model + scaler + encoders. | +| `services/ml_api/requirements.txt` | Python deps for the API. | +| `output/models/*` | Trained TF-IDF models + metadata (`metadata.json`, `*.joblib`, `train_tfidf_matrix.npz`, …). | + +The Blueprint provisions a single web service named **`lyra-ml-api`** on the +Render **free** plan, running: + +``` +gunicorn services.ml_api.main:app -k uvicorn.workers.UvicornWorker +``` + +(bound to `$PORT`, `${WEB_CONCURRENCY:-1}` workers, `/health` health check). ## One-time setup -1. Push this branch to GitHub. +1. Push this repo to GitHub. 2. In Render: **New +** → **Blueprint** → select the repo. Render reads `render.yaml`. -3. Set the **`GEMINI_API_KEY`** secret in the dashboard (it's `sync: false`, so it is - never stored in the repo). Get a key at https://aistudio.google.com/app/apikey. -4. (Recommended) Set **`ALLOWED_ORIGINS`** to your frontend origin(s), - comma-separated, instead of `*`. -5. Deploy. Render runs the health check against `/health`; the service only - reports healthy once the model has loaded. +3.
(Recommended) Set **`FRONTEND_ORIGIN`** to your deployed frontend origin so the + browser can reach the API directly if needed (the app normally calls it + server-side via `app/api/analyze`, so CORS rarely matters in production). +4. Deploy. Render runs the health check against `/health`; the service only + reports healthy once the models have loaded. + +> No secrets are required — there is no AI provider key for this service. ## Environment variables | Var | Required | Default | Notes | |-----|----------|---------|-------| -| `GEMINI_API_KEY` | ✅ | — | App refuses to start without it (fail-fast). | -| `MODEL_DIR` | | `output` | Directory holding the `.pkl`/`.npy` artifacts. | -| `ALLOWED_ORIGINS` | | `*` | Comma-separated origins. With `*`, credentials are disabled (CORS spec). | -| `WEB_CONCURRENCY` | | `1` | gunicorn workers. Each worker loads the model — raise only after checking memory. | -| `LOG_LEVEL` | | `INFO` | | -| `RATE_LIMIT_PREDICT` | | `30/minute` | Per-IP limit on `POST /predict` (slowapi/limits syntax, e.g. `100/hour`, `5/second`). Over-limit requests get a 429. | -| `RATELIMIT_STORAGE_URI` | | (in-memory) | Shared rate-limit store, e.g. `redis://host:6379/0`. Without it the store is per-process — see caveat below. | +| `MODEL_DIR` | | `output/models` | Directory holding the trained TF-IDF artifacts. | +| `FRONTEND_ORIGIN` | | `http://localhost:3000` | Frontend origin allowed by CORS. Mainly matters if the browser hits the API directly. | +| `WEB_CONCURRENCY` | | `1` | gunicorn workers. Each worker loads the models — raise only after checking memory. | | `PORT` | (Render-injected) | `8000` | Bound automatically by the start command. | - -> **Rate-limit caveat:** the default store is in-memory **per gunicorn worker**, so -> with `WEB_CONCURRENCY` = N the effective global limit is roughly N× the -> configured `RATE_LIMIT_PREDICT`.
Set `RATELIMIT_STORAGE_URI` to a shared Redis -> instance for a single, consistent global limit across all workers and instances. +| `PYTHON_VERSION` | | `3.12.3` | Pins the Python runtime for the build. | ## Verify after deploy ```bash curl https://.onrender.com/health -curl -X POST https://.onrender.com/predict \ +# -> {"status":"ok","models_loaded":true} + +curl -X POST https://.onrender.com/analyze \ -H 'Content-Type: application/json' \ - -d '{"text":"Excited to announce our new platform! #AI","has_media":1,"media_count":1}' + -d '{"post_text":"We are scaling our AI team fast. Expect late nights but huge impact."}' ``` Interactive docs: `https://.onrender.com/docs` @@ -58,19 +61,19 @@ Interactive docs: `https://.onrender.com/docs` ```bash python -m venv venv && source venv/bin/activate -pip install -r requirements_api.txt -export GEMINI_API_KEY=your-key -python api.py # dev server on :8000 (set PORT/RELOAD to override) +pip install -r services/ml_api/requirements.txt +# dev server on :8000 (reads PORT/MODEL_DIR from env) +python -m services.ml_api.main # or, mirror production: -gunicorn api:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000 +gunicorn services.ml_api.main:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000 ``` ## Wiring the Next.js frontend The frontend never calls the FastAPI service directly. Instead: -- `app/sentiment-analyzer/page.tsx` POSTs to the same-origin route `/api/predict`. -- `app/api/predict/route.ts` forwards the request to `${ML_API_URL}/predict`. +- The app POSTs to the same-origin route `app/api/analyze`. +- `app/api/analyze` forwards the request to `${ML_API_URL}/analyze`. So you only set **one** env var on the Next.js host (e.g. Vercel): @@ -79,16 +82,5 @@ ML_API_URL=https://.onrender.com ``` Locally, `ML_API_URL` defaults to `http://localhost:8000`. Run both together -with `npm run dev` (starts `next dev` + `uvicorn api:app` via `concurrently`), -which also needs `GEMINI_API_KEY` exported for the Python side. 
- -## Model caveat (read before demoing) - -The model currently in `output/` is the **full-embedding (768-dim) classifier** -โ€” the PCA/regularization fixes described in `FIXES_APPLIED.md` were *documented -but never saved* (`pca_reducer.pkl` is absent, and the saved model reports 784 -input features = 768 embeddings + 16 metadata). It therefore still carries the -documented overfitting (~84% train / ~45% test). The serving pipeline is -correct and dimensionally consistent; if you re-run the notebook to actually -apply PCA, save `pca_reducer.pkl` into `output/` and the service will pick it up -automatically (it already branches on the file's presence). +with `npm run dev` (starts `next dev` + `uvicorn services.ml_api.main:app` via +`concurrently`). diff --git a/api.py b/api.py deleted file mode 100644 index 8d054de..0000000 --- a/api.py +++ /dev/null @@ -1,331 +0,0 @@ -""" -FastAPI Server for LinkedIn PR Sentiment Classification - -This API provides endpoints for predicting whether a LinkedIn post will -generate positive or negative PR using machine learning. 
-""" - -import logging -import os -from contextlib import asynccontextmanager -from datetime import datetime, timezone -from typing import Dict, Optional - -import uvicorn -from fastapi import FastAPI, HTTPException, Request, status -from fastapi.concurrency import run_in_threadpool -from fastapi.middleware.cors import CORSMiddleware -from pydantic import BaseModel, Field, field_validator -from slowapi import Limiter, _rate_limit_exceeded_handler -from slowapi.errors import RateLimitExceeded -from slowapi.middleware import SlowAPIMiddleware -from slowapi.util import get_remote_address - -from prediction_service import PRClassifierService - -logging.basicConfig( - level=os.getenv("LOG_LEVEL", "INFO").upper(), - format="%(asctime)s %(levelname)s %(name)s: %(message)s", -) -logger = logging.getLogger("pr_api") - - -# --- Rate limiting (slowapi) ------------------------------------------------- -# Per-client inbound rate limiting keyed by remote IP address. Mainly guards -# the expensive /predict endpoint (Gemini embedding + XGBoost inference). -# -# RATE_LIMIT_PREDICT controls the /predict limit and is env-configurable so it -# can be tuned without a code change (slowapi/limits syntax, e.g. "30/minute", -# "100/hour", "5/second"). -# -# NOTE: the default storage is in-memory and therefore PER-PROCESS. Under -# gunicorn with multiple UvicornWorkers (or multiple Render instances), each -# worker keeps its own counter, so the effective limit is multiplied by the -# worker/instance count. For a globally consistent limit across workers/ -# instances, point RATELIMIT_STORAGE_URI at a shared store such as Redis -# (e.g. "redis://host:6379/0"), which is passed through to the Limiter below. 
-RATE_LIMIT_PREDICT = os.getenv("RATE_LIMIT_PREDICT", "30/minute") -_RATELIMIT_STORAGE_URI = os.getenv("RATELIMIT_STORAGE_URI") # optional shared store - -limiter = Limiter( - key_func=get_remote_address, - storage_uri=_RATELIMIT_STORAGE_URI, # None -> in-memory (per-process) default -) - - -# Global service instance (populated during the lifespan startup) -prediction_service: Optional[PRClassifierService] = None - - -@asynccontextmanager -async def lifespan(app: FastAPI): - """Initialize the prediction service once, before the app serves traffic.""" - global prediction_service - - model_dir = os.getenv("MODEL_DIR", "output") - api_key = os.getenv("GEMINI_API_KEY") - - if not api_key: - # Fail fast: without the key the service can never produce a prediction, - # so we don't want the platform to report a "healthy" deploy. - raise RuntimeError( - "GEMINI_API_KEY environment variable not set. " - "Set it in the Render dashboard (or your local environment) " - "before starting the server." - ) - - try: - prediction_service = PRClassifierService(model_dir=model_dir, api_key=api_key) - logger.info("Prediction service initialized with model from: %s", model_dir) - except Exception: - logger.exception("Failed to initialize prediction service") - raise - - yield - - prediction_service = None - - -# Initialize FastAPI app -app = FastAPI( - title="LinkedIn PR Sentiment Classifier API", - description="Predict PR sentiment (positive/negative) for LinkedIn posts using AI", - version="1.0.0", - docs_url="/docs", - redoc_url="/redoc", - lifespan=lifespan, -) - -# CORS: configurable via ALLOWED_ORIGINS (comma-separated). Defaults to "*". -# The CORS spec forbids combining a wildcard origin with credentials, so we only -# enable credentials when explicit origins are listed. 
-_origins_env = os.getenv("ALLOWED_ORIGINS", "*").strip() -if _origins_env == "*": - _allow_origins = ["*"] - _allow_credentials = False -else: - _allow_origins = [o.strip() for o in _origins_env.split(",") if o.strip()] - _allow_credentials = True - -app.add_middleware( - CORSMiddleware, - allow_origins=_allow_origins, - allow_credentials=_allow_credentials, - allow_methods=["*"], - allow_headers=["*"], -) - -# Wire up rate limiting: register the limiter on app state (slowapi looks it up -# there), install the middleware, and return HTTP 429 when a limit is exceeded. -app.state.limiter = limiter -app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) -app.add_middleware(SlowAPIMiddleware) - - -def _utc_now() -> str: - return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") - - -# Pydantic models for request/response -class PredictionRequest(BaseModel): - """Request model for PR prediction""" - text: str = Field(..., description="LinkedIn post text content", min_length=1, max_length=10000) - - # Optional metadata fields - post_hour: Optional[int] = Field(12, ge=0, le=23, description="Hour of posting (0-23)") - post_day_of_week: Optional[int] = Field(2, ge=0, le=6, description="Day of week (0=Monday, 6=Sunday)") - post_month: Optional[int] = Field(1, ge=1, le=12, description="Month (1-12)") - has_media: Optional[int] = Field(0, ge=0, le=1, description="Has media (0 or 1)") - media_count: Optional[int] = Field(0, ge=0, description="Number of media items") - media_type: Optional[str] = Field("none", description="Media type (none/image/video)") - post_type: Optional[str] = Field("regular", description="Post type (regular/article)") - author_follower_count: Optional[int] = Field(1000, ge=0, description="Author follower count") - avg_sentiment: Optional[float] = Field(0.0, ge=-1.0, le=1.0, description="Average comment sentiment") - median_sentiment: Optional[float] = Field(0.0, ge=-1.0, le=1.0, description="Median comment sentiment") - 
num_comments_analyzed: Optional[int] = Field(0, ge=0, description="Number of comments analyzed") - - @field_validator("text") - @classmethod - def text_not_empty(cls, v: str) -> str: - if not v or not v.strip(): - raise ValueError("Text cannot be empty or whitespace only") - return v.strip() - - model_config = { - "json_schema_extra": { - "example": { - "text": "Excited to announce our new AI-powered analytics platform! This will transform how businesses understand their customers. #AI #Innovation", - "post_hour": 14, - "post_day_of_week": 2, - "has_media": 1, - "media_count": 1, - } - } - } - - -class PredictionResponse(BaseModel): - """Response model for PR prediction""" - prediction: str = Field(..., description="Predicted sentiment: 'positive' or 'negative'") - confidence: float = Field(..., description="Confidence score (0-1)") - probabilities: Dict[str, float] = Field(..., description="Probability for each class") - features_extracted: Dict = Field(..., description="Extracted features from input") - timestamp: str = Field(..., description="Prediction timestamp") - - model_config = { - "json_schema_extra": { - "example": { - "prediction": "positive", - "confidence": 0.85, - "probabilities": {"negative": 0.15, "positive": 0.85}, - "features_extracted": { - "text_length": 152, - "emoji_count": 0, - "url_count": 0, - "hashtag_count": 2, - "mention_count": 0, - "embedding_dimension": 768, - }, - "timestamp": "2025-12-15T10:30:00Z", - } - } - } - - -class HealthResponse(BaseModel): - """Health check response""" - status: str - message: str - model_loaded: bool - timestamp: str - - -@app.get("/", tags=["General"]) -async def root(): - """Root endpoint with API information""" - return { - "name": "LinkedIn PR Sentiment Classifier API", - "version": "1.0.0", - "description": "Predict PR sentiment for LinkedIn posts", - "endpoints": { - "health": "/health", - "predict": "/predict (POST)", - "docs": "/docs", - }, - } - - -@app.get("/health", 
response_model=HealthResponse, tags=["General"]) -async def health_check(): - """Health check endpoint (used by Render's health checks)""" - model_loaded = prediction_service is not None - - return HealthResponse( - status="healthy" if model_loaded else "unhealthy", - message="Service is running" if model_loaded else "Model not loaded", - model_loaded=model_loaded, - timestamp=_utc_now(), - ) - - -@app.post("/predict", response_model=PredictionResponse, tags=["Prediction"]) -@limiter.limit(RATE_LIMIT_PREDICT) -async def predict_pr_sentiment(request: Request, payload: PredictionRequest): - """ - Predict PR sentiment for a LinkedIn post. - - Analyzes the provided post text and returns a prediction of whether it will - generate positive or negative PR, along with confidence scores and extracted - features. - """ - if prediction_service is None: - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="Prediction service not initialized", - ) - - try: - # The prediction makes a synchronous network call to the Gemini - # embedding API; run it off the event loop so concurrent requests - # aren't blocked. 
- result = await run_in_threadpool( - prediction_service.predict, - text=payload.text, - post_hour=payload.post_hour, - post_day_of_week=payload.post_day_of_week, - post_month=payload.post_month, - has_media=payload.has_media, - media_count=payload.media_count, - media_type=payload.media_type, - post_type=payload.post_type, - author_follower_count=payload.author_follower_count, - avg_sentiment=payload.avg_sentiment, - median_sentiment=payload.median_sentiment, - num_comments_analyzed=payload.num_comments_analyzed, - ) - - result["timestamp"] = _utc_now() - return PredictionResponse(**result) - - except ValueError as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=f"Invalid input: {str(e)}", - ) - except RuntimeError as e: - logger.error("Prediction failed: %s", e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"Prediction failed: {str(e)}", - ) - except Exception as e: - logger.exception("Unexpected error during prediction") - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"Unexpected error: {str(e)}", - ) - - -@app.get("/model-info", tags=["General"]) -async def model_info(): - """Get information about the loaded model""" - if prediction_service is None: - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="Prediction service not initialized", - ) - - embedding_dim = 30 if prediction_service.pca else 768 - return { - "model_directory": prediction_service.model_dir, - "pca_enabled": prediction_service.pca is not None, - "embedding_model": "Gemini models/embedding-001", - "classifier_model": "XGBoost", - "features": { - "embedding_dimension": embedding_dim, - "metadata_features": len(prediction_service.metadata_features), - "total_features": embedding_dim + len(prediction_service.metadata_features), - }, - "encoders": { - "post_types": list(prediction_service.post_type_encoder.classes_), - "media_types": 
list(prediction_service.media_type_encoder.classes_), - }, - } - - -# Local development entrypoint. -# In production (Render) the app is served by gunicorn/uvicorn via the start -# command, which binds to $PORT โ€” see render.yaml. -if __name__ == "__main__": - if not os.getenv("GEMINI_API_KEY"): - print("โŒ Error: GEMINI_API_KEY environment variable not set") - print(" Please set it with: export GEMINI_API_KEY='your-api-key'") - raise SystemExit(1) - - uvicorn.run( - "api:app", - host=os.getenv("HOST", "0.0.0.0"), - port=int(os.getenv("PORT", "8000")), - reload=os.getenv("RELOAD", "false").lower() == "true", - log_level=os.getenv("LOG_LEVEL", "info").lower(), - ) diff --git a/app/api/gemini/route.ts b/app/api/gemini/route.ts index 9b054d3..cdab5af 100644 --- a/app/api/gemini/route.ts +++ b/app/api/gemini/route.ts @@ -1,6 +1,6 @@ import { generateText } from 'ai' import { NextRequest, NextResponse } from 'next/server' -import { getModel } from '@/lib/ai/provider' +import { getModel, DEFAULT_AI_MODEL } from '@/lib/ai/provider' import { withRateLimit } from '@/lib/google-ai/rate-limiter' import { enforceRateLimit } from '@/lib/ratelimit' @@ -30,7 +30,7 @@ export async function POST(request: NextRequest) { return NextResponse.json({ success: true, response: text, - model: model || process.env.AI_MODEL || 'google/gemini-2.0-flash', + model: model || process.env.AI_MODEL || DEFAULT_AI_MODEL, }) } catch (error: any) { console.error('Gemini API error:', error) @@ -48,6 +48,6 @@ export async function GET() { return NextResponse.json({ message: 'AI text generation endpoint (Vercel AI SDK)', usage: 'POST with { "prompt": "your prompt", "model": "optional provider/model string" }', - defaultModel: process.env.AI_MODEL || 'google/gemini-2.0-flash', + defaultModel: process.env.AI_MODEL || DEFAULT_AI_MODEL, }) } diff --git a/app/api/predict/route.ts b/app/api/predict/route.ts deleted file mode 100644 index b2a2690..0000000 --- a/app/api/predict/route.ts +++ /dev/null @@ -1,60 
+0,0 @@ -import { NextRequest, NextResponse } from 'next/server' -import { enforceRateLimit } from '@/lib/ratelimit' - -// Server-side proxy to the PR-sentiment FastAPI service (api.py /predict). -// Keeps the backend URL server-side (no CORS dependency from the browser) and -// lets us swap localhost for the deployed Render URL via one env var. -const ML_API_URL = process.env.ML_API_URL || 'http://localhost:8000' - -// The embedding step calls the Gemini API, so allow generous headroom. -const DEFAULT_TIMEOUT_MS = 30000 - -export async function POST(req: NextRequest) { - const limited = enforceRateLimit(req) - if (limited) return limited - - let body: { text?: string } & Record - try { - body = await req.json() - } catch { - return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 }) - } - - if (!body?.text || !String(body.text).trim()) { - return NextResponse.json({ error: 'text is required' }, { status: 400 }) - } - - const controller = new AbortController() - const timeout = setTimeout(() => controller.abort(), DEFAULT_TIMEOUT_MS) - - try { - const response = await fetch(`${ML_API_URL}/predict`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(body), - signal: controller.signal, - }) - clearTimeout(timeout) - - const data = await response.json().catch(() => null) - - if (!response.ok) { - return NextResponse.json( - { - error: 'Upstream ML API error', - detail: data?.detail ?? response.statusText, - }, - { status: response.status === 422 ? 422 : 502 } - ) - } - - return NextResponse.json(data) - } catch (error: any) { - clearTimeout(timeout) - const isAbort = error?.name === 'AbortError' - return NextResponse.json( - { error: isAbort ? 
'ML API timeout' : 'Failed to call ML API', detail: String(error) }, - { status: 502 } - ) - } -} diff --git a/app/sentiment-analyzer/layout.tsx b/app/sentiment-analyzer/layout.tsx deleted file mode 100644 index 1dc6eaf..0000000 --- a/app/sentiment-analyzer/layout.tsx +++ /dev/null @@ -1,19 +0,0 @@ -import type { Metadata } from "next"; - -export const metadata: Metadata = { - title: "LinkedIn PR Sentiment Analyzer - AI-Powered Post Analysis", - description: "Predict whether your LinkedIn post will generate positive or negative PR using AI-powered sentiment analysis with Gemini embeddings and XGBoost", - keywords: ["LinkedIn", "PR", "Sentiment Analysis", "AI", "Machine Learning", "XGBoost", "Gemini"], -}; - -export default function SentimentAnalyzerLayout({ - children, -}: { - children: React.ReactNode; -}) { - return children; -} - - - - diff --git a/app/sentiment-analyzer/page.tsx b/app/sentiment-analyzer/page.tsx deleted file mode 100644 index 18e4ff5..0000000 --- a/app/sentiment-analyzer/page.tsx +++ /dev/null @@ -1,453 +0,0 @@ -'use client'; - -import { useState } from 'react'; - -interface PredictionResult { - prediction: string; - confidence: number; - probabilities: { - positive: number; - negative: number; - }; - features_extracted: { - text_length: number; - emoji_count: number; - url_count: number; - hashtag_count: number; - mention_count: number; - embedding_dimension: number; - }; - timestamp: string; -} - -interface FormData { - text: string; - post_hour: number; - post_day_of_week: number; - post_month: number; - has_media: number; - media_count: number; - media_type: string; - post_type: string; - author_follower_count: number; -} - -// Calls the same-origin Next.js proxy (app/api/predict/route.ts), which forwards -// to the FastAPI service at ML_API_URL. Avoids hardcoding the backend URL and -// any browser CORS dependency. 
-const PREDICT_ENDPOINT = '/api/predict'; - -const examples = { - 1: { - text: "Excited to announce our new AI-powered analytics platform! This will transform how businesses understand their customers. Join us at the launch event next week! ๐Ÿš€ #AI #Innovation #TechNews", - has_media: 1, - media_count: 1, - media_type: "image" - }, - 2: { - text: "We deeply regret the service outage that affected our customers yesterday. We take full responsibility and are implementing measures to prevent this from happening again. Your trust is our priority.", - has_media: 0, - media_count: 0, - media_type: "none" - }, - 3: { - text: "Proud to share that our team won the Best Workplace Award 2024! This achievement reflects our commitment to creating an inclusive and innovative environment. Thank you to everyone who made this possible! ๐Ÿ†โœจ", - has_media: 1, - media_count: 2, - media_type: "image" - } -}; - -export default function SentimentAnalyzer() { - const [formData, setFormData] = useState({ - text: '', - post_hour: 12, - post_day_of_week: 2, - post_month: 1, - has_media: 0, - media_count: 0, - media_type: 'none', - post_type: 'regular', - author_follower_count: 1000 - }); - - const [result, setResult] = useState(null); - const [loading, setLoading] = useState(false); - const [error, setError] = useState(null); - const [showAdvanced, setShowAdvanced] = useState(false); - - const loadExample = (num: 1 | 2 | 3) => { - const example = examples[num]; - setFormData(prev => ({ - ...prev, - text: example.text, - has_media: example.has_media, - media_count: example.media_count, - media_type: example.media_type - })); - }; - - const handleSubmit = async (e: React.FormEvent) => { - e.preventDefault(); - setLoading(true); - setError(null); - setResult(null); - - try { - const response = await fetch(PREDICT_ENDPOINT, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify(formData) - }); - - if (!response.ok) { - const errorData = await 
response.json(); - throw new Error(errorData.detail || 'Prediction failed'); - } - - const data: PredictionResult = await response.json(); - setResult(data); - } catch (err) { - setError(err instanceof Error ? err.message : 'An error occurred'); - } finally { - setLoading(false); - } - }; - - return ( -
-
- {/* Header */} -
-

- LinkedIn PR Sentiment Analyzer -

-

- Predict whether your LinkedIn post will generate positive or negative PR using AI -

-
- - {/* Main Content */} -
- {/* Input Section */} -
-

- - - - - Input Your LinkedIn Post -

- -
- {/* Text Area */} -
- -