# Skip to content

# v2.1.0: E5 embedding migration, Two-Phase Search #293

# v2.1.0: E5 embedding migration, Two-Phase Search

# v2.1.0: E5 embedding migration, Two-Phase Search #293

# Workflow file for this run

name: CI/CD Pipeline

# Triggers: pushes to, and pull requests targeting, the main and develop branches.
on:
  push:
    branches:
      - main
      - develop
  pull_request:
    branches:
      - main
      - develop

jobs:
# Backend TypeScript compilation and linting
  # Installs the Web UI backend's locked dependencies, type-checks the
  # sources without emitting output, then runs the package's build script.
  backend-build:
    name: Web UI Backend - Build & Type Check
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      # actions/cache@v4 takes the same inputs as v3 and matches the major
      # version generation of the other actions used in this workflow.
      - name: Cache npm dependencies
        uses: actions/cache@v4
        with:
          path: ~/.npm
          key: ${{ runner.os }}-npm-backend-${{ hashFiles('services/web-ui/backend/package-lock.json') }}
          # Fall back to progressively less specific caches on a lockfile change.
          restore-keys: |
            ${{ runner.os }}-npm-backend-
            ${{ runner.os }}-npm-

      - name: Install dependencies
        working-directory: services/web-ui/backend
        run: npm ci

      - name: TypeScript type check
        working-directory: services/web-ui/backend
        run: npx tsc --noEmit

      - name: Build
        working-directory: services/web-ui/backend
        run: npm run build

# Frontend TypeScript compilation and build
  # Installs the Web UI frontend, type-checks it, builds it, and uploads the
  # resulting dist/ directory as a workflow artifact.
  frontend-build:
    name: Web UI Frontend - Build & Type Check
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      # actions/cache@v4 takes the same inputs as v3 and matches the major
      # version generation of the other actions used in this workflow.
      - name: Cache npm dependencies
        uses: actions/cache@v4
        with:
          path: ~/.npm
          key: ${{ runner.os }}-npm-frontend-${{ hashFiles('services/web-ui/frontend/package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-npm-frontend-
            ${{ runner.os }}-npm-

      # NOTE(review): `npm install` (not `npm ci`) followed by explicit installs
      # of the linux-x64 native packages presumably works around npm skipping
      # platform-specific optional dependencies - confirm before simplifying.
      - name: Install dependencies
        working-directory: services/web-ui/frontend
        run: |
          npm install
          npm install --force @rollup/rollup-linux-x64-gnu
          npm install --no-save @tailwindcss/oxide-linux-x64-gnu@$(node -p "require('@tailwindcss/oxide/package.json').version")
          npm rebuild rollup

      - name: TypeScript type check
        working-directory: services/web-ui/frontend
        run: npx tsc --noEmit

      - name: Build
        working-directory: services/web-ui/frontend
        run: npm run build
        env:
          # Quoted: environment variables are strings, so state that explicitly
          # instead of relying on integer-to-string coercion.
          NAPI_RS_FORCE_WASI: '1'

      - name: Upload build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: frontend-dist
          path: services/web-ui/frontend/dist/
          retention-days: 7

# Prompt Guard API - Python validation
  # Installs the API's pinned requirements and byte-compiles app.py to catch
  # syntax errors; no tests are executed by this job.
  prompt-guard-api:
    name: Prompt Guard API - Validation
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          # pip cache keyed on this service's requirements file.
          cache: pip
          cache-dependency-path: prompt-guard-api/requirements.txt

      - name: Install dependencies
        working-directory: prompt-guard-api
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Syntax check
        working-directory: prompt-guard-api
        run: |
          python -m py_compile app.py

# Docker build validation
  # For every service in the matrix: build its image, start a container from
  # it, poll the service's HTTP endpoint(s) from inside the container, and
  # always remove the container afterwards.
  docker-build:
    name: Docker - Build & Smoke Test
    runs-on: ubuntu-latest
    timeout-minutes: 25
    strategy:
      matrix:
        service:
          - web-ui-backend
          - web-ui-frontend
          - prompt-guard-api
          - heuristics-service
          - semantic-service
    env:
      # Defined once for the whole job so the start, health-check and cleanup
      # steps are guaranteed to refer to the same container.
      CONTAINER_NAME: test-${{ matrix.service }}-${{ github.run_id }}-${{ github.run_attempt }}
      # Exposed as a shell variable so scripts do not splice expressions into
      # their own source text.
      SERVICE: ${{ matrix.service }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build Web UI Backend
        if: matrix.service == 'web-ui-backend'
        # Build from project root (Dockerfile expects services/web-ui/backend/* and plugin/Chrome paths)
        run: docker build -f services/web-ui/backend/Dockerfile -t vigil-guard-web-ui-backend:test .

      - name: Build Web UI Frontend
        if: matrix.service == 'web-ui-frontend'
        run: docker build -f services/web-ui/frontend/Dockerfile -t vigil-guard-web-ui-frontend:test .

      - name: Build Prompt Guard API
        if: matrix.service == 'prompt-guard-api'
        working-directory: prompt-guard-api
        run: docker build -t vigil-guard-prompt-guard-api:test .

      - name: Build Heuristics Service
        if: matrix.service == 'heuristics-service'
        working-directory: services/heuristics-service
        run: docker build -t vigil-guard-heuristics-service:test .

      - name: Build Semantic Service
        if: matrix.service == 'semantic-service'
        working-directory: services/semantic-service
        run: docker build -t vigil-guard-semantic-service:test .

      - name: Smoke test - Start container
        env:
          # Secrets (or their CI fallbacks) reach the script as environment
          # variables rather than being interpolated into the shell source,
          # so a value containing quotes or `$` cannot alter the command.
          JWT_SECRET: ${{ secrets.CI_JWT_SECRET || 'test-jwt-secret-min-32-chars-long-for-ci-smoke-test' }}
          SESSION_SECRET: ${{ secrets.CI_SESSION_SECRET || 'test-session-secret-min-64-chars-long-for-ci-smoke-test-validation-x' }}
          WEB_UI_ADMIN_PASSWORD: ${{ secrets.CI_WEB_UI_ADMIN_PASSWORD || 'test-admin-password-min-32-chars' }}
        run: |
          set -euo pipefail
          # Remove any leftover container with the same name.
          docker rm -f "${CONTAINER_NAME}" >/dev/null 2>&1 || true
          case "${SERVICE}" in
            web-ui-backend)
              docker run -d --name "${CONTAINER_NAME}" \
                -e NODE_ENV=test \
                -e JWT_SECRET="${JWT_SECRET}" \
                -e SESSION_SECRET="${SESSION_SECRET}" \
                -e WEB_UI_ADMIN_PASSWORD="${WEB_UI_ADMIN_PASSWORD}" \
                -e TARGET_DIR=/tmp/test \
                vigil-guard-web-ui-backend:test
              sleep 8
              docker logs "${CONTAINER_NAME}"
              ;;
            web-ui-frontend)
              docker run -d --name "${CONTAINER_NAME}" \
                vigil-guard-web-ui-frontend:test
              sleep 5
              docker logs "${CONTAINER_NAME}"
              ;;
            prompt-guard-api)
              docker run -d --name "${CONTAINER_NAME}" \
                -e MOCK_MODEL=true \
                vigil-guard-prompt-guard-api:test
              sleep 8
              docker logs "${CONTAINER_NAME}"
              ;;
            heuristics-service)
              docker run -d --name "${CONTAINER_NAME}" \
                -e NODE_ENV=test \
                vigil-guard-heuristics-service:test
              sleep 5
              docker logs "${CONTAINER_NAME}"
              ;;
            semantic-service)
              docker run -d --name "${CONTAINER_NAME}" \
                -e NODE_ENV=test \
                vigil-guard-semantic-service:test
              sleep 5
              docker logs "${CONTAINER_NAME}"
              ;;
          esac

      - name: Smoke test - Health check
        run: |
          set -euo pipefail
          # Ask Docker for the container's actual state. Listing with
          # `docker ps -a` also shows exited containers, so a service that
          # crashed on startup would wrongly pass a name-only check.
          running="$(docker inspect -f '{{.State.Running}}' "${CONTAINER_NAME}" 2>/dev/null || true)"
          if [ "${running}" != "true" ]; then
            echo "::error::Container ${CONTAINER_NAME} is not running"
            docker ps -a
            docker logs "${CONTAINER_NAME}" 2>&1 || true
            exit 1
          fi
          # wait_for CMD [ATTEMPTS=12] [DELAY=5]
          # Re-runs CMD (a shell string, evaluated with eval) until it
          # succeeds; returns 1 if it never succeeds within ATTEMPTS tries.
          wait_for() {
            local cmd="$1"
            local attempts="${2:-12}"
            local delay="${3:-5}"
            for i in $(seq 1 "$attempts"); do
              if eval "$cmd"; then
                echo "✅ Health check succeeded (attempt $i/$attempts)"
                return 0
              fi
              echo "⏳ Waiting for service (attempt $i/$attempts)..."
              sleep "$delay"
            done
            echo "❌ Service did not become healthy in time"
            return 1
          }
          case "${SERVICE}" in
            web-ui-backend)
              wait_for "docker exec ${CONTAINER_NAME} wget -qO- http://127.0.0.1:8787/health >/dev/null" || exit 1
              ;;
            web-ui-frontend)
              wait_for "docker exec ${CONTAINER_NAME} curl -fsS http://127.0.0.1/" || exit 1
              ;;
            prompt-guard-api)
              wait_for "docker exec ${CONTAINER_NAME} curl -fsS http://127.0.0.1:8000/health" || exit 1
              wait_for "docker exec ${CONTAINER_NAME} curl -fsS http://127.0.0.1:8000/" || exit 1
              wait_for "docker exec ${CONTAINER_NAME} curl -fsS -X POST http://127.0.0.1:8000/detect -H 'Content-Type: application/json' -d '{\"text\":\"Hello world\"}'" || exit 1
              ;;
            heuristics-service)
              # Alpine image uses wget instead of curl
              wait_for "docker exec ${CONTAINER_NAME} wget -qO- http://127.0.0.1:5005/health" || exit 1
              ;;
            semantic-service)
              # node:20-slim doesn't have curl/wget, use Node.js for health check
              # Accept any HTTP response (200 or 503 degraded) - service is running
              wait_for "docker exec ${CONTAINER_NAME} node -e \"require('http').get('http://127.0.0.1:5006/health', r => { console.log('Status:', r.statusCode); process.exit(0); }).on('error', e => process.exit(1))\"" || exit 1
              ;;
          esac

      - name: Cleanup
        # Runs even when an earlier step failed, so no container is left behind.
        if: always()
        run: docker rm -f "${CONTAINER_NAME}" || true

# Docker Compose validation
  # Renders the Compose configuration to prove it parses, then verifies that
  # every required service is declared in it.
  docker-compose-validate:
    name: Docker Compose - Syntax Check
    runs-on: ubuntu-latest
    # Shared by both steps; previously this block was duplicated per step.
    # NOTE(review): presumably these are the variables docker-compose.yml
    # interpolates - confirm against the compose file.
    env:
      CLICKHOUSE_PASSWORD: ${{ secrets.CI_CLICKHOUSE_PASSWORD || 'test-clickhouse-password-min-32-chars' }}
      GF_SECURITY_ADMIN_PASSWORD: ${{ secrets.CI_GRAFANA_ADMIN_PASSWORD || 'test-grafana-password-min-32-chars-x' }}
      GRAFANA_UID: '472'
      GRAFANA_GID: '472'
      JWT_SECRET: ${{ secrets.CI_JWT_SECRET || 'test-jwt-secret-min-32-chars-long-for-ci-validation' }}
      SESSION_SECRET: ${{ secrets.CI_SESSION_SECRET || 'test-session-secret-min-64-chars-long-for-ci-validation-tests-x' }}
      WEB_UI_ADMIN_PASSWORD: ${{ secrets.CI_WEB_UI_ADMIN_PASSWORD || 'test-admin-password-min-32-chars-x' }}
    steps:
      - uses: actions/checkout@v4

      - name: Validate docker-compose.yml
        run: docker compose config > /dev/null

      - name: Check required services
        run: |
          services=$(docker compose config --services)
          required="web-ui-backend web-ui-frontend clickhouse grafana n8n caddy heuristics-service semantic-service"
          for svc in $required; do
            # -F -x: fixed-string, whole-line match. A plain `grep -q "$svc"`
            # is a substring test, so e.g. "n8n" would be satisfied by a
            # service named "n8n-worker" even if "n8n" itself were missing.
            if ! grep -Fxq -- "$svc" <<<"$services"; then
              echo "Missing service: $svc"
              exit 1
            fi
          done

# Documentation validation
  # Fails when any required Markdown file is absent; separately prints (but
  # does not fail on) TODO/FIXME markers found in TypeScript and Python sources.
  docs-check:
    name: Documentation - Link & Format Check
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Check Markdown files exist
        run: |
          # v2.0.0 documentation structure
          required_docs=(
            "README.md"
            "docs/README.md"
            "docs/operations/installation.md"
            "docs/guides/configuration.md"
            "docs/AUTHENTICATION.md"
            "docs/API.md"
            "prompt-guard-api/README.md"
          )
          for doc in "${required_docs[@]}"; do
            if [ -f "$doc" ]; then
              continue
            fi
            # First missing file aborts the step.
            echo "Missing required documentation: $doc"
            exit 1
          done

      - name: Check for TODO/FIXME in code
        run: |
          # Informational only: matches are listed and a warning is echoed,
          # but the step exits successfully either way.
          if grep -r "TODO\|FIXME" services/ prompt-guard-api/ \
               --include="*.ts" --include="*.tsx" --include="*.py" \
             | grep -v node_modules; then
            echo "Warning: Found TODO/FIXME comments"
          fi

# Security audit
  # Runs `npm audit` for the Web UI backend and frontend. The audit step is
  # marked continue-on-error, so findings are reported without failing the job.
  security-audit:
    name: Security - Dependency Audit
    runs-on: ubuntu-latest
    strategy:
      matrix:
        component:
          - backend
          - frontend
    steps:
      - uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      # actions/cache@v4 takes the same inputs as v3 and matches the major
      # version generation of the other actions used in this workflow.
      - name: Cache npm dependencies
        uses: actions/cache@v4
        with:
          path: ~/.npm
          key: ${{ runner.os }}-npm-${{ matrix.component }}-${{ hashFiles(format('services/web-ui/{0}/package-lock.json', matrix.component)) }}
          restore-keys: |
            ${{ runner.os }}-npm-${{ matrix.component }}-
            ${{ runner.os }}-npm-

      - name: Security audit - ${{ matrix.component }}
        working-directory: services/web-ui/${{ matrix.component }}
        run: |
          npm ci
          if [ "${{ matrix.component }}" = "frontend" ]; then
            npm rebuild rollup
          fi
          npm audit --audit-level=moderate
        # Advisory only: a non-zero audit result must not fail the pipeline.
        continue-on-error: true

# Secret scanning
  # Scans the repository with TruffleHog between the default branch and HEAD.
  # The scan step is continue-on-error, so it never fails the job.
  secret-scan:
    name: Security - Secret Scanning
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history, so the base..head commit range is available locally.
          fetch-depth: 0

      # NOTE(review): the action is referenced at the moving `main` ref;
      # consider pinning to a release tag or commit SHA - confirm with owners.
      - name: Check for secrets in code
        uses: trufflesecurity/trufflehog@main
        continue-on-error: true
        with:
          path: ./
          base: ${{ github.event.repository.default_branch }}
          head: HEAD

# Installation script validation
  # Parses both shell scripts with `bash -n` (syntax check, nothing executed)
  # and requires each of them to carry the executable bit.
  install-script-check:
    name: Installation Script - Validation
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Check script syntax
        run: |
          bash -n install.sh

      - name: Check download script syntax
        run: |
          bash -n scripts/download-llama-model.sh

      - name: Verify script permissions
        run: |
          if [ ! -x install.sh ]; then
            echo "install.sh not executable"
            exit 1
          fi
          if [ ! -x scripts/download-llama-model.sh ]; then
            echo "download-llama-model.sh not executable"
            exit 1
          fi

# Heuristics Service - Unit Tests (v2.0.0)
  # Installs the heuristics service's locked dependencies and runs the tests
  # under tests/unit/ through the package's `test` script.
  heuristics-service-tests:
    name: Heuristics Service - Unit Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      # actions/cache@v4 takes the same inputs as v3 and matches the major
      # version generation of the other actions used in this workflow.
      - name: Cache npm dependencies
        uses: actions/cache@v4
        with:
          path: ~/.npm
          key: ${{ runner.os }}-npm-heuristics-${{ hashFiles('services/heuristics-service/package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-npm-heuristics-
            ${{ runner.os }}-npm-

      - name: Install dependencies
        working-directory: services/heuristics-service
        run: npm ci

      - name: Run unit tests
        working-directory: services/heuristics-service
        # Everything after `--` is forwarded to the underlying test runner.
        run: npm test -- tests/unit/

# Semantic Service - Unit Tests (v2.0.0)
  # Installs the semantic service's locked dependencies and runs the tests
  # under tests/unit/ through the package's `test` script.
  semantic-service-tests:
    name: Semantic Service - Unit Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      # actions/cache@v4 takes the same inputs as v3 and matches the major
      # version generation of the other actions used in this workflow.
      - name: Cache npm dependencies
        uses: actions/cache@v4
        with:
          path: ~/.npm
          key: ${{ runner.os }}-npm-semantic-${{ hashFiles('services/semantic-service/package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-npm-semantic-
            ${{ runner.os }}-npm-

      - name: Install dependencies
        working-directory: services/semantic-service
        run: npm ci

      - name: Run unit tests
        working-directory: services/semantic-service
        # Everything after `--` is forwarded to the underlying test runner.
        run: npm test -- tests/unit/

# Golden Dataset Quality Gate (Phase 3.6 - Semantic E5 Migration)
# This is a MANDATORY quality gate - build MUST fail if any example is misclassified
  # Starts a ClickHouse service container, creates the embedding tables,
  # imports the attack/safe embeddings when the data files exist, and runs
  # the golden-dataset test suite with vitest.
  golden-dataset-validation:
    name: 🎯 Golden Dataset - Quality Gate
    runs-on: ubuntu-latest
    # Deliberately no job-level `if:`. The earlier path filter called
    # contains() on github.event.pull_request.changed_files, but that field
    # is the NUMBER of changed files, not a list of paths, so the condition
    # was never true for pull requests: the gate was silently skipped on
    # every PR, and any job that `needs` a skipped job is skipped as well.
    # A mandatory gate has to run for both pushes and pull requests.
    services:
      clickhouse:
        image: clickhouse/clickhouse-server:24.8-alpine
        ports:
          - '8123:8123'
          - '9000:9000'
        env:
          CLICKHOUSE_USER: default
          CLICKHOUSE_PASSWORD: ${{ secrets.CI_CLICKHOUSE_PASSWORD || 'test-clickhouse-password-32-chars' }}
        options: >-
          --health-cmd "wget -qO- http://localhost:8123/ping || exit 1"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
    steps:
      - uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      # actions/cache@v4 takes the same inputs as v3 and matches the major
      # version generation of the other actions used in this workflow.
      - name: Cache npm dependencies
        uses: actions/cache@v4
        with:
          path: ~/.npm
          key: ${{ runner.os }}-npm-semantic-golden-${{ hashFiles('services/semantic-service/package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-npm-semantic-

      - name: Install Node dependencies
        working-directory: services/semantic-service
        run: npm ci

      - name: Install Python dependencies for embedding generation
        working-directory: services/semantic-service
        run: |
          pip install transformers torch onnx onnxruntime

      # Static cache key: bump the trailing version to force a fresh download.
      - name: Download E5 model (cached)
        uses: actions/cache@v4
        with:
          path: services/semantic-service/models/multilingual-e5-small-onnx-int8
          key: model-e5-small-onnx-int8-v1

      - name: Download model if not cached
        working-directory: services/semantic-service
        # NOTE(review): ANY failure of the download script (not only a missing
        # script) is swallowed and reported with the message below - confirm
        # that falling back to test fixtures is intended in every such case.
        run: |
          if [ ! -d "models/multilingual-e5-small-onnx-int8/onnx" ]; then
            python3 scripts/download-e5-model.py || echo "Model download script not found, will use test fixtures"
          fi

      - name: Setup ClickHouse tables
        env:
          CLICKHOUSE_HOST: localhost
          CLICKHOUSE_PASSWORD: ${{ secrets.CI_CLICKHOUSE_PASSWORD || 'test-clickhouse-password-32-chars' }}
        run: |
          set -euo pipefail
          # Send one request to the ClickHouse HTTP interface.
          #  * Authenticates as the `default` user with the password the
          #    service container was started with; the original requests
          #    carried no credentials even though a password is configured.
          #  * --fail-with-body makes curl exit non-zero on an HTTP error
          #    (while still printing the server's message), so a rejected
          #    statement fails this step instead of passing silently.
          ch_query() {
            curl --fail-with-body -sS \
              -u "default:${CLICKHOUSE_PASSWORD}" \
              "http://${CLICKHOUSE_HOST}:8123/" "$@"
          }
          # Create database and tables
          ch_query --data-binary "CREATE DATABASE IF NOT EXISTS n8n_logs"
          # Create pattern_embeddings_v2 table
          ch_query --data-binary @services/semantic-service/sql/04-semantic-embeddings-v2.sql
          # Create semantic_safe_embeddings table
          ch_query --data-binary @services/semantic-service/sql/05-semantic-safe-embeddings.sql

      - name: Import embeddings (attack + safe patterns)
        working-directory: services/semantic-service
        env:
          CLICKHOUSE_HOST: localhost
          CLICKHOUSE_PASSWORD: ${{ secrets.CI_CLICKHOUSE_PASSWORD || 'test-clickhouse-password-32-chars' }}
        run: |
          # Import attack patterns
          if [ -f "data/embeddings_v2.jsonl" ]; then
            node scripts/import-embeddings.js --input data/embeddings_v2.jsonl --table pattern_embeddings_v2
          fi
          # Import safe patterns
          if [ -f "data/safe_embeddings.jsonl" ]; then
            node scripts/import-embeddings.js --input data/safe_embeddings.jsonl --table semantic_safe_embeddings
          fi

      - name: 🎯 Run Golden Dataset Tests (Quality Gate)
        working-directory: services/semantic-service
        env:
          NODE_ENV: development
          CLICKHOUSE_HOST: localhost
          CLICKHOUSE_PASSWORD: ${{ secrets.CI_CLICKHOUSE_PASSWORD || 'test-clickhouse-password-32-chars' }}
          # Quoted so the value is explicitly a string.
          RUN_GOLDEN_TESTS: '1'
        run: |
          echo "============================================================"
          echo "🎯 GOLDEN DATASET QUALITY GATE"
          echo "============================================================"
          echo "This is a MANDATORY quality gate."
          echo "Build MUST pass all golden dataset tests."
          echo "Requirements:"
          echo " - Detection Rate: ≥85%"
          echo " - False Positive Rate: ≤5%"
          echo " - Polish Detection: ≥80%"
          echo "============================================================"
          npx vitest run tests/golden-dataset/ --reporter=verbose
          echo "============================================================"
          echo "✅ Golden Dataset Quality Gate PASSED"
          echo "============================================================"

      - name: Upload test results
        # Upload the report even when the tests above failed.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: golden-dataset-results
          path: |
            services/semantic-service/tests/golden-dataset/VALIDATION_REPORT.md
          retention-days: 30

# All checks passed
  # Aggregation job: it has no `if:` of its own, so it runs only after every
  # job listed under `needs` has finished successfully, then prints a summary.
  all-checks:
    name: ✅ All Checks Passed
    runs-on: ubuntu-latest
    needs:
      - backend-build
      - frontend-build
      - prompt-guard-api
      - docker-build
      - docker-compose-validate
      - docs-check
      - security-audit
      - secret-scan
      - install-script-check
      - heuristics-service-tests
      - semantic-service-tests
      - golden-dataset-validation
    steps:
      - name: Success
        run: |
          # One argument per output line; printf emits each followed by a newline.
          printf '%s\n' \
            "==================================" \
            "✅ All CI checks passed successfully!" \
            "==================================" \
            "" \
            "Build summary:" \
            " ✓ Backend TypeScript compiled" \
            " ✓ Frontend TypeScript compiled" \
            " ✓ Prompt Guard API validated" \
            " ✓ Docker images built" \
            " ✓ Docker Compose validated" \
            " ✓ Documentation checked" \
            " ✓ Security audit completed" \
            " ✓ Installation scripts validated" \
            " ✓ Heuristics Service unit tests (v2.0.0)" \
            " ✓ Semantic Service unit tests (v2.0.0)" \
            " ✓ Golden Dataset Quality Gate (E5 Migration v2.0)" \
            ""