v2.1.0: E5 embedding migration, Two-Phase Search #293

Workflow file for this run

	# Workflow identity and triggers.
	name: CI/CD Pipeline

	# Run for pushes to, and pull requests targeting, the main and develop branches.
	on:
	  push:
	    branches:
	      - main
	      - develop
	  pull_request:
	    branches:
	      - main
	      - develop

	jobs:
	# Backend: install dependencies, type-check and build the Web UI backend.
	# (No lint step is run by this job.)
	backend-build:
	  name: Web UI Backend - Build & Type Check
	  runs-on: ubuntu-latest

	  steps:
	    - uses: actions/checkout@v4

	    - name: Setup Node.js
	      uses: actions/setup-node@v4
	      with:
	        node-version: '20'

	    # Caches the npm download cache (~/.npm), keyed on the backend lockfile hash.
	    - name: Cache npm dependencies
	      uses: actions/cache@v3
	      with:
	        path: ~/.npm
	        key: ${{ runner.os }}-npm-backend-${{ hashFiles('services/web-ui/backend/package-lock.json') }}
	        # One prefix per line: newest backend cache first, then any npm cache for this OS.
	        restore-keys: |
	          ${{ runner.os }}-npm-backend-
	          ${{ runner.os }}-npm-

	    - name: Install dependencies
	      working-directory: services/web-ui/backend
	      run: npm ci

	    # --noEmit: report type errors only; build output is produced by the next step.
	    - name: TypeScript type check
	      working-directory: services/web-ui/backend
	      run: npx tsc --noEmit

	    - name: Build
	      working-directory: services/web-ui/backend
	      run: npm run build

	# Frontend: install dependencies, type-check, build, and publish the dist/ folder as an artifact.
	frontend-build:
	  name: Web UI Frontend - Build & Type Check
	  runs-on: ubuntu-latest

	  steps:
	    - uses: actions/checkout@v4

	    - name: Setup Node.js
	      uses: actions/setup-node@v4
	      with:
	        node-version: '20'

	    # Caches the npm download cache (~/.npm), keyed on the frontend lockfile hash.
	    - name: Cache npm dependencies
	      uses: actions/cache@v3
	      with:
	        path: ~/.npm
	        key: ${{ runner.os }}-npm-frontend-${{ hashFiles('services/web-ui/frontend/package-lock.json') }}
	        # One prefix per line: newest frontend cache first, then any npm cache for this OS.
	        restore-keys: |
	          ${{ runner.os }}-npm-frontend-
	          ${{ runner.os }}-npm-

	    # Uses `npm install` (not `npm ci`) and then explicitly installs the Linux x64
	    # native packages for Rollup and Tailwind's oxide engine, pinning the latter to
	    # the version of @tailwindcss/oxide that was just installed.
	    # NOTE(review): presumably a workaround for optional native dependencies missing
	    # from the lockfile - confirm before simplifying.
	    - name: Install dependencies
	      working-directory: services/web-ui/frontend
	      run: |
	        npm install
	        npm install --force @rollup/rollup-linux-x64-gnu
	        npm install --no-save @tailwindcss/oxide-linux-x64-gnu@$(node -p "require('@tailwindcss/oxide/package.json').version")
	        npm rebuild rollup

	    # --noEmit: report type errors only; build output is produced by the next step.
	    - name: TypeScript type check
	      working-directory: services/web-ui/frontend
	      run: npx tsc --noEmit

	    - name: Build
	      working-directory: services/web-ui/frontend
	      run: npm run build
	      env:
	        # Environment values are strings; quoted so the YAML type matches.
	        NAPI_RS_FORCE_WASI: '1'

	    - name: Upload build artifacts
	      uses: actions/upload-artifact@v4
	      with:
	        name: frontend-dist
	        path: services/web-ui/frontend/dist/
	        retention-days: 7

	# Prompt Guard API - Python validation: install requirements and byte-compile app.py.
	prompt-guard-api:
	  name: Prompt Guard API - Validation
	  runs-on: ubuntu-latest

	  steps:
	    - uses: actions/checkout@v4

	    - name: Setup Python
	      uses: actions/setup-python@v5
	      with:
	        python-version: '3.11'
	        cache: 'pip'
	        cache-dependency-path: prompt-guard-api/requirements.txt

	    - name: Install dependencies
	      working-directory: prompt-guard-api
	      run: |
	        python -m pip install --upgrade pip
	        pip install -r requirements.txt

	    # py_compile only verifies that app.py parses; it does not import or run it.
	    - name: Syntax check
	      working-directory: prompt-guard-api
	      run: python -m py_compile app.py

	# Docker build validation: for each matrix entry, build the image, start a
	# container from it, probe its health endpoint, and always remove the container.
	docker-build:
	  name: Docker - Build & Smoke Test
	  runs-on: ubuntu-latest
	  timeout-minutes: 25

	  strategy:
	    matrix:
	      service:
	        - web-ui-backend
	        - web-ui-frontend
	        - prompt-guard-api
	        - heuristics-service
	        - semantic-service

	  # Unique per matrix entry, run and attempt; shared by the start, health-check
	  # and cleanup steps so they always refer to the same container.
	  env:
	    CONTAINER_NAME: test-${{ matrix.service }}-${{ github.run_id }}-${{ github.run_attempt }}

	  steps:
	    - uses: actions/checkout@v4

	    - name: Set up Docker Buildx
	      uses: docker/setup-buildx-action@v3

	    # Exactly one of the following build steps runs per matrix entry.

	    # Build from project root (Dockerfile expects services/web-ui/backend/* and plugin/Chrome paths)
	    - name: Build Web UI Backend
	      if: matrix.service == 'web-ui-backend'
	      run: docker build -f services/web-ui/backend/Dockerfile -t vigil-guard-web-ui-backend:test .

	    - name: Build Web UI Frontend
	      if: matrix.service == 'web-ui-frontend'
	      run: docker build -f services/web-ui/frontend/Dockerfile -t vigil-guard-web-ui-frontend:test .

	    - name: Build Prompt Guard API
	      if: matrix.service == 'prompt-guard-api'
	      working-directory: prompt-guard-api
	      run: docker build -t vigil-guard-prompt-guard-api:test .

	    - name: Build Heuristics Service
	      if: matrix.service == 'heuristics-service'
	      working-directory: services/heuristics-service
	      run: docker build -t vigil-guard-heuristics-service:test .

	    - name: Build Semantic Service
	      if: matrix.service == 'semantic-service'
	      working-directory: services/semantic-service
	      run: docker build -t vigil-guard-semantic-service:test .

	    # Starts the freshly built image detached, waits briefly, and prints its logs.
	    # Secrets fall back to fixed placeholder values when the CI secret is unset.
	    - name: Smoke test - Start container
	      run: |
	        set -euo pipefail
	        # Remove any leftover container with the same name; ignore "not found".
	        docker rm -f "${CONTAINER_NAME}" >/dev/null 2>&1 || true

	        case "${{ matrix.service }}" in
	          web-ui-backend)
	            docker run -d --name "${CONTAINER_NAME}" \
	              -e NODE_ENV=test \
	              -e JWT_SECRET="${{ secrets.CI_JWT_SECRET || 'test-jwt-secret-min-32-chars-long-for-ci-smoke-test' }}" \
	              -e SESSION_SECRET="${{ secrets.CI_SESSION_SECRET || 'test-session-secret-min-64-chars-long-for-ci-smoke-test-validation-x' }}" \
	              -e WEB_UI_ADMIN_PASSWORD="${{ secrets.CI_WEB_UI_ADMIN_PASSWORD || 'test-admin-password-min-32-chars' }}" \
	              -e TARGET_DIR=/tmp/test \
	              vigil-guard-web-ui-backend:test
	            sleep 8
	            docker logs "${CONTAINER_NAME}"
	            ;;
	          web-ui-frontend)
	            docker run -d --name "${CONTAINER_NAME}" \
	              vigil-guard-web-ui-frontend:test
	            sleep 5
	            docker logs "${CONTAINER_NAME}"
	            ;;
	          prompt-guard-api)
	            docker run -d --name "${CONTAINER_NAME}" \
	              -e MOCK_MODEL=true \
	              vigil-guard-prompt-guard-api:test
	            sleep 8
	            docker logs "${CONTAINER_NAME}"
	            ;;
	          heuristics-service)
	            docker run -d --name "${CONTAINER_NAME}" \
	              -e NODE_ENV=test \
	              vigil-guard-heuristics-service:test
	            sleep 5
	            docker logs "${CONTAINER_NAME}"
	            ;;
	          semantic-service)
	            docker run -d --name "${CONTAINER_NAME}" \
	              -e NODE_ENV=test \
	              vigil-guard-semantic-service:test
	            sleep 5
	            docker logs "${CONTAINER_NAME}"
	            ;;
	        esac

	    # Fails fast if the container has exited, then polls the service's endpoint(s)
	    # from inside the container until they respond or the attempts run out.
	    - name: Smoke test - Health check
	      run: |
	        set -euo pipefail

	        # `docker ps` without -a lists only running containers, so a container that
	        # crashed after start is reported here instead of timing out in wait_for.
	        # The names are captured first so `grep -q` closing its input early cannot
	        # trip `pipefail`.
	        running="$(docker ps --format '{{.Names}}')"
	        if ! grep -Fxq "${CONTAINER_NAME}" <<<"${running}"; then
	          echo "::error::Container ${CONTAINER_NAME} is not running"
	          docker ps -a
	          docker logs "${CONTAINER_NAME}" || true
	          exit 1
	        fi

	        # wait_for CMD [ATTEMPTS=12] [DELAY=5]
	        # Evaluates CMD until it succeeds; returns 1 after ATTEMPTS failures,
	        # sleeping DELAY seconds after each failed attempt.
	        wait_for() {
	          local cmd="$1"
	          local attempts="${2:-12}"
	          local delay="${3:-5}"
	          for i in $(seq 1 "$attempts"); do
	            if eval "$cmd"; then
	              echo "✅ Health check succeeded (attempt $i/$attempts)"
	              return 0
	            fi
	            echo "⏳ Waiting for service (attempt $i/$attempts)..."
	            sleep "$delay"
	          done
	          echo "❌ Service did not become healthy in time"
	          return 1
	        }

	        case "${{ matrix.service }}" in
	          web-ui-backend)
	            wait_for "docker exec ${CONTAINER_NAME} wget -qO- http://127.0.0.1:8787/health >/dev/null" || exit 1
	            ;;
	          web-ui-frontend)
	            wait_for "docker exec ${CONTAINER_NAME} curl -fsS http://127.0.0.1/" || exit 1
	            ;;
	          prompt-guard-api)
	            wait_for "docker exec ${CONTAINER_NAME} curl -fsS http://127.0.0.1:8000/health" || exit 1
	            wait_for "docker exec ${CONTAINER_NAME} curl -fsS http://127.0.0.1:8000/" || exit 1
	            wait_for "docker exec ${CONTAINER_NAME} curl -fsS -X POST http://127.0.0.1:8000/detect -H 'Content-Type: application/json' -d '{\"text\":\"Hello world\"}'" || exit 1
	            ;;
	          heuristics-service)
	            # Alpine image uses wget instead of curl
	            wait_for "docker exec ${CONTAINER_NAME} wget -qO- http://127.0.0.1:5005/health" || exit 1
	            ;;
	          semantic-service)
	            # node:20-slim doesn't have curl/wget, use Node.js for health check
	            # Accept any HTTP response (200 or 503 degraded) - service is running
	            wait_for "docker exec ${CONTAINER_NAME} node -e \"require('http').get('http://127.0.0.1:5006/health', r => { console.log('Status:', r.statusCode); process.exit(0); }).on('error', e => process.exit(1))\"" || exit 1
	            ;;
	        esac

	    # Runs even when earlier steps failed so no container is left behind.
	    - name: Cleanup
	      if: always()
	      run: docker rm -f "${CONTAINER_NAME}" || true

	# Docker Compose validation: the compose file must render, and it must define
	# every service listed in the "Check required services" step.
	docker-compose-validate:
	  name: Docker Compose - Syntax Check
	  runs-on: ubuntu-latest

	  # Values supplied to `docker compose config` in both steps. Each secret falls
	  # back to a fixed placeholder when the corresponding CI secret is unset.
	  env:
	    CLICKHOUSE_PASSWORD: ${{ secrets.CI_CLICKHOUSE_PASSWORD || 'test-clickhouse-password-min-32-chars' }}
	    GF_SECURITY_ADMIN_PASSWORD: ${{ secrets.CI_GRAFANA_ADMIN_PASSWORD || 'test-grafana-password-min-32-chars-x' }}
	    GRAFANA_UID: '472'
	    GRAFANA_GID: '472'
	    JWT_SECRET: ${{ secrets.CI_JWT_SECRET || 'test-jwt-secret-min-32-chars-long-for-ci-validation' }}
	    SESSION_SECRET: ${{ secrets.CI_SESSION_SECRET || 'test-session-secret-min-64-chars-long-for-ci-validation-tests-x' }}
	    WEB_UI_ADMIN_PASSWORD: ${{ secrets.CI_WEB_UI_ADMIN_PASSWORD || 'test-admin-password-min-32-chars-x' }}

	  steps:
	    - uses: actions/checkout@v4

	    # Rendering the merged configuration fails on syntax or interpolation errors.
	    - name: Validate docker-compose.yml
	      run: docker compose config > /dev/null

	    - name: Check required services
	      run: |
	        services=$(docker compose config --services)
	        required="web-ui-backend web-ui-frontend clickhouse grafana n8n caddy heuristics-service semantic-service"
	        missing=0
	        for svc in $required; do
	          # -F -x: compare whole lines literally, so "grafana" is not satisfied by
	          # a differently named service that merely contains "grafana".
	          if ! grep -Fxq "$svc" <<<"$services"; then
	            echo "Missing service: $svc"
	            missing=1
	          fi
	        done
	        # Report every missing service before failing the step.
	        exit "$missing"

	# Documentation validation: required Markdown files must exist; TODO/FIXME
	# markers in source files are reported as a warning only.
	docs-check:
	  name: Documentation - Link & Format Check
	  runs-on: ubuntu-latest

	  steps:
	    - uses: actions/checkout@v4

	    - name: Check Markdown files exist
	      run: |
	        # v2.0.0 documentation structure
	        required_docs=(
	          "README.md"
	          "docs/README.md"
	          "docs/operations/installation.md"
	          "docs/guides/configuration.md"
	          "docs/AUTHENTICATION.md"
	          "docs/API.md"
	          "prompt-guard-api/README.md"
	        )
	        for doc in "${required_docs[@]}"; do
	          if [ ! -f "$doc" ]; then
	            echo "Missing required documentation: $doc"
	            exit 1
	          fi
	        done

	    # Informational only: the step succeeds whether or not matches are found.
	    - name: Check for TODO/FIXME in code
	      run: |
	        # --include takes a glob: "*.ts" matches TypeScript sources, whereas ".ts"
	        # would only match a file literally named ".ts".
	        if grep -r "TODO\|FIXME" services/ prompt-guard-api/ --include="*.ts" --include="*.tsx" --include="*.py" | grep -v node_modules; then
	          echo "Warning: Found TODO/FIXME comments"
	        fi

	# Security audit: runs `npm audit` for the Web UI backend and frontend.
	security-audit:
	  name: Security - Dependency Audit
	  runs-on: ubuntu-latest

	  strategy:
	    matrix:
	      component:
	        - backend
	        - frontend

	  steps:
	    - uses: actions/checkout@v4

	    - name: Setup Node.js
	      uses: actions/setup-node@v4
	      with:
	        node-version: '20'

	    # Caches the npm download cache, keyed on the lockfile of the matrix component.
	    - name: Cache npm dependencies
	      uses: actions/cache@v3
	      with:
	        path: ~/.npm
	        key: ${{ runner.os }}-npm-${{ matrix.component }}-${{ hashFiles(format('services/web-ui/{0}/package-lock.json', matrix.component)) }}
	        # One prefix per line: newest cache for this component, then any npm cache for this OS.
	        restore-keys: |
	          ${{ runner.os }}-npm-${{ matrix.component }}-
	          ${{ runner.os }}-npm-

	    # continue-on-error: any failure in this step (install or audit findings at
	    # moderate severity or above) is reported but does not fail the job.
	    - name: Security audit - ${{ matrix.component }}
	      working-directory: services/web-ui/${{ matrix.component }}
	      run: |
	        npm ci
	        if [ "${{ matrix.component }}" = "frontend" ]; then
	          npm rebuild rollup
	        fi
	        npm audit --audit-level=moderate
	      continue-on-error: true

	# Secret scanning: TruffleHog compares HEAD against the repository's default branch.
	secret-scan:
	  runs-on: ubuntu-latest
	  name: Security - Secret Scanning

	  steps:
	    # fetch-depth 0 fetches the complete history instead of a shallow clone.
	    - uses: actions/checkout@v4
	      with:
	        fetch-depth: 0

	    # A failure of this step is reported but does not fail the job.
	    - name: Check for secrets in code
	      continue-on-error: true
	      uses: trufflesecurity/trufflehog@main
	      with:
	        path: './'
	        base: ${{ github.event.repository.default_branch }}
	        head: 'HEAD'

	# Installation script validation: both shell scripts must parse and be executable.
	install-script-check:
	  name: Installation Script - Validation
	  runs-on: ubuntu-latest

	  steps:
	    - uses: actions/checkout@v4

	    # `bash -n` parses the script without executing it.
	    - name: Check script syntax
	      run: bash -n install.sh

	    - name: Check download script syntax
	      run: bash -n scripts/download-llama-model.sh

	    # The executable bit must be set on both scripts in the checkout.
	    - name: Verify script permissions
	      run: |
	        [ -x install.sh ] || (echo "install.sh not executable" && exit 1)
	        [ -x scripts/download-llama-model.sh ] || (echo "download-llama-model.sh not executable" && exit 1)

	# Heuristics Service - Unit Tests (v2.0.0)
	heuristics-service-tests:
	  name: Heuristics Service - Unit Tests
	  runs-on: ubuntu-latest

	  steps:
	    - uses: actions/checkout@v4

	    - name: Setup Node.js
	      uses: actions/setup-node@v4
	      with:
	        node-version: '20'

	    # Caches the npm download cache (~/.npm), keyed on the service lockfile hash.
	    - name: Cache npm dependencies
	      uses: actions/cache@v3
	      with:
	        path: ~/.npm
	        key: ${{ runner.os }}-npm-heuristics-${{ hashFiles('services/heuristics-service/package-lock.json') }}
	        # One prefix per line: newest heuristics cache first, then any npm cache for this OS.
	        restore-keys: |
	          ${{ runner.os }}-npm-heuristics-
	          ${{ runner.os }}-npm-

	    - name: Install dependencies
	      working-directory: services/heuristics-service
	      run: npm ci

	    # Everything after `--` is forwarded to the package's test script.
	    - name: Run unit tests
	      working-directory: services/heuristics-service
	      run: npm test -- tests/unit/

	# Semantic Service - Unit Tests (v2.0.0)
	semantic-service-tests:
	  name: Semantic Service - Unit Tests
	  runs-on: ubuntu-latest

	  steps:
	    - uses: actions/checkout@v4

	    - name: Setup Node.js
	      uses: actions/setup-node@v4
	      with:
	        node-version: '20'

	    # Caches the npm download cache (~/.npm), keyed on the service lockfile hash.
	    - name: Cache npm dependencies
	      uses: actions/cache@v3
	      with:
	        path: ~/.npm
	        key: ${{ runner.os }}-npm-semantic-${{ hashFiles('services/semantic-service/package-lock.json') }}
	        # One prefix per line: newest semantic cache first, then any npm cache for this OS.
	        restore-keys: |
	          ${{ runner.os }}-npm-semantic-
	          ${{ runner.os }}-npm-

	    - name: Install dependencies
	      working-directory: services/semantic-service
	      run: npm ci

	    # Everything after `--` is forwarded to the package's test script.
	    - name: Run unit tests
	      working-directory: services/semantic-service
	      run: npm test -- tests/unit/

	# Golden Dataset Quality Gate (Phase 3.6 - Semantic E5 Migration)
	# This is a MANDATORY quality gate - build MUST fail if any example is misclassified
	#
	# The job runs on every push and pull request. A path filter built on
	# `github.event.pull_request.changed_files` cannot work: that field is the
	# number of changed files, not a list of paths, so `contains()` on it never
	# matches and the gate would be skipped on every pull request.
	golden-dataset-validation:
	  name: 🎯 Golden Dataset - Quality Gate
	  runs-on: ubuntu-latest

	  # ClickHouse service container, reachable from the steps via localhost.
	  services:
	    clickhouse:
	      image: clickhouse/clickhouse-server:24.8-alpine
	      ports:
	        - '8123:8123'
	        - '9000:9000'
	      env:
	        CLICKHOUSE_USER: default
	        CLICKHOUSE_PASSWORD: ${{ secrets.CI_CLICKHOUSE_PASSWORD || 'test-clickhouse-password-32-chars' }}
	      # Folded into one line of `docker create` options; the job's steps start
	      # only after the container reports healthy.
	      options: >-
	        --health-cmd "wget -qO- http://localhost:8123/ping || exit 1"
	        --health-interval 10s
	        --health-timeout 5s
	        --health-retries 5

	  steps:
	    - uses: actions/checkout@v4

	    - name: Setup Node.js
	      uses: actions/setup-node@v4
	      with:
	        node-version: '20'

	    - name: Setup Python
	      uses: actions/setup-python@v5
	      with:
	        python-version: '3.11'
	        cache: 'pip'

	    # Caches the npm download cache; falls back to any semantic-service cache.
	    - name: Cache npm dependencies
	      uses: actions/cache@v3
	      with:
	        path: ~/.npm
	        key: ${{ runner.os }}-npm-semantic-golden-${{ hashFiles('services/semantic-service/package-lock.json') }}
	        restore-keys: |
	          ${{ runner.os }}-npm-semantic-

	    - name: Install Node dependencies
	      working-directory: services/semantic-service
	      run: npm ci

	    - name: Install Python dependencies for embedding generation
	      working-directory: services/semantic-service
	      run: |
	        pip install transformers torch onnx onnxruntime

	    # Restores the model directory from the cache when the fixed key exists.
	    - name: Download E5 model (cached)
	      uses: actions/cache@v3
	      with:
	        path: services/semantic-service/models/multilingual-e5-small-onnx-int8
	        key: model-e5-small-onnx-int8-v1

	    # Best effort: a failing download script only prints a notice, it does not fail the step.
	    - name: Download model if not cached
	      working-directory: services/semantic-service
	      run: |
	        if [ ! -d "models/multilingual-e5-small-onnx-int8/onnx" ]; then
	          python3 scripts/download-e5-model.py || echo "Model download script not found, will use test fixtures"
	        fi

	    - name: Setup ClickHouse tables
	      env:
	        CLICKHOUSE_HOST: localhost
	        CLICKHOUSE_PASSWORD: ${{ secrets.CI_CLICKHOUSE_PASSWORD || 'test-clickhouse-password-32-chars' }}
	      run: |
	        # Sends one statement to the ClickHouse HTTP interface.
	        # - Authenticates as the `default` user with the password the service
	        #   container was started with; without credentials the request is rejected.
	        # - --fail-with-body makes curl exit non-zero on an HTTP error (and still
	        #   print the server's message) so a failed DDL statement fails the step.
	        ch_query() {
	          curl -sS --fail-with-body \
	            --user "default:${CLICKHOUSE_PASSWORD}" \
	            "http://${CLICKHOUSE_HOST}:8123/" "$@"
	        }

	        # Create database and tables
	        ch_query --data-binary "CREATE DATABASE IF NOT EXISTS n8n_logs"

	        # Create pattern_embeddings_v2 table
	        ch_query --data-binary @services/semantic-service/sql/04-semantic-embeddings-v2.sql

	        # Create semantic_safe_embeddings table
	        ch_query --data-binary @services/semantic-service/sql/05-semantic-safe-embeddings.sql

	    # Each import is skipped when its input file is absent from the checkout.
	    - name: Import embeddings (attack + safe patterns)
	      working-directory: services/semantic-service
	      env:
	        CLICKHOUSE_HOST: localhost
	        CLICKHOUSE_PASSWORD: ${{ secrets.CI_CLICKHOUSE_PASSWORD || 'test-clickhouse-password-32-chars' }}
	      run: |
	        # Import attack patterns
	        if [ -f "data/embeddings_v2.jsonl" ]; then
	          node scripts/import-embeddings.js --input data/embeddings_v2.jsonl --table pattern_embeddings_v2
	        fi

	        # Import safe patterns
	        if [ -f "data/safe_embeddings.jsonl" ]; then
	          node scripts/import-embeddings.js --input data/safe_embeddings.jsonl --table semantic_safe_embeddings
	        fi

	    # The closing "PASSED" banner is only reached when vitest exits successfully.
	    - name: 🎯 Run Golden Dataset Tests (Quality Gate)
	      working-directory: services/semantic-service
	      env:
	        NODE_ENV: development
	        CLICKHOUSE_HOST: localhost
	        CLICKHOUSE_PASSWORD: ${{ secrets.CI_CLICKHOUSE_PASSWORD || 'test-clickhouse-password-32-chars' }}
	        RUN_GOLDEN_TESTS: '1'
	      run: |
	        echo "============================================================"
	        echo "🎯 GOLDEN DATASET QUALITY GATE"
	        echo "============================================================"
	        echo "This is a MANDATORY quality gate."
	        echo "Build MUST pass all golden dataset tests."
	        echo "Requirements:"
	        echo " - Detection Rate: ≥85%"
	        echo " - False Positive Rate: ≤5%"
	        echo " - Polish Detection: ≥80%"
	        echo "============================================================"

	        npx vitest run tests/golden-dataset/ --reporter=verbose

	        echo "============================================================"
	        echo "✅ Golden Dataset Quality Gate PASSED"
	        echo "============================================================"

	    # Uploaded even when the tests failed, so the report is available for diagnosis.
	    - name: Upload test results
	      if: always()
	      uses: actions/upload-artifact@v4
	      with:
	        name: golden-dataset-results
	        path: |
	          services/semantic-service/tests/golden-dataset/VALIDATION_REPORT.md
	        retention-days: 30

	# All checks passed: single aggregate status for the whole pipeline.
	#
	# `if: always()` makes this job run even when a needed job failed. Without it
	# the job would be skipped rather than failed, and a skipped job does not
	# report a failing status. The first step turns any failed or cancelled
	# dependency into an explicit failure of this job.
	all-checks:
	  name: ✅ All Checks Passed
	  runs-on: ubuntu-latest
	  if: always()
	  needs:
	    - backend-build
	    - frontend-build
	    - prompt-guard-api
	    - docker-build
	    - docker-compose-validate
	    - docs-check
	    - security-audit
	    - secret-scan
	    - install-script-check
	    - heuristics-service-tests
	    - semantic-service-tests
	    - golden-dataset-validation

	  steps:
	    # Skipped dependencies are tolerated; failed or cancelled ones are not.
	    - name: Fail if a required job failed or was cancelled
	      if: contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')
	      run: |
	        echo "::error::One or more required jobs failed or were cancelled"
	        exit 1

	    - name: Success
	      run: |
	        echo "=================================="
	        echo "✅ All CI checks passed successfully!"
	        echo "=================================="
	        echo ""
	        echo "Build summary:"
	        echo " ✓ Backend TypeScript compiled"
	        echo " ✓ Frontend TypeScript compiled"
	        echo " ✓ Prompt Guard API validated"
	        echo " ✓ Docker images built"
	        echo " ✓ Docker Compose validated"
	        echo " ✓ Documentation checked"
	        echo " ✓ Security audit completed"
	        echo " ✓ Installation scripts validated"
	        echo " ✓ Heuristics Service unit tests (v2.0.0)"
	        echo " ✓ Semantic Service unit tests (v2.0.0)"
	        echo " ✓ Golden Dataset Quality Gate (E5 Migration v2.0)"
	        echo ""

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

v2.1.0: E5 embedding migration, Two-Phase Search #293

Workflow file

v2.1.0: E5 embedding migration, Two-Phase Search #293

Uh oh!

Workflow file for this run