Monitoring and Health Checks #7125

Workflow file for this run

.github/workflows/monitoring.yml at 236192a

	name: Monitoring and Health Checks

	on:
	  schedule:
	    # Run health checks every 5 minutes.
	    # A cron expression needs all five fields (minute hour day month weekday).
	    - cron: "*/5 * * * *"
	  # Manual runs choose which environment to probe.
	  workflow_dispatch:
	    inputs:
	      environment:
	        description: "Environment to monitor"
	        required: true
	        default: "production"
	        type: choice
	        options:
	          - production
	          - staging

	jobs:
	  # Health check production
	  # Runs on every scheduled tick, and on manual dispatch when the selected
	  # environment is "production".
	  health-check-production:
	    runs-on: ubuntu-latest
	    if: github.event.inputs.environment == 'production' || github.event.schedule

	    steps:
	      - name: Check production health
	        run: |
	          PRODUCTION_URL="${{ secrets.PRODUCTION_URL }}"

	          # Basic health check
	          # --max-time bounds a hung endpoint. "|| true" keeps the default
	          # "bash -e" shell from aborting on a connection error so the
	          # diagnostic below is still printed (curl reports status 000).
	          HTTP_STATUS=$(curl -s -o /dev/null --max-time 30 -w "%{http_code}" "$PRODUCTION_URL/api/health" || true)

	          if [ "$HTTP_STATUS" != "200" ]; then
	            echo "❌ Production health check failed (HTTP $HTTP_STATUS)"
	            exit 1
	          fi

	          # Response time check
	          # curl reports seconds as a decimal; bc converts to milliseconds.
	          RESPONSE_TIME=$(curl -s -o /dev/null --max-time 30 -w "%{time_total}" "$PRODUCTION_URL/api/health")
	          RESPONSE_TIME_MS=$(echo "$RESPONSE_TIME * 1000" | bc)

	          # A slow response only warns; it does not fail the job.
	          if (( $(echo "$RESPONSE_TIME_MS > 5000" | bc -l) )); then
	            echo "⚠️ Production response time is slow: ${RESPONSE_TIME_MS}ms"
	          else
	            echo "✅ Production health check passed (${RESPONSE_TIME_MS}ms)"
	          fi

	          # Database connectivity check
	          # -f makes curl exit non-zero on HTTP errors (>= 400).
	          curl -f --max-time 30 "$PRODUCTION_URL/api/health/database" || {
	            echo "❌ Database connectivity check failed"
	            exit 1
	          }

	          # Redis connectivity check
	          curl -f --max-time 30 "$PRODUCTION_URL/api/health/redis" || {
	            echo "❌ Redis connectivity check failed"
	            exit 1
	          }

	      # Runs only when a previous step in this job failed; it writes to the
	      # job log and does not notify any external channel.
	      - name: Alert on failure
	        if: failure()
	        run: |
	          echo "🚨 Production Health Check Failed!"
	          echo "Environment: Production"
	          echo "Time: $(date)"
	          echo "Branch: ${{ github.ref_name }}"
	          echo "Please check the production environment immediately!"

	# Health check staging
	health-check-staging:
	runs-on: ubuntu-latest
	if: github.event.inputs.environment == 'staging'

	steps:
	- name: Check staging health
	run: \|
	STAGING_URL="${{ secrets.STAGING_URL }}"

	# Basic health check
	HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "$STAGING_URL/api/health")

	if [ "$HTTP_STATUS" != "200" ]; then
	echo "❌ Staging health check failed (HTTP $HTTP_STATUS)"
	exit 1
	fi

	echo "✅ Staging health check passed"

	# Performance monitoring
	performance-monitoring:
	runs-on: ubuntu-latest
	if: github.event.schedule

	steps:
	- name: Checkout code
	uses: actions/checkout@v4

	- name: Set up Node.js
	uses: actions/setup-node@v4
	with:
	node-version: "20"
	cache: "npm"

	- name: Install dependencies
	run: npm ci

	- name: Run performance tests
	run: \|
	# Run basic performance tests
	npm run test:performance \|\| echo "Performance tests not configured"

	- name: Check application metrics
	run: \|
	PRODUCTION_URL="${{ secrets.PRODUCTION_URL }}"

	# Get application metrics
	METRICS=$(curl -s "$PRODUCTION_URL/api/metrics" \|\| echo "{}")

	# Parse and check key metrics
	echo "Application metrics:"
	echo "$METRICS"

	# Check if metrics are within acceptable ranges
	# Add your specific metric checks here

	# Security monitoring
	security-monitoring:
	runs-on: ubuntu-latest
	if: github.event.schedule

	steps:
	- name: Checkout code
	uses: actions/checkout@v4

	- name: Run security audit
	run: \|
	npm audit --audit-level=moderate

	- name: Check for known vulnerabilities
	run: \|
	npm audit --audit-level=high --json > audit-results.json

	# Check if there are high or critical vulnerabilities
	HIGH_VULNS=$(cat audit-results.json \| jq '.metadata.vulnerabilities.high // 0')
	CRITICAL_VULNS=$(cat audit-results.json \| jq '.metadata.vulnerabilities.critical // 0')

	if [ "$HIGH_VULNS" -gt 0 ] \|\| [ "$CRITICAL_VULNS" -gt 0 ]; then
	echo "⚠️ Security vulnerabilities found:"
	echo "High: $HIGH_VULNS, Critical: $CRITICAL_VULNS"

	# Send alert
	curl -X POST -H 'Content-type: application/json' \
	--data "{\"text\":\"⚠️ Security Alert: $HIGH_VULNS high, $CRITICAL_VULNS critical vulnerabilities found in WorkNow\"}" \
	"${{ secrets.SLACK_WEBHOOK_URL }}"
	else
	echo "✅ No critical security vulnerabilities found"
	fi

	# Resource monitoring
	resource-monitoring:
	runs-on: ubuntu-latest
	if: github.event.schedule

	steps:
	- name: Check server resources
	uses: appleboy/ssh-action@v1.0.3
	with:
	host: ${{ secrets.PRODUCTION_HOST }}
	username: ${{ secrets.PRODUCTION_USER }}
	key: ${{ secrets.PRODUCTION_SSH_KEY }}
	script: \|
	# Check disk usage
	DISK_USAGE=$(df -h / \| awk 'NR==2 {print $5}' \| sed 's/%//')
	if [ "$DISK_USAGE" -gt 80 ]; then
	echo "⚠️ Disk usage is high: ${DISK_USAGE}%"
	fi

	# Check memory usage
	MEMORY_USAGE=$(free \| awk 'NR==2{printf "%.0f", $3*100/$2}')
	if [ "$MEMORY_USAGE" -gt 80 ]; then
	echo "⚠️ Memory usage is high: ${MEMORY_USAGE}%"
	fi

	# Check CPU load
	CPU_LOAD=$(uptime \| awk -F'load average:' '{print $2}' \| awk '{print $1}' \| sed 's/,//')
	if (( $(echo "$CPU_LOAD > 2.0" \| bc -l) )); then
	echo "⚠️ CPU load is high: $CPU_LOAD"
	fi

	# Check Docker container status
	docker ps --format "table {{.Names}}\t{{.Status}}" \| grep worknow

	echo "✅ Resource monitoring completed"

	# Log analysis
	log-analysis:
	runs-on: ubuntu-latest
	if: github.event.schedule

	steps:
	- name: Analyze application logs
	uses: appleboy/ssh-action@v1.0.3
	with:
	host: ${{ secrets.PRODUCTION_HOST }}
	username: ${{ secrets.PRODUCTION_USER }}
	key: ${{ secrets.PRODUCTION_SSH_KEY }}
	script: \|
	# Check for error patterns in logs
	ERROR_COUNT=$(docker logs worknow-app --since=1h 2>&1 \| grep -i error \| wc -l)

	if [ "$ERROR_COUNT" -gt 10 ]; then
	echo "⚠️ High error count in last hour: $ERROR_COUNT"

	# Get recent errors
	echo "Recent errors:"
	docker logs worknow-app --since=1h 2>&1 \| grep -i error \| tail -5
	else
	echo "✅ Error count is normal: $ERROR_COUNT"
	fi

	# Check for specific error patterns
	docker logs worknow-app --since=1h 2>&1 \| grep -E "(timeout\|connection refused\|database error)" \|\| echo "No critical errors found"

	# Notify monitoring summary
	notify-summary:
	runs-on: ubuntu-latest
	needs:
	[
	health-check-production,
	performance-monitoring,
	security-monitoring,
	resource-monitoring,
	log-analysis,
	]
	if: always() && github.event.schedule

	steps:
	- name: Send monitoring summary
	run: \|
	echo "📊 WorkNow Monitoring Summary"
	echo "Health Check: ${{ needs.health-check-production.result }}"
	echo "Performance: ${{ needs.performance-monitoring.result }}"
	echo "Security: ${{ needs.security-monitoring.result }}"
	echo "Resources: ${{ needs.resource-monitoring.result }}"
	echo "Logs: ${{ needs.log-analysis.result }}"
	echo "Time: $(date)"
	if [ "${{ job.status }}" == "success" ]; then
	echo "✅ All systems operational"
	else
	echo "⚠️ Some issues detected - check individual jobs"
	fi

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Monitoring and Health Checks #7125

Workflow file

Monitoring and Health Checks #7125

Uh oh!

Workflow file for this run