Skip to content

Ghost Hunter - Automated CVE Hunt #474

Ghost Hunter - Automated CVE Hunt

Ghost Hunter - Automated CVE Hunt #474

Workflow file for this run

# Ghost Hunter workflow: triggered by pushes and pull requests that touch
# the listed paths, by a six-hourly schedule, and by manual dispatch.
name: Ghost Hunter - Automated CVE Hunt

on:
  push:
    branches: [main, master]
    paths:
      - 'src/**'
      - 'main.py'
      - 'requirements.txt'
      - '.github/workflows/hunt.yml'
  pull_request:
    branches: [main, master]
    paths:
      - 'src/**'
      - 'main.py'
      - 'requirements.txt'
  schedule:
    - cron: '0 */6 * * *'  # minute 0 of every sixth hour
  workflow_dispatch:
    inputs:
      force_run:
        type: boolean
        required: false
        default: false
        description: 'Force a hunt run even if recently executed'

# Token scopes applied to the jobs of this workflow.
permissions:
  contents: write
  issues: write
  pull-requests: read

# One active run per workflow/ref pair; a newer run cancels the one in flight.
concurrency:
  group: ghost-hunt-${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  PYTHON_VERSION: '3.11'
jobs:
  # Main job: runs the hunt, builds reports, and persists the database.
  hunt:
    runs-on: ubuntu-latest
    name: πŸ” Ghost Hunt
    steps:
# fetch-depth 0 asks actions/checkout for the complete history rather than
# a shallow clone.
- name: πŸ“₯ Checkout Repository
  uses: actions/checkout@v4
  with:
    token: ${{ secrets.GITHUB_TOKEN }}
    fetch-depth: 0
# Interpreter version comes from the workflow-level PYTHON_VERSION variable;
# pip downloads are cached by setup-python.
- name: 🐍 Setup Python
  uses: actions/setup-python@v5
  with:
    cache: 'pip'
    python-version: ${{ env.PYTHON_VERSION }}
# Upgrade pip first, then install the project's pinned requirements.
- name: πŸ“¦ Install Dependencies
  run: |
    python -m pip install --upgrade pip \
      && pip install -r requirements.txt
- name: πŸ—„οΈ Cache CVE Data Files
uses: actions/cache@v4
with:
path: |
data/nvd.json
data/cvelistV5
key: cve-data-${{ hashFiles('data/nvd.json') }}-v1
restore-keys: |
cve-data-v1-
- name: πŸ—„οΈ Restore Database Cache
uses: actions/cache@v4
with:
path: ghost_log.db
key: ghost-db-${{ hashFiles('ghost_log.db') }}-${{ github.run_id }}
restore-keys: |
ghost-db-${{ hashFiles('ghost_log.db') }}-
ghost-db-
# Purge rows left behind by the disabled GitHub Code discovery feature.
# Best effort: stderr is discarded and any failure only prints a notice.
- name: 🧹 Clean Legacy GitHub Data
  run: |
    if [ -f ghost_log.db ]; then
      # The heredoc terminator must sit at the script's left margin.
      python - 2>/dev/null <<'PY' || echo "Database cleanup skipped (no database or error)"
    import sqlite3

    db = sqlite3.connect('ghost_log.db')
    cur = db.cursor()

    # Step 1: drop every discovery source of type github_code.
    cur.execute("DELETE FROM discovery_sources WHERE source_type = 'github_code'")
    removed_sources = cur.rowcount

    # Step 2: drop ghost CVEs that no longer have any discovery source.
    cur.execute('''
        DELETE FROM ghost_cves
        WHERE cve_id NOT IN (SELECT DISTINCT cve_id FROM discovery_sources)
    ''')
    removed_ghosts = cur.rowcount

    db.commit()
    db.close()

    if removed_sources > 0 or removed_ghosts > 0:
        print(f'Cleaned {removed_sources} GitHub sources and {removed_ghosts} orphaned ghosts')
    PY
    fi
- name: πŸ” Run Ghost Hunt
id: hunt
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
NVD_API_KEY: ${{ secrets.NVD_API_KEY }}
run: |
set +e # Don't fail immediately on errors
python main.py --hunt --log-level INFO --log-file hunt.log --no-banner
EXIT_CODE=$?
if [ $EXIT_CODE -ne 0 ]; then
echo "::warning::Hunt completed with errors (exit code: $EXIT_CODE)"
echo "hunt_status=completed_with_errors" >> $GITHUB_OUTPUT
else
echo "hunt_status=success" >> $GITHUB_OUTPUT
fi
exit 0 # Don't fail the workflow
# Ask main.py to check resolutions; a non-zero exit is downgraded to a
# log line so the step does not fail.
- name: πŸ”„ Check for Ghost Resolutions
  id: resolutions
  env:
    PYTHONPATH: ${{ github.workspace }}
  run: |
    echo "Checking if any Ghost CVEs have been published..."
    if ! python main.py --check-resolutions --no-banner; then
      echo "Resolution check completed with warnings"
    fi
# Write reports in every format to reports/, then expose the value of the
# `total_ghosts` statistic as the `ghost_count` step output ("0" when the
# lookup fails).
- name: πŸ“Š Generate Reports
  id: report
  run: |
    python main.py --report --format all --output-dir reports --no-banner
    # The inline Python stays at the script's left margin so it parses.
    total=$(python -c "
    from src.storage import DatabaseManager
    print(DatabaseManager().get_statistics().get('total_ghosts', 0))
    " 2>/dev/null || echo "0")
    echo "ghost_count=$total" >> "$GITHUB_OUTPUT"
# Write a run summary to the job summary page, followed by the newest
# Markdown report (or the first 50 lines of the dashboard when no report
# exists).
- name: πŸ“‹ Display Hunt Summary
  run: |
    {
      echo "## πŸ” Ghost Hunt Summary"
      echo ""
      echo "**Trigger:** ${{ github.event_name }}"
      echo "**Branch:** ${{ github.ref_name }}"
      echo "**Commit:** ${{ github.sha }}"
      echo "**Hunt Status:** ${{ steps.hunt.outputs.hunt_status }}"
      echo "**Total Ghosts:** ${{ steps.report.outputs.ghost_count }}"
      echo ""
    } >> "$GITHUB_STEP_SUMMARY"

    # Resolve the newest report by listing the glob. Testing the glob with
    # `[ -f ... ]` breaks as soon as it expands to more than one file.
    LATEST_REPORT=$(ls -t reports/ghost_report_*.md 2>/dev/null | head -1 || true)
    if [ -n "$LATEST_REPORT" ]; then
      cat "$LATEST_REPORT" >> "$GITHUB_STEP_SUMMARY"
    else
      python main.py --dashboard --no-banner 2>&1 | head -50 >> "$GITHUB_STEP_SUMMARY" || echo "Dashboard generation failed" >> "$GITHUB_STEP_SUMMARY"
    fi
# Commit the updated database and reports back to the branch.
# Skipped for pull requests and for any ref other than main/master.
- name: πŸ“€ Commit Database Updates
  if: github.event_name != 'pull_request' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master')
  env:
    # Passed through the environment instead of being spliced into the script.
    BRANCH: ${{ github.ref_name }}
    EVENT_NAME: ${{ github.event_name }}
  run: |
    git config --local user.email "github-actions[bot]@users.noreply.github.com"
    git config --local user.name "github-actions[bot]"

    # Compact the database before committing. Best effort: a failure here
    # only prints a notice. The inline Python stays at the left margin.
    echo "Optimizing database..."
    python -c "
    import sqlite3
    from pathlib import Path
    db_path = 'ghost_log.db'
    if Path(db_path).exists():
        size_before = Path(db_path).stat().st_size / (1024 * 1024)
        conn = sqlite3.connect(db_path)
        try:
            conn.execute('VACUUM')
            conn.execute('ANALYZE')
        finally:
            # Release the file handle even when VACUUM/ANALYZE raises.
            conn.close()
        size_after = Path(db_path).stat().st_size / (1024 * 1024)
        saved = ((size_before - size_after) / size_before * 100) if size_before > 0 else 0
        print(f'Database optimized: {size_before:.2f}MB -> {size_after:.2f}MB (saved {saved:.1f}%)')
    " || echo "Database optimization skipped"

    # Stage the database and the reports. Each extension is staged on its
    # own: a single `git add` with several pathspecs stages nothing at all
    # when any one of them has no match.
    git add ghost_log.db || true
    for ext in json csv md; do
      if compgen -G "reports/ghost_report*.${ext}" > /dev/null; then
        git add reports/ghost_report*."${ext}" || true
      fi
    done

    # Nothing staged means nothing to do.
    if git diff --staged --quiet; then
      echo "No changes to commit"
      exit 0
    fi

    TIMESTAMP=$(date -u +"%Y-%m-%d %H:%M UTC")
    GHOST_COUNT=$(python -c "
    from src.storage import DatabaseManager
    db = DatabaseManager()
    stats = db.get_statistics()
    print(stats.get('total_ghosts', 0))
    " 2>/dev/null || echo "0")
    git commit -m "πŸ” Ghost Hunt: ${TIMESTAMP} | ${GHOST_COUNT} Ghosts tracked [via ${EVENT_NAME}]"

    # Push with retries. The delay doubles after each failed attempt:
    # 10s -> 20s -> 40s -> 80s (no wait after the final attempt).
    MAX_RETRIES=5
    RETRY_DELAY=10
    for i in $(seq 1 $MAX_RETRIES); do
      echo "Push attempt $i of $MAX_RETRIES"

      # Bring in remote changes first: rebase, falling back to a merge.
      if git pull --rebase origin "$BRANCH"; then
        echo "βœ“ Rebase successful"
      else
        echo "⚠ Rebase failed, attempting merge"
        git rebase --abort || true
        if ! git pull --no-rebase origin "$BRANCH"; then
          # Leave the work tree clean so the next attempt can start over
          # instead of failing on an unfinished merge.
          git merge --abort || true
        fi
      fi

      # Push, giving up on a single attempt after 300 seconds.
      if timeout 300 git push; then
        echo "βœ“ Push successful on attempt $i"
        exit 0
      else
        EXIT_CODE=$?
        echo "βœ— Push attempt $i failed (exit code: $EXIT_CODE)"
        if [ "$i" -lt "$MAX_RETRIES" ]; then
          echo "Waiting ${RETRY_DELAY}s before retry..."
          sleep "$RETRY_DELAY"
          RETRY_DELAY=$((RETRY_DELAY * 2))
        fi
      fi
    done

    echo "::error::Failed to push after $MAX_RETRIES attempts"
    exit 1
- name: πŸ“ Upload Hunt Logs
if: always()
uses: actions/upload-artifact@v4
with:
name: hunt-logs-${{ github.run_number }}
path: hunt.log
retention-days: 7
- name: πŸ“ Upload Reports
if: always()
uses: actions/upload-artifact@v4
with:
name: ghost-hunt-reports-${{ github.run_number }}
path: reports/
retention-days: 30
- name: πŸ“ Upload Database
if: always()
uses: actions/upload-artifact@v4
with:
name: ghost-database-${{ github.run_number }}
path: ghost_log.db
retention-days: 90
# Emit a warning annotation when any ghost has reached the configured
# critical limbo threshold. Runs even when earlier steps failed.
- name: 🚨 Alert on New Critical Ghosts
  if: always()
  run: |
    # Count ghosts whose days_in_limbo is at least
    # APP_SETTINGS.limbo_critical_days; falls back to "0" when the snippet
    # fails. The inline Python stays at the script's left margin.
    critical_total=$(python -c "
    from src.storage import DatabaseManager
    from src.config import APP_SETTINGS
    active = DatabaseManager().get_ghost_cves(only_ghosts=True)
    print(sum(1 for g in active if g.days_in_limbo >= APP_SETTINGS.limbo_critical_days))
    " 2>/dev/null || echo "0")

    if [ "$critical_total" -gt "0" ]; then
      echo "⚠️ Warning: $critical_total critical Ghost CVEs (30+ days in limbo)"
      echo "::warning::$critical_total critical Ghost CVEs detected"
    fi
# Periodic database maintenance, delegated to
# src.storage.maintenance.DatabaseMaintenance. Runs only for scheduled
# triggers on main/master.
# NOTE(review): the indentation of the inline Python was lost in this copy.
# The comments inside suggest the cleanup and VACUUM belong under the
# weekday() == 0 check and the archive under the day == 1 check; whether
# optimize_indexes() is unconditional cannot be told from here — confirm
# against the real file before editing.
# NOTE(review): this step appears after the commit and upload steps, so its
# changes are presumably not part of this run's commit or artifact — confirm.
- name: 🧹 Database Maintenance (Phase 2)
if: github.event_name == 'schedule' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master')
run: |
python -c "
from datetime import datetime
from src.storage.maintenance import DatabaseMaintenance
maint = DatabaseMaintenance()
# Weekly cleanup of resolved CVEs (no longer ghosts)
# This prevents database bloat from CVEs that became published
if datetime.now().weekday() == 0:
cleaned = maint.cleanup_resolved_ghosts(days=30)
print(f'βœ“ Cleaned up {cleaned} resolved CVEs (published 30+ days ago)')
result = maint.vacuum()
print(f'βœ“ Weekly VACUUM: Saved {result[\"saved_percent\"]:.1f}%')
# Monthly archiving on 1st of month
if datetime.now().day == 1:
archived = maint.archive_old_sources(days=90)
print(f'βœ“ Monthly archive: {archived} old sources archived')
# Optimize indexes
maint.optimize_indexes()
print('βœ“ Indexes optimized')
"
# Run the issue-creation script for scheduled runs on main/master. A
# non-zero exit is downgraded to a log line so the step does not fail.
- name: 🚨 Create Issues for Critical Ghosts (Phase 4)
  if: github.event_name == 'schedule' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master')
  env:
    PYTHONPATH: ${{ github.workspace }}
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    if ! python scripts/create_ghost_issues.py; then
      echo "Issue creation completed with warnings"
    fi
# Append database and job health figures to the job summary page. Runs
# even when earlier steps failed.
- name: πŸ“Š Workflow Health Check
  if: always()
  run: |
    # Append one line to the job summary.
    summary() { echo "$1" >> "$GITHUB_STEP_SUMMARY"; }
    # Run one COUNT query against the database; print N/A when it fails.
    count_rows() { sqlite3 ghost_log.db "$1" 2>/dev/null || echo "N/A"; }

    summary "## πŸ“Š Workflow Health Metrics"
    summary ""

    if [ ! -f ghost_log.db ]; then
      summary "- **Database**: Not found"
    else
      DB_SIZE=$(du -h ghost_log.db | cut -f1)
      TOTAL_CVES=$(count_rows 'SELECT COUNT(*) FROM ghost_cves')
      ACTIVE_GHOSTS=$(count_rows 'SELECT COUNT(*) FROM ghost_cves WHERE is_ghost=1')
      TOTAL_SOURCES=$(count_rows 'SELECT COUNT(*) FROM discovery_sources')
      summary "- **Database Size**: ${DB_SIZE}"
      summary "- **Total CVEs Tracked**: ${TOTAL_CVES}"
      summary "- **Active Ghosts**: ${ACTIVE_GHOSTS}"
      summary "- **Discovery Sources**: ${TOTAL_SOURCES}"
    fi

    summary "- **Workflow Status**: ${{ job.status }}"
    summary "- **Hunt Status**: ${{ steps.hunt.outputs.hunt_status }}"
    summary ""

    # Surface a warning annotation for any non-success job status.
    if [ "${{ job.status }}" != "success" ]; then
      echo "::warning::Workflow completed with status: ${{ job.status }}"
    fi
# Optional follow-up job: opens a GitHub issue for newly seen ghosts.
# Runs only for scheduled triggers, and only after the hunt job succeeds.
notify:
  needs: hunt
  if: success() && github.event_name == 'schedule'
  name: πŸ“’ Notify New Ghosts
  runs-on: ubuntu-latest
  steps:
# Fetch the reports artifact produced by the hunt job.
# The artifact was uploaded from `reports/`, so its files sit at the
# artifact root; extract them into `reports/` because the following steps
# look for `reports/ghost_report_*.json`.
- name: πŸ“₯ Download Reports
  uses: actions/download-artifact@v4
  with:
    name: ghost-hunt-reports-${{ github.run_number }}
    path: reports
# Same interpreter version as the hunt job (workflow-level PYTHON_VERSION).
- name: 🐍 Setup Python
  uses: actions/setup-python@v5
  with:
    python-version: ${{ env.PYTHON_VERSION }}
# Only `requests` is installed in this job.
- name: πŸ“¦ Install Dependencies
  run: |
    pip install requests
# Count ghosts first seen within the last 6 hours in the newest JSON report
# and publish the number as the `new_ghost_count` step output.
- name: πŸ“’ Check for New Ghosts
  id: check_ghosts
  run: |
    # Resolve the newest report by listing the glob. A quoted glob inside
    # `[ -f "..." ]` is never expanded, so that test could never succeed.
    LATEST_REPORT=$(ls -t reports/ghost_report_*.json 2>/dev/null | head -1 || true)
    if [ -z "$LATEST_REPORT" ]; then
      echo "new_ghost_count=0" >> "$GITHUB_OUTPUT"
      exit 0
    fi

    # The report path is passed as an argument rather than spliced into the
    # Python source. Timestamps without an offset are treated as UTC;
    # timestamps with an offset are compared as real instants.
    # Falls back to "0" when the report cannot be parsed.
    NEW_GHOSTS=$(python -c "
    import json
    import sys
    from datetime import datetime, timedelta, timezone

    with open(sys.argv[1]) as f:
        data = json.load(f)

    cutoff = datetime.now(timezone.utc) - timedelta(hours=6)
    new_count = 0
    for ghost in data.get('ghosts', []):
        first_seen = datetime.fromisoformat(ghost['first_seen'].replace('Z', '+00:00'))
        if first_seen.tzinfo is None:
            first_seen = first_seen.replace(tzinfo=timezone.utc)
        if first_seen > cutoff:
            new_count += 1
    print(new_count)
    " "$LATEST_REPORT" 2>/dev/null || echo "0")
    echo "new_ghost_count=$NEW_GHOSTS" >> "$GITHUB_OUTPUT"
- name: πŸ“ Create Issue for New Ghosts
if: steps.check_ghosts.outputs.new_ghost_count > 0
uses: actions/github-script@v7
with:
script: |
const fs = require('fs');
const path = require('path');
// Find latest JSON report
const reportsDir = 'reports';
const files = fs.readdirSync(reportsDir)
.filter(f => f.endsWith('.json'))
.sort()
.reverse();
if (files.length === 0) return;
const reportPath = path.join(reportsDir, files[0]);
const report = JSON.parse(fs.readFileSync(reportPath, 'utf8'));
const cutoff = new Date(Date.now() - 6 * 60 * 60 * 1000);
const newGhosts = report.ghosts.filter(g =>
new Date(g.first_seen) > cutoff
);
if (newGhosts.length === 0) return;
let body = `## πŸ‘» New Ghost CVEs Detected\n\n`;
body += `**Hunt Time:** ${new Date().toISOString()}\n`;
body += `**New Ghosts:** ${newGhosts.length}\n\n`;
body += `| CVE ID | Status | First Seen | Primary Source |\n`;
body += `|--------|--------|------------|----------------|\n`;
for (const ghost of newGhosts.slice(0, 20)) {
const source = ghost.sources[0];
body += `| ${ghost.cve_id} | ${ghost.registry_status} | `;
body += `${ghost.first_seen.split('T')[0]} | `;
body += `[${source?.name || 'Unknown'}](${source?.url || '#'}) |\n`;
}
if (newGhosts.length > 20) {
body += `\n*...and ${newGhosts.length - 20} more*\n`;
}
body += `\n---\n*Automated report by Ghost Hunter*`;
await github.rest.issues.create({
owner: context.repo.owner,
repo: context.repo.repo,
title: `πŸ‘» ${newGhosts.length} New Ghost CVE(s) Detected`,
body: body,
labels: ['ghost-cve', 'automated']
});