Ghost Hunter - Automated CVE Hunt #474
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Workflow: runs the Ghost Hunter CVE hunt, generates reports and commits the updated database. | |
| name: Ghost Hunter - Automated CVE Hunt | |
| # Triggers: pushes and pull requests touching the hunter's code, a schedule, and manual dispatch. | |
| on: | |
| push: | |
| branches: | |
| - main | |
| - master | |
| paths: | |
| - 'src/**' | |
| - 'main.py' | |
| - 'requirements.txt' | |
| - '.github/workflows/hunt.yml' | |
| # Same branches as push; the path filter omits the workflow file itself. | |
| pull_request: | |
| branches: | |
| - main | |
| - master | |
| paths: | |
| - 'src/**' | |
| - 'main.py' | |
| - 'requirements.txt' | |
| schedule: | |
| # Run every 6 hours (minute 0 of hours 0, 6, 12 and 18) | |
| - cron: '0 */6 * * *' | |
| workflow_dispatch: | |
| inputs: | |
| # NOTE(review): no step visible in this workflow reads inputs.force_run; | |
| # confirm it is consumed elsewhere or wire it into the hunt command. | |
| force_run: | |
| description: 'Force a hunt run even if recently executed' | |
| required: false | |
| default: false | |
| type: boolean | |
| # contents: write is used by the step that commits and pushes ghost_log.db and the reports; | |
| # issues: write is used by the steps that create GitHub issues. | |
| permissions: | |
| contents: write | |
| issues: write | |
| pull-requests: read | |
| # One active run per workflow and ref; starting a new run cancels the one in progress. | |
| concurrency: | |
| group: ghost-hunt-${{ github.workflow }}-${{ github.ref }} | |
| cancel-in-progress: true | |
| # Python version shared by the setup-python steps of both jobs. | |
| env: | |
| PYTHON_VERSION: '3.11' | |
| jobs: | |
| hunt: | |
| name: π Ghost Hunt | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: π₯ Checkout Repository | |
| uses: actions/checkout@v4 | |
| with: | |
| fetch-depth: 0 | |
| token: ${{ secrets.GITHUB_TOKEN }} | |
| - name: π Setup Python | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: ${{ env.PYTHON_VERSION }} | |
| cache: 'pip' | |
| - name: π¦ Install Dependencies | |
| run: | | |
| python -m pip install --upgrade pip | |
| pip install -r requirements.txt | |
| - name: ποΈ Cache CVE Data Files | |
| uses: actions/cache@v4 | |
| with: | |
| path: | | |
| data/nvd.json | |
| data/cvelistV5 | |
| # The version tag comes first so that the restore-keys entry below is a real prefix | |
| # of this key; with the hash placed before "-v1" the fallback could never match. | |
| key: cve-data-v1-${{ hashFiles('data/nvd.json') }} | |
| restore-keys: | | |
| cve-data-v1- | |
| - name: ποΈ Restore Database Cache | |
| uses: actions/cache@v4 | |
| with: | |
| path: ghost_log.db | |
| # The run id makes the primary key unique per run, so restoring relies on the | |
| # restore-keys prefixes below, tried in order: same database hash first, then any | |
| # ghost-db- entry. | |
| key: ghost-db-${{ hashFiles('ghost_log.db') }}-${{ github.run_id }} | |
| restore-keys: | | |
| ghost-db-${{ hashFiles('ghost_log.db') }}- | |
| ghost-db- | |
| - name: π§Ή Clean Legacy GitHub Data | |
| run: | | |
| # Remove legacy GitHub Code discovery data (disabled feature) | |
| # Runs only when ghost_log.db is present; otherwise the step does nothing. | |
| if [ -f ghost_log.db ]; then | |
| # Best-effort cleanup: stderr of the inline script is discarded and any failure | |
| # falls through to the echo after the closing quote, so the step does not fail. | |
| python -c " | |
| import sqlite3 | |
| conn = sqlite3.connect('ghost_log.db') | |
| cursor = conn.cursor() | |
| # Delete GitHub Code sources | |
| cursor.execute(\"DELETE FROM discovery_sources WHERE source_type = 'github_code'\") | |
| deleted_sources = cursor.rowcount | |
| # Delete orphaned ghost CVEs (those with no remaining sources) | |
| cursor.execute(''' | |
| DELETE FROM ghost_cves | |
| WHERE cve_id NOT IN (SELECT DISTINCT cve_id FROM discovery_sources) | |
| ''') | |
| deleted_ghosts = cursor.rowcount | |
| conn.commit() | |
| conn.close() | |
| if deleted_sources > 0 or deleted_ghosts > 0: | |
| print(f'Cleaned {deleted_sources} GitHub sources and {deleted_ghosts} orphaned ghosts') | |
| " 2>/dev/null || echo "Database cleanup skipped (no database or error)" | |
| fi | |
| - name: π Run Ghost Hunt | |
| # Exposes the step output hunt_status, read by the summary and health-check steps. | |
| id: hunt | |
| env: | |
| GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| NVD_API_KEY: ${{ secrets.NVD_API_KEY }} | |
| run: | | |
| set +e # Don't fail immediately on errors | |
| # hunt.log is uploaded as an artifact by a later step. | |
| python main.py --hunt --log-level INFO --log-file hunt.log --no-banner | |
| EXIT_CODE=$? | |
| # A non-zero exit becomes a warning annotation plus a status output, not a step failure. | |
| if [ $EXIT_CODE -ne 0 ]; then | |
| echo "::warning::Hunt completed with errors (exit code: $EXIT_CODE)" | |
| echo "hunt_status=completed_with_errors" >> $GITHUB_OUTPUT | |
| else | |
| echo "hunt_status=success" >> $GITHUB_OUTPUT | |
| fi | |
| exit 0 # Don't fail the workflow | |
| - name: π Check for Ghost Resolutions | |
| id: resolutions | |
| env: | |
| PYTHONPATH: ${{ github.workspace }} | |
| run: | | |
| echo "Checking if any Ghost CVEs have been published..." | |
| python main.py --check-resolutions --no-banner || echo "Resolution check completed with warnings" | |
| - name: π Generate Reports | |
| # Exposes the step output ghost_count, read by the summary step. | |
| id: report | |
| run: | | |
| # Writes every report format into reports/. | |
| # NOTE(review): there is no fallback on this command, so a failing report run | |
| # presumably fails the step under the runner's default shell options; confirm intended. | |
| python main.py --report --format all --output-dir reports --no-banner | |
| # Extract statistics for output | |
| # Falls back to 0 when the statistics lookup fails (its stderr is discarded). | |
| GHOST_COUNT=$(python -c " | |
| from src.storage import DatabaseManager | |
| db = DatabaseManager() | |
| stats = db.get_statistics() | |
| print(stats.get('total_ghosts', 0)) | |
| " 2>/dev/null || echo "0") | |
| echo "ghost_count=$GHOST_COUNT" >> $GITHUB_OUTPUT | |
| - name: π Display Hunt Summary | |
| run: | | |
| echo "## π Ghost Hunt Summary" >> $GITHUB_STEP_SUMMARY | |
| echo "" >> $GITHUB_STEP_SUMMARY | |
| echo "**Trigger:** ${{ github.event_name }}" >> $GITHUB_STEP_SUMMARY | |
| echo "**Branch:** ${{ github.ref_name }}" >> $GITHUB_STEP_SUMMARY | |
| echo "**Commit:** ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY | |
| echo "**Hunt Status:** ${{ steps.hunt.outputs.hunt_status }}" >> $GITHUB_STEP_SUMMARY | |
| echo "**Total Ghosts:** ${{ steps.report.outputs.ghost_count }}" >> $GITHUB_STEP_SUMMARY | |
| echo "" >> $GITHUB_STEP_SUMMARY | |
| # Pick the newest Markdown report, if any. Testing the glob directly with | |
| # `[ -f ... ]` breaks as soon as more than one report matches (the test then | |
| # receives several arguments), so resolve a single path first and test that. | |
| LATEST_REPORT=$(ls -t reports/ghost_report_*.md 2>/dev/null | head -1) || true | |
| if [ -n "$LATEST_REPORT" ] && [ -f "$LATEST_REPORT" ]; then | |
| cat "$LATEST_REPORT" >> $GITHUB_STEP_SUMMARY | |
| else | |
| # No report on disk: fall back to the first 50 lines of the dashboard output. | |
| python main.py --dashboard --no-banner 2>&1 | head -50 >> $GITHUB_STEP_SUMMARY || echo "Dashboard generation failed" >> $GITHUB_STEP_SUMMARY | |
| fi | |
| - name: π€ Commit Database Updates | |
| if: github.event_name != 'pull_request' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master') | |
| run: | | |
| git config --local user.email "github-actions[bot]@users.noreply.github.com" | |
| git config --local user.name "github-actions[bot]" | |
| # Optimize database before commit (reduces size 20-30%) | |
| echo "Optimizing database..." | |
| python -c " | |
| import sqlite3 | |
| from pathlib import Path | |
| db_path = 'ghost_log.db' | |
| if Path(db_path).exists(): | |
| size_before = Path(db_path).stat().st_size / (1024 * 1024) | |
| conn = sqlite3.connect(db_path) | |
| conn.execute('VACUUM') | |
| conn.execute('ANALYZE') | |
| conn.close() | |
| size_after = Path(db_path).stat().st_size / (1024 * 1024) | |
| saved = ((size_before - size_after) / size_before * 100) if size_before > 0 else 0 | |
| print(f'Database optimized: {size_before:.2f}MB -> {size_after:.2f}MB (saved {saved:.1f}%)') | |
| " || echo "Database optimization skipped" | |
| # Add database and reports | |
| git add ghost_log.db || true | |
| # Stage each report format on its own: `git add` aborts without staging anything | |
| # when one of its pathspecs matches no file, so a single missing format would | |
| # otherwise keep the other formats out of the commit. | |
| for ext in json csv md; do | |
| git add reports/ghost_report*.$ext || true | |
| done | |
| # Check if there are changes to commit | |
| if git diff --staged --quiet; then | |
| echo "No changes to commit" | |
| exit 0 | |
| fi | |
| TIMESTAMP=$(date -u +"%Y-%m-%d %H:%M UTC") | |
| GHOST_COUNT=$(python -c " | |
| from src.storage import DatabaseManager | |
| db = DatabaseManager() | |
| stats = db.get_statistics() | |
| print(stats.get('total_ghosts', 0)) | |
| " 2>/dev/null || echo "0") | |
| git commit -m "π Ghost Hunt: ${TIMESTAMP} | ${GHOST_COUNT} Ghosts tracked [via ${{ github.event_name }}]" | |
| # Enhanced retry logic with exponential backoff | |
| MAX_RETRIES=5 | |
| RETRY_DELAY=10 | |
| for i in $(seq 1 $MAX_RETRIES); do | |
| echo "Push attempt $i of $MAX_RETRIES" | |
| # Pull with rebase first | |
| if git pull --rebase origin ${{ github.ref_name }}; then | |
| echo "β Rebase successful" | |
| else | |
| echo "β Rebase failed, attempting merge" | |
| git rebase --abort || true | |
| git pull --no-rebase origin ${{ github.ref_name }} || true | |
| fi | |
| # Try to push with extended timeout | |
| if timeout 300 git push; then | |
| echo "β Push successful on attempt $i" | |
| exit 0 | |
| else | |
| EXIT_CODE=$? | |
| echo "β Push attempt $i failed (exit code: $EXIT_CODE)" | |
| # Sleep only between attempts; after the last attempt the loop ends and the error below is raised. | |
| if [ $i -lt $MAX_RETRIES ]; then | |
| echo "Waiting ${RETRY_DELAY}s before retry..." | |
| sleep $RETRY_DELAY | |
| RETRY_DELAY=$((RETRY_DELAY * 2)) # Exponential backoff: waits of 10s, 20s, 40s, 80s with 5 attempts (no wait after the last one) | |
| fi | |
| fi | |
| done | |
| echo "::error::Failed to push after $MAX_RETRIES attempts" | |
| exit 1 | |
| - name: π Upload Hunt Logs | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: hunt-logs-${{ github.run_number }} | |
| path: hunt.log | |
| retention-days: 7 | |
| - name: π Upload Reports | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ghost-hunt-reports-${{ github.run_number }} | |
| path: reports/ | |
| retention-days: 30 | |
| - name: π Upload Database | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ghost-database-${{ github.run_number }} | |
| path: ghost_log.db | |
| retention-days: 90 | |
| - name: π¨ Alert on New Critical Ghosts | |
| if: always() | |
| run: | | |
| # Check for critical ghosts (30+ days in limbo) | |
| CRITICAL_COUNT=$(python -c " | |
| from src.storage import DatabaseManager | |
| from src.config import APP_SETTINGS | |
| db = DatabaseManager() | |
| ghosts = db.get_ghost_cves(only_ghosts=True) | |
| critical = [g for g in ghosts if g.days_in_limbo >= APP_SETTINGS.limbo_critical_days] | |
| print(len(critical)) | |
| " 2>/dev/null || echo "0") | |
| if [ "$CRITICAL_COUNT" -gt "0" ]; then | |
| echo "β οΈ Warning: $CRITICAL_COUNT critical Ghost CVEs (30+ days in limbo)" | |
| echo "::warning::$CRITICAL_COUNT critical Ghost CVEs detected" | |
| fi | |
| - name: π§Ή Database Maintenance (Phase 2) | |
| if: github.event_name == 'schedule' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master') | |
| run: | | |
| python -c " | |
| from datetime import datetime | |
| from src.storage.maintenance import DatabaseMaintenance | |
| maint = DatabaseMaintenance() | |
| # Weekly cleanup of resolved CVEs (no longer ghosts) | |
| # This prevents database bloat from CVEs that became published | |
| if datetime.now().weekday() == 0: | |
| cleaned = maint.cleanup_resolved_ghosts(days=30) | |
| print(f'β Cleaned up {cleaned} resolved CVEs (published 30+ days ago)') | |
| result = maint.vacuum() | |
| print(f'β Weekly VACUUM: Saved {result[\"saved_percent\"]:.1f}%') | |
| # Monthly archiving on 1st of month | |
| if datetime.now().day == 1: | |
| archived = maint.archive_old_sources(days=90) | |
| print(f'β Monthly archive: {archived} old sources archived') | |
| # Optimize indexes | |
| maint.optimize_indexes() | |
| print('β Indexes optimized') | |
| " | |
| - name: π¨ Create Issues for Critical Ghosts (Phase 4) | |
| if: github.event_name == 'schedule' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master') | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| PYTHONPATH: ${{ github.workspace }} | |
| run: | | |
| python scripts/create_ghost_issues.py || echo "Issue creation completed with warnings" | |
| - name: π Workflow Health Check | |
| if: always() | |
| run: | | |
| echo "## π Workflow Health Metrics" >> $GITHUB_STEP_SUMMARY | |
| echo "" >> $GITHUB_STEP_SUMMARY | |
| # Database metrics | |
| if [ -f ghost_log.db ]; then | |
| DB_SIZE=$(du -h ghost_log.db | cut -f1) | |
| # Each count is read with the sqlite3 CLI; a failed query (stderr discarded) | |
| # is reported as N/A instead of failing the step. | |
| TOTAL_CVES=$(sqlite3 ghost_log.db 'SELECT COUNT(*) FROM ghost_cves' 2>/dev/null || echo "N/A") | |
| ACTIVE_GHOSTS=$(sqlite3 ghost_log.db 'SELECT COUNT(*) FROM ghost_cves WHERE is_ghost=1' 2>/dev/null || echo "N/A") | |
| TOTAL_SOURCES=$(sqlite3 ghost_log.db 'SELECT COUNT(*) FROM discovery_sources' 2>/dev/null || echo "N/A") | |
| echo "- **Database Size**: ${DB_SIZE}" >> $GITHUB_STEP_SUMMARY | |
| echo "- **Total CVEs Tracked**: ${TOTAL_CVES}" >> $GITHUB_STEP_SUMMARY | |
| echo "- **Active Ghosts**: ${ACTIVE_GHOSTS}" >> $GITHUB_STEP_SUMMARY | |
| echo "- **Discovery Sources**: ${TOTAL_SOURCES}" >> $GITHUB_STEP_SUMMARY | |
| else | |
| echo "- **Database**: Not found" >> $GITHUB_STEP_SUMMARY | |
| fi | |
| # The job status and the hunt step's output are substituted into the script before it runs. | |
| echo "- **Workflow Status**: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY | |
| echo "- **Hunt Status**: ${{ steps.hunt.outputs.hunt_status }}" >> $GITHUB_STEP_SUMMARY | |
| echo "" >> $GITHUB_STEP_SUMMARY | |
| # Alert if workflow had issues | |
| if [ "${{ job.status }}" != "success" ]; then | |
| echo "::warning::Workflow completed with status: ${{ job.status }}" | |
| fi | |
| # Optional: Create GitHub Issue for new Ghosts (only on scheduled runs) | |
| notify: | |
| name: π’ Notify New Ghosts | |
| runs-on: ubuntu-latest | |
| needs: hunt | |
| if: success() && github.event_name == 'schedule' | |
| steps: | |
| - name: π₯ Download Reports | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: ghost-hunt-reports-${{ github.run_number }} | |
| # The artifact was uploaded from the reports/ directory, so its files sit at the | |
| # artifact root. Extract them into reports/ because the following steps look for | |
| # reports/ghost_report_*.json; without `path` they land in the workspace root. | |
| path: reports | |
| - name: π Setup Python | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: ${{ env.PYTHON_VERSION }} | |
| - name: π¦ Install Dependencies | |
| run: pip install requests | |
| - name: π’ Check for New Ghosts | |
| id: check_ghosts | |
| run: | | |
| # Parse the latest JSON report for new ghosts. | |
| # The glob has to stay outside the quotes: a quoted pattern is tested as a literal | |
| # file name, never matches, and the count would always be reported as 0. | |
| LATEST_REPORT=$(ls -t reports/ghost_report_*.json 2>/dev/null | head -1) || true | |
| if [ -n "$LATEST_REPORT" ] && [ -f "$LATEST_REPORT" ]; then | |
| # Any failure inside the inline script (stderr discarded) is reported as 0. | |
| NEW_GHOSTS=$(python -c " | |
| import json | |
| from datetime import datetime, timedelta | |
| with open('$LATEST_REPORT') as f: | |
| data = json.load(f) | |
| # Count ghosts first seen in the last 6 hours | |
| cutoff = datetime.utcnow() - timedelta(hours=6) | |
| new_count = 0 | |
| for ghost in data.get('ghosts', []): | |
| first_seen = datetime.fromisoformat(ghost['first_seen'].replace('Z', '+00:00')) | |
| if first_seen.replace(tzinfo=None) > cutoff: | |
| new_count += 1 | |
| print(new_count) | |
| " 2>/dev/null || echo "0") | |
| echo "new_ghost_count=$NEW_GHOSTS" >> $GITHUB_OUTPUT | |
| else | |
| # No report available: publish zero so the issue-creation step is skipped. | |
| echo "new_ghost_count=0" >> $GITHUB_OUTPUT | |
| fi | |
| - name: π Create Issue for New Ghosts | |
| if: steps.check_ghosts.outputs.new_ghost_count > 0 | |
| uses: actions/github-script@v7 | |
| with: | |
| script: | | |
| const fs = require('fs'); | |
| const path = require('path'); | |
| // Find latest JSON report | |
| const reportsDir = 'reports'; | |
| const files = fs.readdirSync(reportsDir) | |
| .filter(f => f.endsWith('.json')) | |
| .sort() | |
| .reverse(); | |
| if (files.length === 0) return; | |
| const reportPath = path.join(reportsDir, files[0]); | |
| const report = JSON.parse(fs.readFileSync(reportPath, 'utf8')); | |
| const cutoff = new Date(Date.now() - 6 * 60 * 60 * 1000); | |
| const newGhosts = report.ghosts.filter(g => | |
| new Date(g.first_seen) > cutoff | |
| ); | |
| if (newGhosts.length === 0) return; | |
| let body = `## π» New Ghost CVEs Detected\n\n`; | |
| body += `**Hunt Time:** ${new Date().toISOString()}\n`; | |
| body += `**New Ghosts:** ${newGhosts.length}\n\n`; | |
| body += `| CVE ID | Status | First Seen | Primary Source |\n`; | |
| body += `|--------|--------|------------|----------------|\n`; | |
| for (const ghost of newGhosts.slice(0, 20)) { | |
| const source = ghost.sources[0]; | |
| body += `| ${ghost.cve_id} | ${ghost.registry_status} | `; | |
| body += `${ghost.first_seen.split('T')[0]} | `; | |
| body += `[${source?.name || 'Unknown'}](${source?.url || '#'}) |\n`; | |
| } | |
| if (newGhosts.length > 20) { | |
| body += `\n*...and ${newGhosts.length - 20} more*\n`; | |
| } | |
| body += `\n---\n*Automated report by Ghost Hunter*`; | |
| await github.rest.issues.create({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| title: `π» ${newGhosts.length} New Ghost CVE(s) Detected`, | |
| body: body, | |
| labels: ['ghost-cve', 'automated'] | |
| }); |