doc updates #98
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Deploy workflow.
# Triggered by pushes to master that touch the application code, the
# container build files, the deploy script, or this workflow file itself.
name: Deploy

on:
  push:
    branches: [master]
    paths:
      - 'app/**'
      - 'Dockerfile'
      - 'docker-compose.dev.yml'
      - 'scripts/workflow_deploy.sh'
      - '.github/workflows/deploy.yml'

# Prevent concurrent workflows - if infrastructure deploy is running, wait.
# The group name is repository-wide, so any workflow that declares the same
# group is serialized with this one; an in-progress run is never cancelled.
concurrency:
  group: ${{ github.repository }}-all-workflows
  cancel-in-progress: false

# Workflow-level environment shared by every job.
env:
  # Container registry host used for login and for image tags.
  REGISTRY: ghcr.io
  # NOTE(review): the jobs in this file build their tags from a lowercased
  # step output rather than from IMAGE_NAME - confirm whether it is still used.
  IMAGE_NAME: ${{ github.repository }}
jobs:
  # Gate job: builds the Dockerfile's `test` target and runs the resulting
  # image. The job fails if either docker command exits non-zero.
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Tag is local to this runner; nothing is pushed from this job.
      - name: Build test image
        run: docker build --target test -t ttb-verifier:test .

      # --rm removes the container once the run finishes.
      - name: Run tests
        run: docker run --rm ttb-verifier:test
# Job: build the `production` and `worker` Dockerfile targets and push both
# images to the registry named by env.REGISTRY. Runs only after `test` passes.
  build-and-push:
    needs: test
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write  # required to push images with GITHUB_TOKEN
    outputs:
      # Digest of the production image (from the step with id `build`).
      image-digest: ${{ steps.build.outputs.digest }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Image references must be lowercase, but owner/repo names may contain
      # uppercase letters. GITHUB_REPOSITORY is the runner-provided equivalent
      # of github.repository; reading it from the environment keeps the value
      # out of the script text, and ${VAR,,} lowercases it in bash.
      - name: Set lowercase image name
        id: image
        run: echo "name=${GITHUB_REPOSITORY,,}" >> "$GITHUB_OUTPUT"

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Each image is tagged twice: a moving `latest` tag and an immutable
      # `<ref>-<sha>` tag that identifies the exact commit.
      - name: Build and push Docker image
        id: build
        uses: docker/build-push-action@v5
        with:
          context: .
          target: production
          push: true
          tags: |
            ${{ env.REGISTRY }}/${{ steps.image.outputs.name }}:latest
            ${{ env.REGISTRY }}/${{ steps.image.outputs.name }}:${{ github.ref_name }}-${{ github.sha }}
          # A dedicated scope per image: builds that share the default gha
          # cache scope overwrite each other's exported cache.
          cache-from: type=gha,scope=production
          cache-to: type=gha,mode=max,scope=production

      - name: Build and push worker Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          target: worker
          push: true
          tags: |
            ${{ env.REGISTRY }}/${{ steps.image.outputs.name }}-worker:latest
            ${{ env.REGISTRY }}/${{ steps.image.outputs.name }}-worker:${{ github.ref_name }}-${{ github.sha }}
          cache-from: type=gha,scope=worker
          cache-to: type=gha,mode=max,scope=worker
# Job: roll the freshly pushed images out to the EC2 host by running the
# deploy script through AWS Systems Manager (SSM), then confirm that a
# ttb-verifier container is actually running.
  deploy:
    needs: build-and-push
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # lets the job request an OIDC token to assume the AWS role
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
          aws-region: ${{ secrets.AWS_REGION }}
          role-session-name: GitHubActions-Deploy

      - name: Deploy to EC2 via SSM
        # Values are handed to the script through the environment instead of
        # being pasted into the script text by expression expansion.
        env:
          INSTANCE_ID: ${{ secrets.EC2_INSTANCE_ID }}
          BEFORE_SHA: ${{ github.event.before }}
        run: |
          echo "🚀 Deploying to EC2 instance: $INSTANCE_ID"

          # Detect what changed to determine deployment strategy.
          # The previous head may be unreachable (force-push, or an all-zero
          # SHA); in that case leave CHANGED_FILES empty, which selects the
          # full deployment below instead of aborting the whole step.
          echo "🔍 Analyzing changed files..."
          CHANGED_FILES=""
          if git fetch --depth=1 origin "$BEFORE_SHA"; then
            CHANGED_FILES=$(git diff --name-only "$BEFORE_SHA" "$GITHUB_SHA" || echo "")
          else
            echo "⚠️ Could not fetch previous commit '$BEFORE_SHA' - using full deployment"
          fi

          # Check if only app code changed (verifier-only deployment)
          VERIFIER_ONLY=false
          if echo "$CHANGED_FILES" | grep -qE '^(app/|Dockerfile|scripts/workflow_deploy\.sh|\.github/workflows/deploy\.yml)'; then
            # App-related files changed - check if ONLY app files changed
            if ! echo "$CHANGED_FILES" | grep -qE '^(docker-compose|infrastructure/)'; then
              VERIFIER_ONLY=true
              echo "✅ Only app code changed - verifier-only deployment"
              echo " Ollama will NOT be restarted (model stays in GPU)"
            else
              echo "🔄 Infrastructure files changed - full deployment"
            fi
          fi

          # Set deployment command
          if [ "$VERIFIER_ONLY" = "true" ]; then
            DEPLOY_CMD='VERIFIER_ONLY=true /app/workflow_deploy.sh'
          else
            DEPLOY_CMD='/app/workflow_deploy.sh'
          fi
          echo "📋 Deployment command: $DEPLOY_CMD"

          # Send deployment command via SSM (asynchronous: returns an id only)
          COMMAND_ID=$(aws ssm send-command \
            --instance-ids "$INSTANCE_ID" \
            --document-name "AWS-RunShellScript" \
            --parameters "commands=[\"$DEPLOY_CMD\"]" \
            --comment "GitHub Actions deployment - commit $GITHUB_SHA" \
            --output text \
            --query "Command.CommandId")
          echo "📋 Command ID: $COMMAND_ID"

          echo "⏳ Waiting for SSM agent to pick up command..."
          sleep 3

          echo "⏳ Polling for deployment completion (timeout: 5 minutes)..."
          # Poll for command completion (30 attempts x 10 seconds = 5 minutes)
          MAX_ATTEMPTS=30
          ATTEMPT=0
          LAST_STATUS=""
          while [ $ATTEMPT -lt $MAX_ATTEMPTS ]; do
            ATTEMPT=$((ATTEMPT+1))

            # A failed status query must not kill the step, so exit-on-error
            # is switched off just for this call and the exit code inspected.
            set +e
            RESULT=$(aws ssm get-command-invocation \
              --command-id "$COMMAND_ID" \
              --instance-id "$INSTANCE_ID" \
              --query 'Status' \
              --output text 2>&1)
            EXIT_CODE=$?
            set -e

            # Check if command invocation exists yet
            if [ $EXIT_CODE -ne 0 ]; then
              if echo "$RESULT" | grep -q "InvocationDoesNotExist"; then
                STATUS="Waiting"
              else
                echo "⚠️ Error querying command status: $RESULT"
                STATUS="Unknown"
              fi
            else
              STATUS="$RESULT"
            fi

            # Only print status if it changed
            if [ "$STATUS" != "$LAST_STATUS" ]; then
              echo "Status: $STATUS"
              LAST_STATUS="$STATUS"
            else
              echo -n "."
            fi

            # Check for terminal states
            if [ "$STATUS" = "Success" ]; then
              echo ""
              echo "✅ Deployment completed successfully!"
              break
            elif [ "$STATUS" = "Failed" ] || [ "$STATUS" = "Cancelled" ] || [ "$STATUS" = "TimedOut" ]; then
              echo ""
              echo "❌ Deployment failed with status: $STATUS"
              # Get command output for debugging
              echo "📄 Deployment output:"
              aws ssm get-command-invocation \
                --command-id "$COMMAND_ID" \
                --instance-id "$INSTANCE_ID" \
                --query '[StandardOutputContent,StandardErrorContent]' \
                --output text
              exit 1
            fi

            # Wait before next poll; Success has already left the loop above
            if [ $ATTEMPT -lt $MAX_ATTEMPTS ]; then
              sleep 10
            fi
          done

          # Loop exhausted without reaching Success
          if [ $ATTEMPT -eq $MAX_ATTEMPTS ] && [ "$STATUS" != "Success" ]; then
            echo ""
            echo "❌ Deployment timed out after 5 minutes"
            echo "Last known status: $STATUS"
            # Try to get any available output
            echo "📄 Attempting to retrieve deployment logs:"
            aws ssm get-command-invocation \
              --command-id "$COMMAND_ID" \
              --instance-id "$INSTANCE_ID" \
              --query '[StandardOutputContent,StandardErrorContent]' \
              --output text 2>&1 || echo "No logs available yet"
            exit 1
          fi

          # Get final command output
          echo "📄 Deployment output:"
          aws ssm get-command-invocation \
            --command-id "$COMMAND_ID" \
            --instance-id "$INSTANCE_ID" \
            --query '[StandardOutputContent,StandardErrorContent]' \
            --output text

      - name: Verify deployment
        env:
          INSTANCE_ID: ${{ secrets.EC2_INSTANCE_ID }}
        run: |
          echo "🔍 Verifying deployment..."
          sleep 10 # Give the service a moment to fully start

          # Get the application URL from terraform outputs (if available)
          # For now, we'll just verify via SSM that containers are running
          VERIFY_ID=$(aws ssm send-command \
            --instance-ids "$INSTANCE_ID" \
            --document-name "AWS-RunShellScript" \
            --parameters 'commands=["docker ps --filter name=ttb-verifier --format \"{{.Status}}\""]' \
            --output text \
            --query "Command.CommandId")

          # send-command only queues the command: give the agent a moment to
          # register the invocation, then block until it reaches a final state
          # (the waiter exits non-zero if the command fails or times out).
          sleep 3
          aws ssm wait command-executed \
            --command-id "$VERIFY_ID" \
            --instance-id "$INSTANCE_ID"

          CONTAINER_STATUS=$(aws ssm get-command-invocation \
            --command-id "$VERIFY_ID" \
            --instance-id "$INSTANCE_ID" \
            --query 'StandardOutputContent' \
            --output text)
          echo "📄 Container status:"
          echo "$CONTAINER_STATUS"

          # docker ps reports running containers with a status starting "Up";
          # no such line means nothing matching ttb-verifier is running.
          if ! echo "$CONTAINER_STATUS" | grep -q '^Up'; then
            echo "❌ No running ttb-verifier container reported by the instance"
            exit 1
          fi
          echo "✅ Deployment verification complete!"