newrelic · aavinash-nr · May 7, 2026 · Apr 24, 2026 · Apr 24, 2026 · Apr 27, 2026
diff --git a/.github/actions/node-layer-setup/action.yml b/.github/actions/node-layer-setup/action.yml
@@ -22,7 +22,8 @@ runs:
       id: check-tag
       shell: bash
       run: |
-        if [[ ${{ github.event.ref }} =~ ^refs/tags/v[0-9]+(\.[0-9]+)*_nodejs$ ]]; then
+        if [[ "${{ github.event_name }}" == "workflow_dispatch" ]] || \
+           [[ "${{ github.event.ref }}" =~ ^refs/tags/v[0-9]+(\.[0-9]+)*_nodejs$ ]]; then
             echo "match=true" >> $GITHUB_OUTPUT
         fi
     - name: Run Node unit tests

diff --git a/.github/actions/notify-slack-layer/action.yml b/.github/actions/notify-slack-layer/action.yml
@@ -0,0 +1,195 @@
+name: Notify Slack Layer Release
+description: Builds and sends a structured Slack notification for lambda layer releases
+
+inputs:
+  language_name:
+    description: Language display name, e.g. "Node.js"
+    required: true
+  versions_json:
+    description: |
+      JSON array describing each runtime version to include in the Slack message.
+      Schema (one object per version):
+        - key:         unique identifier, e.g. "3.9"
+        - label:       display name shown in Slack, e.g. "Python 3.9"
+        - job:         GitHub Actions job name for API result lookup, e.g. "publish-python (3.9)"
+        - fallback:    result to use when the API lookup finds no match, e.g. "success" or "failure"
+        - failure_key: job_key used by region-retry for per-version region failure detail, e.g. "python-3.9"
+      Example:
+        [
+          {"key":"3.9","label":"Python 3.9","job":"publish-python (3.9)","fallback":"success","failure_key":"python-3.9"},
+          {"key":"3.10","label":"Python 3.10","job":"publish-python (3.10)","fallback":"success","failure_key":"python-3.10"}
+        ]
+    required: true
+  failure_summaries:
+    description: Newline-separated failure_summary strings from publish jobs (used for ECR failures)
+    required: false
+    default: ""
+  slack_webhook:
+    description: Slack incoming webhook URL
+    required: true
+  gh_token:
+    description: Token exported as GH_TOKEN for the gh CLI calls (job lookup and artifact download)
+    required: true
+  repo:
+    description: Repository in owner/name form, used in the jobs API path and the run URL
+    required: true
+  run_id:
+    description: Pass github.run_id from the calling workflow.
+    required: true
+  run_attempt:
+    description: Pass github.run_attempt from the calling workflow.
+    required: false
+    default: "1"
+  ref_name:
+    description: 'Ref name shown on the "Tag:" line of the Slack message'
+    required: true
+  actor:
+    description: 'Account name shown on the "Triggered by:" line of the Slack message'
+    required: true
+  server_url:
+    description: 'Server base URL used to build the "View Run" link'
+    required: true
+
+runs:
+  using: composite
+  steps:
+    - name: Build Slack payload
+      shell: bash
+      env:
+        LANGUAGE_NAME: ${{ inputs.language_name }}
+        VERSIONS_JSON: ${{ inputs.versions_json }}
+        FAILURE_SUMMARIES: ${{ inputs.failure_summaries }}
+        GH_TOKEN: ${{ inputs.gh_token }}
+        REPO: ${{ inputs.repo }}
+        RUN_ID: ${{ inputs.run_id }}
+        RUN_ATTEMPT: ${{ inputs.run_attempt }}
+        TAG: ${{ inputs.ref_name }}
+        ACTOR: ${{ inputs.actor }}
+        SERVER_URL: ${{ inputs.server_url }}
+      run: |
+        python3 << 'PYEOF'
+        import json, os, subprocess, glob, re
+
+        versions    = json.loads(os.environ["VERSIONS_JSON"])
+        run_id      = os.environ["RUN_ID"]
+        run_attempt = os.environ.get("RUN_ATTEMPT", "1")
+        repo        = os.environ["REPO"]
+
+        # ── Fetch per-job results from the GitHub API ────────────────────────
+        # --paginate walks every page; the jq filter emits one job object per line.
+        r = subprocess.run(
+          ["gh", "api", "--paginate",
+           f"repos/{repo}/actions/runs/{run_id}/jobs",
+           "-q", ".jobs[]"],
+          capture_output=True, text=True
+        )
+        if r.returncode != 0:
+          # Stay best-effort, but make it visible: with no API data every version
+          # would silently take its "fallback" result.
+          print(f"::warning::gh api job lookup failed (exit {r.returncode}); results below use each version's fallback")
+          print(r.stderr.strip())
+        api_jobs = {}
+        for line in r.stdout.strip().splitlines():
+          try:
+            job = json.loads(line)
+            # A missing or empty conclusion is recorded as "in_progress".
+            api_jobs[job["name"]] = job.get("conclusion") or "in_progress"
+          except Exception as exc:
+            # Skip the bad record, but leave a trace in the log.
+            print(f"::warning::Skipping unparseable job record: {exc}")
+
+        # Version key -> job conclusion, or the configured fallback when no job has that name.
+        results = {}
+        for v in versions:
+          results[v["key"]] = api_jobs.get(v["job"], v["fallback"])
+
+        # ── Download per-version region failure artifacts ─────────────────────
+        # Artifacts are named: failed-regions-{job_key}-{run_id}-attempt-{N}
+        # A failed download is not fatal: without artifacts a failed version gets the
+        # generic "Layer publish to AWS failed" detail.
+        dl = subprocess.run(
+          ["gh", "run", "download", run_id,
+           "--pattern", "failed-regions-*",
+           "--dir", "/tmp/region-artifacts",
+           "--repo", repo],
+          capture_output=True, text=True
+        )
+        if dl.returncode != 0:
+          print(f"No failed-regions artifacts downloaded (gh exit {dl.returncode})")
+
+        version_failures = {}  # job_key -> comma-separated failed regions
+        artifact_base = "/tmp/region-artifacts"
+        if os.path.isdir(artifact_base):
+          for artifact_dir in sorted(glob.glob(f"{artifact_base}/failed-regions-*")):
+            artifact_name = os.path.basename(artifact_dir)
+            # Strip "failed-regions-" prefix, then split off "-{run_id}-attempt-{N}"
+            stripped = artifact_name[len("failed-regions-"):]
+            parts = stripped.rsplit(f"-{run_id}-attempt-", 1)
+            if len(parts) != 2:
+              continue
+            job_key, attempt_num = parts
+            # Only artifacts tagged with the current attempt number are used.
+            if attempt_num != run_attempt:
+              continue
+            txt_file = os.path.join(artifact_dir, f"failed-regions-{job_key}.txt")
+            if os.path.isfile(txt_file):
+              # Context manager closes the handle instead of leaving it to the GC.
+              with open(txt_file) as fh:
+                content = fh.read().strip()
+              if content:
+                version_failures[job_key] = content
+
+        # ── Build Slack message ───────────────────────────────────────────────
+        total        = len(versions)
+        failed_count = sum(1 for v in versions if results.get(v["key"]) != "success")
+        all_ok       = failed_count == 0
+
+        lang   = os.environ["LANGUAGE_NAME"]
+        icon   = ":white_check_mark:" if all_ok else ":x:"
+        status = "Succeeded" if all_ok else f"Failed ({failed_count}/{total} versions failed)"
+        lines  = [
+          f"{icon} *{lang} Layer Release {status}*",
+          f"Tag: `{os.environ['TAG']}`",
+          f"Triggered by: {os.environ['ACTOR']}",
+          "",
+          "*Layer Results:*",
+        ]
+        for v in versions:
+          res = results.get(v["key"], "unknown")
+          em  = ":white_check_mark:" if res == "success" else ":x:"
+          if res == "success":
+            detail = "Layer published successfully"
+          else:
+            fk          = v.get("failure_key", "")
+            regions_str = version_failures.get(fk, "")
+            if regions_str:
+              regions = [region.strip() for region in regions_str.split(",") if region.strip()]
+              detail  = f"{len(regions)} region(s) failed: {', '.join(regions)}"
+            else:
+              detail = "Layer publish to AWS failed"
+          lines.append(f"{em} {v['label']} — {detail}")
+
+        # ECR failures (from failure_summaries — not stored in artifacts)
+        skipped_ecr = []
+        for fs in os.environ.get("FAILURE_SUMMARIES", "").splitlines():
+          fs = fs.strip()
+          if "ECR images failed:" in fs:
+            for img in fs.split("ECR images failed:")[-1].strip().split():
+              if img not in skipped_ecr:
+                skipped_ecr.append(img)
+        if skipped_ecr:
+          lines += ["", ":warning: *ECR images failed to publish:*"]
+          lines += [f"• {i}" for i in skipped_ecr]
+
+        run_url = f"{os.environ['SERVER_URL']}/{repo}/actions/runs/{run_id}"
+        lines  += ["", f"<{run_url}|View Run>"]
+
+        with open("/tmp/slack-payload.json", "w") as f:
+          json.dump({"text": "\n".join(lines)}, f)
+        PYEOF
+    - name: Notify Slack
+      uses: slackapi/slack-github-action@v2.1.0
+      with:
+        webhook: ${{ inputs.slack_webhook }}
+        webhook-type: incoming-webhook
+        payload-file-path: /tmp/slack-payload.json
diff --git a/.github/actions/python-layer-setup/action.yml b/.github/actions/python-layer-setup/action.yml
@@ -13,7 +13,8 @@ runs:
       id: check-tag
       shell: bash
       run: |
-        if [[ ${{ github.event.ref }} =~ ^refs/tags/v[0-9]+(\.[0-9]+)*_python$ ]]; then
+        if [[ "${{ github.event_name }}" == "workflow_dispatch" ]] || \
+           [[ "${{ github.event.ref }}" =~ ^refs/tags/v[0-9]+(\.[0-9]+)*_python$ ]]; then
             echo "match=true" >> $GITHUB_OUTPUT
         fi
     - name: Install python dependencies

diff --git a/.github/actions/region-retry/action.yml b/.github/actions/region-retry/action.yml
@@ -0,0 +1,117 @@
+name: Region Retry State
+description: |
+  Save failed regions after a publish attempt, or load them before a re-run.
+  On attempt N > 1, load automatically restricts publishing to the regions
+  that failed in attempt N-1. This makes "Re-run failed jobs" smart: it only
+  retries the regions that actually need it.
+
+  Usage in a publish job:
+    # Before publish step:
+    - uses: ./.github/actions/region-retry
+      id: region-retry-load
+      with: { mode: load, job_key: nodejs-20, run_id: ..., run_attempt: ... }
+
+    # In publish step env:
+    #   PUBLISH_REGIONS: steps.region-retry-load.outputs.publish_regions || inputs.regions
+
+    # After publish step (if: always()):
+    - uses: ./.github/actions/region-retry
+      with: { mode: save, job_key: nodejs-20, failure_summary: ..., run_id: ..., run_attempt: ... }
+
+inputs:
+  mode:
+    description: '"save" — write failed regions artifact after publish. "load" — read it before publish.'
+    required: true
+  job_key:
+    description: 'Unique key for this publish job, e.g. "nodejs-20", "python-3.9", "java-java21".'
+    required: true
+  failure_summary:
+    description: 'failure_summary output from the publish step (save mode only).'
+    required: false
+    default: ''
+  run_id:
+    description: 'Pass github.run_id from the calling workflow.'
+    required: true
+  run_attempt:
+    description: 'Pass github.run_attempt from the calling workflow.'
+    required: true
+
+outputs:
+  publish_regions:
+    description: 'Comma-separated regions to target (empty = all). Set PUBLISH_REGIONS env var to this value.'
+    value: ${{ steps.load-regions.outputs.publish_regions }}
+
+runs:
+  using: composite
+  steps:
+    # ── SAVE: write failed regions to an artifact named by attempt number ─────
+    # Inputs reach every script through env rather than inline expressions, so a
+    # value containing quotes or $(...) stays data and is never parsed as shell.
+    - name: Write failed-regions file
+      if: inputs.mode == 'save'
+      id: write-file
+      shell: bash
+      env:
+        FAILURE_SUMMARY: ${{ inputs.failure_summary }}
+        JOB_KEY: ${{ inputs.job_key }}
+      run: |
+        fs="$FAILURE_SUMMARY"
+        outfile="/tmp/failed-regions-${JOB_KEY}.txt"
+        if [[ "$fs" == *"regions failed:"* ]]; then
+          # Keep the text after the marker and turn its spaces into commas.
+          regions="${fs#*regions failed: }"
+          printf '%s' "${regions// /,}" > "$outfile"
+          echo "has_failures=true" >> "$GITHUB_OUTPUT"
+          echo "Captured failed regions: ${regions}"
+        else
+          echo "has_failures=false" >> "$GITHUB_OUTPUT"
+        fi
+
+    - name: Upload failed-regions artifact
+      if: inputs.mode == 'save' && steps.write-file.outputs.has_failures == 'true'
+      uses: actions/upload-artifact@v4
+      with:
+        name: failed-regions-${{ inputs.job_key }}-${{ inputs.run_id }}-attempt-${{ inputs.run_attempt }}
+        path: /tmp/failed-regions-${{ inputs.job_key }}.txt
+        retention-days: 7
+
+    # ── LOAD: on re-run, download the previous attempt's failed-regions ────────
+    - name: Compute previous attempt artifact name
+      if: inputs.mode == 'load'
+      id: artifact-name
+      shell: bash
+      env:
+        JOB_KEY: ${{ inputs.job_key }}
+        RUN_ID: ${{ inputs.run_id }}
+        RUN_ATTEMPT: ${{ inputs.run_attempt }}
+      run: |
+        prev=$(( RUN_ATTEMPT - 1 ))
+        echo "name=failed-regions-${JOB_KEY}-${RUN_ID}-attempt-${prev}" >> "$GITHUB_OUTPUT"
+
+    - name: Download previous failed-regions artifact
+      if: inputs.mode == 'load' && fromJSON(inputs.run_attempt) > 1
+      id: download
+      continue-on-error: true
+      uses: actions/download-artifact@v4
+      with:
+        name: ${{ steps.artifact-name.outputs.name }}
+        path: /tmp/prev-attempt-${{ inputs.job_key }}
+
+    - name: Set publish_regions output
+      if: inputs.mode == 'load'
+      id: load-regions
+      shell: bash
+      env:
+        JOB_KEY: ${{ inputs.job_key }}
+      run: |
+        regions=""
+        f="/tmp/prev-attempt-${JOB_KEY}/failed-regions-${JOB_KEY}.txt"
+        if [[ -f "$f" ]]; then
+          regions=$(cat "$f")
+          if [[ -n "$regions" ]]; then
+            echo "Re-run: restricting to previously failed regions: ${regions}"
+          else
+            echo "Previous attempt had no failures — publishing to all regions."
+          fi
+        fi
+        echo "publish_regions=${regions}" >> "$GITHUB_OUTPUT"