Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions keeper/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,25 @@ docker compose down
docker compose up -d --build
```

## Health & Operational Status

The keeper provides a rich health endpoint at `/health` that exposes detailed operational states. This allows operators to distinguish between normal operation, temporary degradation, and total failure.

### Health States

| State | Description | HTTP Status | Recommended Action |
|-------|-------------|-------------|--------------------|
| `ok` | Normal operation. | 200 | No action required. |
| `degraded_rpc` | Partial RPC failure (Circuit Breaker is HALF_OPEN). | 200 | Monitor RPC connectivity; investigate network/provider stability. |
| `degraded_backlog` | High retry backlog pressure (>50 tasks). | 200 | Consider increasing `MAX_CONCURRENT_EXECUTIONS` or scaling out keeper instances. |
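| `degraded_stale` | Polling activity is delayed past the warning point but has not yet reached the stale threshold. | 200 | Watch the polling loop; if the delay keeps growing, the state escalates to `stale`. |
| `stale` | Polling activity has stopped or is delayed beyond the stale threshold. | 503 | Investigate whether the main polling loop is hung. The sidecar will restart the service. |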
| `failing` | Total failure: RPC disconnected or Circuit Breaker is OPEN. | 503 | Check network connection, RPC endpoint availability, and credentials. |

### Monitoring

- **Health Check Sidecar**: Use the included `health-check-sidecar.sh` to automatically restart the keeper on `503` errors.
- **Prometheus**: Scrape `/metrics/prometheus` for real-time alerting on `keeper_rpc_connected` and `keeper_backlog_size`.

---

## Need Help?
Expand Down
68 changes: 68 additions & 0 deletions keeper/__tests__/server.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,72 @@ describe('Metrics', () => {
const metrics = new Metrics();
expect(metrics).toBeDefined();
});

// Baseline case: with the RPC marked connected and nothing else recorded,
// the health report must be `ok` with the standard "operating normally" reason.
it('should return ok status when operating normally', () => {
  const thresholdMs = 60000;
  const tracker = new Metrics();
  tracker.updateHealth({ rpcConnected: true });

  const { status, reason } = tracker.getHealthStatus(thresholdMs);

  expect(status).toBe('ok');
  expect(reason).toBe('Keeper is operating normally');
});

// Marking the RPC as disconnected must produce the `failing` status together
// with the exact non-functional-service reason string.
it('should return failing status when rpc is disconnected', () => {
  const thresholdMs = 60000;
  const tracker = new Metrics();
  tracker.updateHealth({ rpcConnected: false });

  const { status, reason } = tracker.getHealthStatus(thresholdMs);

  expect(status).toBe('failing');
  expect(reason).toBe('RPC connection lost or circuit breaker is OPEN. Service is non-functional.');
});

// Even with the RPC marked connected, recording circuit state 2 (OPEN)
// must still yield `failing` with the same reason as a lost connection.
it('should return failing status when circuit breaker is OPEN', () => {
  const thresholdMs = 60000;
  const tracker = new Metrics();
  tracker.updateHealth({ rpcConnected: true });
  tracker.record('rpcCircuitState', 2); // 2 = OPEN

  const { status, reason } = tracker.getHealthStatus(thresholdMs);

  expect(status).toBe('failing');
  expect(reason).toBe('RPC connection lost or circuit breaker is OPEN. Service is non-functional.');
});

// Recording circuit state 1 (HALF_OPEN) on a connected RPC must report
// `degraded_rpc`: a WARNING-severity state that is still counted as healthy.
it('should return degraded_rpc when circuit breaker is HALF_OPEN', () => {
  const thresholdMs = 60000;
  const tracker = new Metrics();
  tracker.updateHealth({ rpcConnected: true });
  tracker.record('rpcCircuitState', 1); // 1 = HALF_OPEN

  const report = tracker.getHealthStatus(thresholdMs);

  expect(report.status).toBe('degraded_rpc');
  expect(report.reason).toContain('Partial RPC failure');
  expect(report.details.severity).toBe('WARNING');
  expect(report.details.is_healthy).toBe(true);
});

// A last poll 40 seconds old, checked against a 60s threshold (warning at 30s),
// must report the WARNING-level `degraded_stale` state rather than `stale`.
it('should return degraded_stale when polling is moderately delayed', () => {
  const thresholdMs = 60000;
  const pollAgeMs = 40000;
  const tracker = new Metrics();
  tracker.updateHealth({ rpcConnected: true });
  // Backdate the most recent poll so it looks moderately overdue.
  tracker.lastPollAt = new Date(Date.now() - pollAgeMs);

  const report = tracker.getHealthStatus(thresholdMs);

  expect(report.status).toBe('degraded_stale');
  expect(report.reason).toContain('Polling activity is delayed');
  expect(report.details.severity).toBe('WARNING');
});

// Reporting a backlog of 60 tasks on a connected RPC must surface the
// WARNING-level `degraded_backlog` state.
it('should return degraded_backlog when backlog is high', () => {
  const thresholdMs = 60000;
  const tracker = new Metrics();
  tracker.updateHealth({ rpcConnected: true, backlogSize: 60 });

  const report = tracker.getHealthStatus(thresholdMs);

  expect(report.status).toBe('degraded_backlog');
  expect(report.reason).toContain('High retry backlog pressure');
  expect(report.details.severity).toBe('WARNING');
});

// A last poll 70 seconds old, checked against a 60s threshold, must be
// reported as `stale`: CRITICAL severity and no longer healthy.
it('should return stale when polling is critically delayed', () => {
  const thresholdMs = 60000;
  const pollAgeMs = 70000;
  const tracker = new Metrics();
  tracker.updateHealth({ rpcConnected: true });
  // Backdate the most recent poll beyond the threshold passed below.
  tracker.lastPollAt = new Date(Date.now() - pollAgeMs);

  const report = tracker.getHealthStatus(thresholdMs);

  expect(report.status).toBe('stale');
  expect(report.reason).toContain('Critical: No polling activity');
  expect(report.details.severity).toBe('CRITICAL');
  expect(report.details.is_healthy).toBe(false);
});
});
8 changes: 7 additions & 1 deletion keeper/coverage/coverage-summary.json
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
{"total": {"lines":{"total":0,"covered":0,"skipped":0,"pct":"Unknown"},"statements":{"total":0,"covered":0,"skipped":0,"pct":"Unknown"},"functions":{"total":0,"covered":0,"skipped":0,"pct":"Unknown"},"branches":{"total":0,"covered":0,"skipped":0,"pct":"Unknown"},"branchesTrue":{"total":0,"covered":0,"skipped":0,"pct":"Unknown"}}
{"total": {"lines":{"total":512,"covered":0,"skipped":0,"pct":0},"statements":{"total":526,"covered":0,"skipped":0,"pct":0},"functions":{"total":86,"covered":0,"skipped":0,"pct":0},"branches":{"total":314,"covered":0,"skipped":0,"pct":0},"branchesTrue":{"total":0,"covered":0,"skipped":0,"pct":"Unknown"}}
,"C:\\Users\\Jason\\Desktop\\DripWave\\Stellar\\SoroTask\\keeper\\src\\concurrency.js": {"lines":{"total":50,"covered":0,"skipped":0,"pct":0},"functions":{"total":8,"covered":0,"skipped":0,"pct":0},"statements":{"total":51,"covered":0,"skipped":0,"pct":0},"branches":{"total":26,"covered":0,"skipped":0,"pct":0}}
,"C:\\Users\\Jason\\Desktop\\DripWave\\Stellar\\SoroTask\\keeper\\src\\logger.js": {"lines":{"total":33,"covered":0,"skipped":0,"pct":0},"functions":{"total":18,"covered":0,"skipped":0,"pct":0},"statements":{"total":33,"covered":0,"skipped":0,"pct":0},"branches":{"total":20,"covered":0,"skipped":0,"pct":0}}
,"C:\\Users\\Jason\\Desktop\\DripWave\\Stellar\\SoroTask\\keeper\\src\\poller.js": {"lines":{"total":148,"covered":0,"skipped":0,"pct":0},"functions":{"total":17,"covered":0,"skipped":0,"pct":0},"statements":{"total":150,"covered":0,"skipped":0,"pct":0},"branches":{"total":81,"covered":0,"skipped":0,"pct":0}}
,"C:\\Users\\Jason\\Desktop\\DripWave\\Stellar\\SoroTask\\keeper\\src\\queue.js": {"lines":{"total":126,"covered":0,"skipped":0,"pct":0},"functions":{"total":13,"covered":0,"skipped":0,"pct":0},"statements":{"total":128,"covered":0,"skipped":0,"pct":0},"branches":{"total":55,"covered":0,"skipped":0,"pct":0}}
,"C:\\Users\\Jason\\Desktop\\DripWave\\Stellar\\SoroTask\\keeper\\src\\registry.js": {"lines":{"total":84,"covered":0,"skipped":0,"pct":0},"functions":{"total":17,"covered":0,"skipped":0,"pct":0},"statements":{"total":89,"covered":0,"skipped":0,"pct":0},"branches":{"total":54,"covered":0,"skipped":0,"pct":0}}
,"C:\\Users\\Jason\\Desktop\\DripWave\\Stellar\\SoroTask\\keeper\\src\\retry.js": {"lines":{"total":71,"covered":0,"skipped":0,"pct":0},"functions":{"total":13,"covered":0,"skipped":0,"pct":0},"statements":{"total":75,"covered":0,"skipped":0,"pct":0},"branches":{"total":78,"covered":0,"skipped":0,"pct":0}}
}
2 changes: 1 addition & 1 deletion keeper/coverage/lcov-report/concurrency.js.html
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ <h1><a href="index.html">All files</a> concurrency.js</h1>
<div class='footer quiet pad2 space-top1 center small'>
Code coverage generated by
<a href="https://istanbul.js.org/" target="_blank" rel="noopener noreferrer">istanbul</a>
at 2026-04-29T05:10:17.735Z
at 2026-04-29T15:55:23.174Z
</div>
<script src="prettify.js"></script>
<script>
Expand Down
108 changes: 99 additions & 9 deletions keeper/coverage/lcov-report/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,30 +23,30 @@ <h1>All files</h1>
<div class='clearfix'>

<div class='fl pad1y space-right2'>
<span class="strong">Unknown% </span>
<span class="strong">0% </span>
<span class="quiet">Statements</span>
<span class='fraction'>0/0</span>
<span class='fraction'>0/526</span>
</div>


<div class='fl pad1y space-right2'>
<span class="strong">Unknown% </span>
<span class="strong">0% </span>
<span class="quiet">Branches</span>
<span class='fraction'>0/0</span>
<span class='fraction'>0/314</span>
</div>


<div class='fl pad1y space-right2'>
<span class="strong">Unknown% </span>
<span class="quiet">Functions</span>
<span class='fraction'>0/0</span>
<span class='fraction'>0/86</span>
</div>


<div class='fl pad1y space-right2'>
<span class="strong">Unknown% </span>
<span class="strong">0% </span>
<span class="quiet">Lines</span>
<span class='fraction'>0/0</span>
<span class='fraction'>0/512</span>
</div>


Expand Down Expand Up @@ -78,15 +78,105 @@ <h1>All files</h1>
<th data-col="lines_raw" data-type="number" data-fmt="html" class="abs"></th>
</tr>
</thead>
<tbody></tbody>
<tbody><tr>
<td class="file low" data-value="concurrency.js"><a href="concurrency.js.html">concurrency.js</a></td>
<td data-value="0" class="pic low">
<div class="chart"><div class="cover-fill" style="width: 0%"></div><div class="cover-empty" style="width: 100%"></div></div>
</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="51" class="abs low">0/51</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="26" class="abs low">0/26</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="8" class="abs low">0/8</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="50" class="abs low">0/50</td>
</tr>

<tr>
<td class="file low" data-value="logger.js"><a href="logger.js.html">logger.js</a></td>
<td data-value="0" class="pic low">
<div class="chart"><div class="cover-fill" style="width: 0%"></div><div class="cover-empty" style="width: 100%"></div></div>
</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="33" class="abs low">0/33</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="20" class="abs low">0/20</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="18" class="abs low">0/18</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="33" class="abs low">0/33</td>
</tr>

<tr>
<td class="file low" data-value="poller.js"><a href="poller.js.html">poller.js</a></td>
<td data-value="0" class="pic low">
<div class="chart"><div class="cover-fill" style="width: 0%"></div><div class="cover-empty" style="width: 100%"></div></div>
</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="150" class="abs low">0/150</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="81" class="abs low">0/81</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="17" class="abs low">0/17</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="148" class="abs low">0/148</td>
</tr>

<tr>
<td class="file low" data-value="queue.js"><a href="queue.js.html">queue.js</a></td>
<td data-value="0" class="pic low">
<div class="chart"><div class="cover-fill" style="width: 0%"></div><div class="cover-empty" style="width: 100%"></div></div>
</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="128" class="abs low">0/128</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="55" class="abs low">0/55</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="13" class="abs low">0/13</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="126" class="abs low">0/126</td>
</tr>

<tr>
<td class="file low" data-value="registry.js"><a href="registry.js.html">registry.js</a></td>
<td data-value="0" class="pic low">
<div class="chart"><div class="cover-fill" style="width: 0%"></div><div class="cover-empty" style="width: 100%"></div></div>
</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="89" class="abs low">0/89</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="54" class="abs low">0/54</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="17" class="abs low">0/17</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="84" class="abs low">0/84</td>
</tr>

<tr>
<td class="file low" data-value="retry.js"><a href="retry.js.html">retry.js</a></td>
<td data-value="0" class="pic low">
<div class="chart"><div class="cover-fill" style="width: 0%"></div><div class="cover-empty" style="width: 100%"></div></div>
</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="75" class="abs low">0/75</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="78" class="abs low">0/78</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="13" class="abs low">0/13</td>
<td data-value="0" class="pct low">0%</td>
<td data-value="71" class="abs low">0/71</td>
</tr>

</tbody>
</table>
</div>
<div class='push'></div><!-- for sticky footer -->
</div><!-- /wrapper -->
<div class='footer quiet pad2 space-top1 center small'>
Code coverage generated by
<a href="https://istanbul.js.org/" target="_blank" rel="noopener noreferrer">istanbul</a>
at 2026-04-29T03:30:00.602Z
at 2026-04-29T15:55:23.174Z
</div>
<script src="prettify.js"></script>
<script>
Expand Down
Loading