Skip to content
Merged
23 changes: 23 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,29 @@ First release candidate. Surface is end-to-end working for the
Production deployment against an untrusted publisher requires the
catalog-signing chain landed in a follow-up RC (see "Known gaps").

### Security & hardening — broker + supervisor

- **Broker authorization (deny-by-default).** Every brokered call is
gated before dialing an app socket: the method must be in the target's
`exposes` set, and cross-app `ipc.call` callers must hold a matching
grant (`<app>.<method>`, `<app>.*`, or `*`). New `Service.CallFrom` and
`manifest.ExposesMethod` / `HasGrant`; errors `ErrMethodNotExposed`,
`ErrGrantMissing`.
- **TOCTOU re-verification at spawn.** The binary is re-checked
immediately before `exec` (symlink rejection + sha256), closing the gap
between install-scan and launch.
- **Exponential verify-fail backoff.** Verification failures retry with
capped exponential backoff (was a fixed 30s), consistent with crash-loop
handling.
- **Multi-generation audit log rotation.** `supervisor.log` rotates across
N generations (`AuditLogMaxBackups`, default 3) instead of a single step.
- **Address-space cap (Linux).** Spawned apps get `RLIMIT_AS` alongside
`RLIMIT_NOFILE`, configurable via `Config.ChildMemoryLimitBytes`
(default 4 GiB); no-op on non-Linux.
- **Extension DoS guards.** `pkg/extend` adds per-app hook-dispatch rate
limiting (`Registry.SetRateLimit`, `ErrRateLimited`) and a per-app cap on
dynamic registrations (`ErrTooManyRegistrations`).

### Added — pilotctl

`pilotctl appstore` is the operator surface for the app store.
Expand Down
47 changes: 47 additions & 0 deletions pkg/extend/extend.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,17 @@ type Registry struct {

mu sync.RWMutex
byPoint map[HookPoint][]Extension

// limiter, when non-nil, rate-limits hook dispatch per app in Run.
// Nil = unlimited (default); enable via SetRateLimit.
limiter *rateLimiter
}

// ErrRateLimited is returned by Run when an app's hook-invocation rate
// budget is exhausted. The chain aborts rather than dispatch into an app
// that's being called too aggressively. Run wraps this sentinel with %w
// (adding the hook point, app ID and method), so callers should match it
// with errors.Is rather than ==.
var ErrRateLimited = errors.New("extend: hook rate limit exceeded")

// NewRegistry constructs an empty Registry. dispatch is required.
func NewRegistry(dispatch Dispatcher) *Registry {
if dispatch == nil {
Expand All @@ -133,6 +142,38 @@ func NewRegistry(dispatch Dispatcher) *Registry {
}
}

// SetRateLimit enables per-app hook-dispatch rate limiting: each app may
// fire `burst` hook invocations instantly and `ratePerSec` sustained
// thereafter. Once an app's budget is exhausted, Run stops the chain at
// that app's hook and returns an error wrapping ErrRateLimited.
//
// Call with a non-positive rate or burst to disable; a NaN rate also
// disables. Every enabling call installs a fresh limiter, so budgets
// consumed under a previous setting are forgotten. Intended to be set
// once at daemon wire-up.
func (r *Registry) SetRateLimit(ratePerSec float64, burst int) {
	r.mu.Lock()
	defer r.mu.Unlock()
	// Deliberately !(rate > 0) instead of rate <= 0: every ordered
	// comparison against NaN is false, so the <= form would let a NaN
	// rate through. A NaN rate turns each bucket's token count into NaN
	// on the first refill, after which every hook would be refused.
	if !(ratePerSec > 0) || burst <= 0 {
		r.limiter = nil
		return
	}
	r.limiter = newRateLimiter(ratePerSec, burst)
}

// CountForApp reports how many extensions registered under appID the
// registry currently holds, summed over every hook point. It takes the
// read lock for the duration of the scan. DaemonHandler.Register relies
// on it to cap the number of registrations one app may hold.
func (r *Registry) CountForApp(appID string) int {
	r.mu.RLock()
	defer r.mu.RUnlock()

	total := 0
	for _, exts := range r.byPoint {
		for i := range exts {
			if exts[i].AppID != appID {
				continue
			}
			total++
		}
	}
	return total
}

// Register adds one extension to the registry. Returns an error if
// the primitive is shape-invalid, the method is empty, or a flag is
// shaped wrong. Multiple apps may register for the same hook point;
Expand Down Expand Up @@ -239,11 +280,17 @@ func (r *Registry) FlagsFor(p HookPoint) []FlagSpec {
// the hook may have populated, then cleared.
func (r *Registry) Run(ctx context.Context, p HookPoint, args HookArgs) (HookArgs, error) {
hooks := r.HooksFor(p)
r.mu.RLock()
lim := r.limiter
r.mu.RUnlock()
current := args
for _, h := range hooks {
if err := ctx.Err(); err != nil {
return current, err
}
if lim != nil && !lim.allow(h.AppID) {
return current, fmt.Errorf("extend %s: hook %s.%s: %w", p, h.AppID, h.Method, ErrRateLimited)
}
next, err := r.dispatch(ctx, h.AppID, h.Method, cloneArgs(current))
if err != nil {
return current, fmt.Errorf("extend %s: hook %s.%s: %w", p, h.AppID, h.Method, err)
Expand Down
60 changes: 60 additions & 0 deletions pkg/extend/ratelimit.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package extend

import (
"sync"
"time"
)

// rateLimiter caps, per key, how often the registry dispatches into a
// single app's hooks. Every key owns an independent token bucket, so an
// app whose hooks fire on every primitive cannot turn the hook chain
// into a DoS amplifier: when its bucket runs dry, further invocations
// are refused until enough time has passed for a token to refill.
type rateLimiter struct {
	mu      sync.Mutex              // guards buckets and the state inside each bucket
	rate    float64                 // refill speed in tokens per second
	burst   float64                 // maximum number of tokens a bucket can hold
	now     func() time.Time        // clock source; replaceable so tests can pin time
	buckets map[string]*tokenBucket // one bucket per key, created on first use
}

// tokenBucket is the refill state tracked for one key.
type tokenBucket struct {
	tokens float64   // tokens currently available (may be fractional)
	last   time.Time // instant of the most recent refill
}

// newRateLimiter returns a limiter that lets each key spend `burst`
// events at once and `ratePerSec` events per second after that. The
// limiter reads time from time.Now and starts with no buckets.
func newRateLimiter(ratePerSec float64, burst int) *rateLimiter {
	rl := new(rateLimiter)
	rl.rate = ratePerSec
	rl.burst = float64(burst)
	rl.now = time.Now
	rl.buckets = make(map[string]*tokenBucket)
	return rl
}

// allow decides whether a single event for key may go ahead right now,
// spending one token when it may. A key seen for the first time starts
// with a full bucket. Refill is lazy: tokens are credited from the time
// elapsed since the bucket's previous refill, never beyond burst.
func (rl *rateLimiter) allow(key string) bool {
	rl.mu.Lock()
	defer rl.mu.Unlock()

	current := rl.now()

	bucket := rl.buckets[key]
	if bucket == nil {
		bucket = &tokenBucket{tokens: rl.burst, last: current}
		rl.buckets[key] = bucket
	}

	// Credit only when time has moved forward; a zero or negative delta
	// leaves both the token count and the refill timestamp untouched.
	if delta := current.Sub(bucket.last).Seconds(); delta > 0 {
		refilled := bucket.tokens + delta*rl.rate
		if refilled > rl.burst {
			refilled = rl.burst
		}
		bucket.tokens = refilled
		bucket.last = current
	}

	granted := bucket.tokens >= 1
	if granted {
		bucket.tokens--
	}
	return granted
}
25 changes: 25 additions & 0 deletions pkg/extend/ratelimit_disable_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package extend

import (
"context"
"testing"
)

// TestSetRateLimit_DisableClearsLimiter exercises the disable path of
// SetRateLimit: after limiting is switched on and then switched off with
// non-positive values, Run must dispatch every call without ever
// reporting a rate-limit error.
func TestSetRateLimit_DisableClearsLimiter(t *testing.T) {
	t.Parallel()

	reg := NewRegistry(noopDispatch)
	hook := Extension{AppID: "a", Primitive: PreSendMessage, Method: "h"}
	if err := reg.Register(hook); err != nil {
		t.Fatal(err)
	}

	reg.SetRateLimit(1, 1) // switch limiting on with a one-token budget
	reg.SetRateLimit(0, 0) // non-positive values switch it off again

	ctx := context.Background()
	for call := 0; call < 50; call++ {
		_, err := reg.Run(ctx, PreSendMessage, HookArgs{})
		if err != nil {
			t.Fatalf("disabled limiter must not block (call %d): %v", call, err)
		}
	}
}
126 changes: 126 additions & 0 deletions pkg/extend/ratelimit_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package extend

import (
"context"
"errors"
"fmt"
"testing"
"time"
)

// noopDispatch is a Dispatcher stub for tests: it ignores the context,
// app ID and method, and hands the incoming args straight back with a
// nil error.
func noopDispatch(
	_ context.Context,
	_ string,
	_ string,
	args HookArgs,
) (HookArgs, error) {
	return args, nil
}

// TestRun_RateLimitsPerApp verifies the burst-then-refill contract seen
// through Run: an app may spend its burst, is then refused with
// ErrRateLimited, and regains exactly one call once the injected clock
// moves forward by one second at a rate of one token per second.
func TestRun_RateLimitsPerApp(t *testing.T) {
	t.Parallel()

	reg := NewRegistry(noopDispatch)
	spammer := Extension{AppID: "io.spammer", Primitive: PreSendMessage, Method: "h"}
	if err := reg.Register(spammer); err != nil {
		t.Fatal(err)
	}
	reg.SetRateLimit(1, 3) // refill 1 token/sec, bucket holds 3

	// Replace the limiter's clock with one the test drives by hand so
	// refill never depends on real elapsed time.
	start := time.Unix(1_000_000, 0)
	clock := start
	reg.mu.Lock()
	reg.limiter.now = func() time.Time { return clock }
	reg.mu.Unlock()

	// fire runs the hook chain once and reports only the error.
	fire := func() error {
		_, err := reg.Run(context.Background(), PreSendMessage, HookArgs{})
		return err
	}

	// The burst of three goes through.
	for attempt := 0; attempt < 3; attempt++ {
		if err := fire(); err != nil {
			t.Fatalf("call %d should pass, got %v", attempt, err)
		}
	}
	// With the clock frozen, the next call finds an empty bucket.
	if err := fire(); !errors.Is(err, ErrRateLimited) {
		t.Fatalf("4th call err = %v, want ErrRateLimited", err)
	}

	// One second later a single token is back: one call passes, the
	// following one is refused again.
	clock = start.Add(time.Second)
	if err := fire(); err != nil {
		t.Fatalf("post-refill call should pass, got %v", err)
	}
	if err := fire(); !errors.Is(err, ErrRateLimited) {
		t.Fatalf("call after refill drained err = %v, want ErrRateLimited", err)
	}
}

// TestRun_RateLimitIsPerApp confirms one app exhausting its budget does
// not block a different app's hooks. App "a" and app "b" hook different
// points, each with a budget of one call; draining "a" must leave "b"
// able to run.
func TestRun_RateLimitIsPerApp(t *testing.T) {
	t.Parallel()
	r := NewRegistry(noopDispatch)
	// Fail fast on a registration error: with a hook missing, Run never
	// consults the limiter for that app and the assertions below would
	// report a misleading rate-limit failure instead of the real cause.
	for _, ext := range []Extension{
		{AppID: "a", Primitive: PreSendMessage, Method: "h", Order: 1},
		{AppID: "b", Primitive: PostRecvMessage, Method: "h", Order: 1},
	} {
		if err := r.Register(ext); err != nil {
			t.Fatalf("register hook for app %s: %v", ext.AppID, err)
		}
	}
	r.SetRateLimit(1, 1)
	// Freeze the limiter's clock so no token refills during the test.
	now := time.Unix(2_000_000, 0)
	r.mu.Lock()
	r.limiter.now = func() time.Time { return now }
	r.mu.Unlock()

	ctx := context.Background()
	// Drain app "a".
	if _, err := r.Run(ctx, PreSendMessage, HookArgs{}); err != nil {
		t.Fatal(err)
	}
	if _, err := r.Run(ctx, PreSendMessage, HookArgs{}); !errors.Is(err, ErrRateLimited) {
		t.Fatalf("app a should be limited, got %v", err)
	}
	// App "b" still has its own full budget.
	if _, err := r.Run(ctx, PostRecvMessage, HookArgs{}); err != nil {
		t.Fatalf("app b should not be limited, got %v", err)
	}
}

// TestRun_NoLimiterByDefault confirms back-compat: without SetRateLimit,
// many invocations all pass.
func TestRun_NoLimiterByDefault(t *testing.T) {
	t.Parallel()
	r := NewRegistry(noopDispatch)
	// The registration must succeed for the loop below to mean anything:
	// with no hook registered, Run has nothing to dispatch and the test
	// would pass without ever exercising the unlimited path.
	if err := r.Register(Extension{AppID: "a", Primitive: PreSendMessage, Method: "h"}); err != nil {
		t.Fatal(err)
	}
	for i := 0; i < 1000; i++ {
		if _, err := r.Run(context.Background(), PreSendMessage, HookArgs{}); err != nil {
			t.Fatalf("unlimited registry should never rate-limit, got %v at %d", err, i)
		}
	}
}

// TestDaemonHandler_CapsDynamicRegistrations checks the per-app cap on
// registrations made through DaemonHandler.Register: an app can fill
// exactly maxDynamicRegistrationsPerApp slots, the next attempt fails
// with ErrTooManyRegistrations, other apps are unaffected, and removing
// one hook frees a slot for a new registration.
func TestDaemonHandler_CapsDynamicRegistrations(t *testing.T) {
	t.Parallel()

	registry := NewRegistry(noopDispatch)
	handler := NewDaemonHandler(registry, AllowAll)

	// hookNamed builds a PreSendMessage extension for the given method.
	hookNamed := func(method string) Extension {
		return Extension{Primitive: PreSendMessage, Method: method}
	}

	// Fill every slot the greedy app is entitled to.
	for slot := 0; slot < maxDynamicRegistrationsPerApp; slot++ {
		if err := handler.Register("io.greedy", hookNamed(fmt.Sprintf("m%d", slot))); err != nil {
			t.Fatalf("registration %d should succeed, got %v", slot, err)
		}
	}

	// The first registration beyond the cap is rejected.
	overflowErr := handler.Register("io.greedy", hookNamed("overflow"))
	if !errors.Is(overflowErr, ErrTooManyRegistrations) {
		t.Fatalf("over-cap registration err = %v, want ErrTooManyRegistrations", overflowErr)
	}

	// The cap is tracked per app, so another app can still register.
	if err := handler.Register("io.modest", hookNamed("m")); err != nil {
		t.Fatalf("other app should still register, got %v", err)
	}

	// Dropping one of greedy's hooks makes room for a replacement.
	registry.UnregisterOne("io.greedy", PreSendMessage, "m0")
	if err := handler.Register("io.greedy", hookNamed("again")); err != nil {
		t.Fatalf("after freeing a slot, registration should succeed, got %v", err)
	}
}
17 changes: 17 additions & 0 deletions pkg/extend/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,18 @@ var DenyAll Permission = PermissionFunc(func(string, HookPoint) bool { return fa
// configured Permission rejects the request.
var ErrPermissionDenied = errors.New("extend: permission denied")

// maxDynamicRegistrationsPerApp bounds how many hooks a single app may
// hold at once via the runtime-register IPC. Without a cap, an app
// permitted to register dynamically could register unboundedly and
// exhaust memory / slow every primitive's hook lookup — a DoS. The
// cap is generous (a well-behaved app registers a handful of hooks)
// while refusing pathological growth.
//
// DaemonHandler.Register enforces the cap by comparing it against
// Registry.CountForApp, which counts all of the app's extensions
// currently in the registry across every hook point.
const maxDynamicRegistrationsPerApp = 32

// ErrTooManyRegistrations is returned by DaemonHandler.Register when an
// app already holds maxDynamicRegistrationsPerApp hooks. Register wraps
// it with %w (adding the app ID and counts), so callers should match it
// with errors.Is rather than ==.
var ErrTooManyRegistrations = errors.New("extend: too many dynamic registrations for app")

// DaemonHandler is the runtime-side IPC surface the daemon exposes to
// installed apps so they can add/remove their own hooks dynamically
// (within the bounds Permission allows). Apps call these methods via
Expand Down Expand Up @@ -68,6 +80,11 @@ func (h *DaemonHandler) Register(appID string, ext Extension) error {
if !h.perms.CanRegister(appID, ext.Primitive) {
return fmt.Errorf("%w: %s cannot register %s", ErrPermissionDenied, appID, ext.Primitive)
}
// Bound the number of hooks one app may hold to prevent unbounded
// dynamic registration (memory + per-primitive lookup-cost DoS).
if h.reg.CountForApp(appID) >= maxDynamicRegistrationsPerApp {
return fmt.Errorf("%w: %s holds %d (max %d)", ErrTooManyRegistrations, appID, h.reg.CountForApp(appID), maxDynamicRegistrationsPerApp)
}
return h.reg.Register(ext)
}

Expand Down
Loading
Loading