Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions cmd/api/api/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,57 @@ func (s *ApiService) DeleteInstance(ctx context.Context, request oapi.DeleteInst
return oapi.DeleteInstance204Response{}, nil
}

// UpdateInstance updates a running instance (e.g. rotate credential secrets).
// The id parameter can be an instance ID, name, or ID prefix.
// Note: Resolution is handled by ResolveResource middleware.
func (s *ApiService) UpdateInstance(ctx context.Context, request oapi.UpdateInstanceRequestObject) (oapi.UpdateInstanceResponseObject, error) {
log := logger.FromContext(ctx)

inst := mw.GetResolvedInstance[instances.Instance](ctx)
if inst == nil {
return oapi.UpdateInstance500JSONResponse{
Code: "internal_error",
Message: "resource not resolved",
}, nil
}

env := make(map[string]string)
if request.Body.Env != nil {
env = *request.Body.Env
}

updated, err := s.InstanceManager.UpdateInstance(ctx, inst.Id, instances.UpdateInstanceRequest{
Env: env,
})
if err != nil {
switch {
case errors.Is(err, instances.ErrInvalidState):
return oapi.UpdateInstance409JSONResponse{
Code: "invalid_state",
Message: err.Error(),
}, nil
case errors.Is(err, instances.ErrInvalidRequest):
return oapi.UpdateInstance400JSONResponse{
Code: "invalid_request",
Message: err.Error(),
}, nil
case errors.Is(err, instances.ErrNotFound):
return oapi.UpdateInstance404JSONResponse{
Code: "not_found",
Message: err.Error(),
}, nil
default:
log.ErrorContext(ctx, "failed to update instance", "error", err, "instance_id", inst.Id)
return oapi.UpdateInstance500JSONResponse{
Code: "internal_error",
Message: "failed to update instance",
}, nil
}
}

return oapi.UpdateInstance200JSONResponse(instanceToOAPI(*updated)), nil
}

// StandbyInstance puts an instance in standby (pause, snapshot, delete VMM)
// The id parameter can be an instance ID, name, or ID prefix
// Note: Resolution is handled by ResolveResource middleware
Expand Down
4 changes: 4 additions & 0 deletions lib/builds/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ func (m *mockInstanceManager) StopInstance(ctx context.Context, id string) (*ins
return nil, instances.ErrNotFound
}

func (m *mockInstanceManager) UpdateInstance(ctx context.Context, id string, req instances.UpdateInstanceRequest) (*instances.Instance, error) {
return nil, nil
}

func (m *mockInstanceManager) StartInstance(ctx context.Context, id string, req instances.StartInstanceRequest) (*instances.Instance, error) {
return nil, nil
}
Expand Down
21 changes: 21 additions & 0 deletions lib/egressproxy/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,27 @@ func compileHeaderInjectRules(cfgRules []HeaderInjectRuleConfig) ([]headerInject
return out, nil
}

// UpdateInstanceInjectRules atomically replaces the header injection rules
// for an already-registered instance without re-applying iptables enforcement.
// Returns an error if the instance is not currently registered.
func (s *Service) UpdateInstanceInjectRules(instanceID string, rules []HeaderInjectRuleConfig) error {
compiled, err := compileHeaderInjectRules(rules)
if err != nil {
return err
}

s.mu.Lock()
defer s.mu.Unlock()

sourceIP, ok := s.sourceIPByInstance[instanceID]
if !ok {
return fmt.Errorf("instance %s is not registered with the egress proxy", instanceID)
}

s.policiesBySourceIP[sourceIP] = sourcePolicy{headerInjectRules: compiled}
return nil
}

func (s *Service) UnregisterInstance(_ context.Context, instanceID string) {
s.mu.Lock()
sourceIP, ok := s.sourceIPByInstance[instanceID]
Expand Down
64 changes: 64 additions & 0 deletions lib/instances/egress_proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"strings"

"github.com/kernel/hypeman/lib/egressproxy"
"github.com/kernel/hypeman/lib/logger"
"github.com/kernel/hypeman/lib/network"
)

Expand Down Expand Up @@ -237,6 +238,69 @@ func (m *manager) maybeRegisterEgressProxy(ctx context.Context, stored *StoredMe
return &guestCfg, nil
}

// updateInstance merges new env values into the stored metadata and refreshes
// the egress proxy inject rules so credential rotations take effect immediately
// on a running instance.
func (m *manager) updateInstance(ctx context.Context, id string, req UpdateInstanceRequest) (*Instance, error) {
log := logger.FromContext(ctx)

meta, err := m.loadMetadata(id)
if err != nil {
return nil, ErrNotFound
}

// Derive state to ensure instance is running
inst := m.toInstance(ctx, meta)
if inst.State != StateRunning && inst.State != StateInitializing {
return nil, fmt.Errorf("%w: instance must be running to update (current state: %s)", ErrInvalidState, inst.State)
}

if len(req.Env) == 0 {
return &inst, nil
}

// Merge new env values into existing env
if meta.Env == nil {
meta.Env = make(map[string]string)
}
for k, v := range req.Env {
meta.Env[k] = v
}

// Validate that credential env bindings are still satisfied after the merge
if len(meta.Credentials) > 0 {
if err := validateCredentialEnvBindings(meta.Credentials, meta.Env); err != nil {
return nil, err
}
}

// Update egress proxy inject rules if egress proxy is active
if meta.NetworkEgress != nil && meta.NetworkEgress.Enabled && len(meta.Credentials) > 0 {
m.egressProxyMu.Lock()
svc := m.egressProxy
m.egressProxyMu.Unlock()

if svc != nil {
newRules := buildEgressProxyInjectRules(meta.NetworkEgress, meta.Credentials, meta.Env)
if err := svc.UpdateInstanceInjectRules(id, newRules); err != nil {
log.ErrorContext(ctx, "failed to update egress proxy inject rules", "instance_id", id, "error", err)
return nil, fmt.Errorf("update egress proxy inject rules: %w", err)
}
log.InfoContext(ctx, "updated egress proxy inject rules", "instance_id", id)
}
}

// Persist updated metadata
if err := m.saveMetadata(meta); err != nil {
log.ErrorContext(ctx, "failed to save metadata after update", "instance_id", id, "error", err)
return nil, fmt.Errorf("save metadata: %w", err)
}

updated := m.toInstance(ctx, meta)
log.InfoContext(ctx, "instance updated", "instance_id", id)
return &updated, nil
}

func (m *manager) unregisterEgressProxyInstance(ctx context.Context, instanceID string) {
_ = ctx
m.egressProxyMu.Lock()
Expand Down
122 changes: 122 additions & 0 deletions lib/instances/egress_proxy_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,128 @@ func TestEgressProxyRewritesHTTPSHeaders(t *testing.T) {
deleted = true
}

func TestEgressProxySecretUpdateOnRunningInstance(t *testing.T) {
requireKVMAccess(t)

manager, _ := setupTestManager(t)
ctx := context.Background()

caPEM, cert := mustGenerateTLSChain(t, []string{"localhost"})
manager.egressProxyServiceOptions = egressproxy.ServiceOptions{
AdditionalRootCAPEM: []string{caPEM},
}

target := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
_, _ = fmt.Fprint(w, r.Header.Get("Authorization"))
}))
target.TLS = &tls.Config{Certificates: []tls.Certificate{cert}}
target.StartTLS()
defer target.Close()
targetHostPort := strings.TrimPrefix(target.URL, "https://")
targetHost, targetPort, err := net.SplitHostPort(targetHostPort)
require.NoError(t, err)

imageRef := integrationTestImageRef(t, "docker.io/library/nginx:alpine")
t.Logf("Pulling %s image...", imageRef)
created, err := manager.imageManager.CreateImage(ctx, images.CreateImageRequest{Name: imageRef})
require.NoError(t, err)

for i := 0; i < 120; i++ {
img, err := manager.imageManager.GetImage(ctx, created.Name)
if err == nil && img.Status == images.StatusReady {
break
}
time.Sleep(1 * time.Second)
}
img, err := manager.imageManager.GetImage(ctx, created.Name)
require.NoError(t, err)
require.Equal(t, images.StatusReady, img.Status)

require.NoError(t, manager.systemManager.EnsureSystemFiles(ctx))
require.NoError(t, manager.networkManager.Initialize(ctx, nil))

inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{
Name: "test-egress-update",
Image: imageRef,
Size: 2 * 1024 * 1024 * 1024,
HotplugSize: 512 * 1024 * 1024,
OverlaySize: 5 * 1024 * 1024 * 1024,
Vcpus: 1,
NetworkEnabled: true,
NetworkEgress: &NetworkEgressPolicy{
Enabled: true,
},
Credentials: map[string]CredentialPolicy{
"OUTBOUND_OPENAI_KEY": {
Source: CredentialSource{Env: "OUTBOUND_OPENAI_KEY"},
Inject: []CredentialInjectRule{
{
Hosts: []string{"127.0.0.1"},
As: CredentialInjectAs{
Header: "Authorization",
Format: "Bearer ${value}",
},
},
},
},
},
Env: map[string]string{
"OUTBOUND_OPENAI_KEY": "original-key-111",
},
Entrypoint: []string{"/bin/sh", "-lc"},
Cmd: []string{"sleep 3600"},
})
require.NoError(t, err)

deleted := false
t.Cleanup(func() {
if !deleted {
_ = manager.DeleteInstance(context.Background(), inst.Id)
}
})

require.NoError(t, waitForVMReady(ctx, inst.SocketPath, 10*time.Second))
require.NoError(t, waitForLogMessage(ctx, manager, inst.Id, "[guest-agent] listening", 45*time.Second))

// Step 1: Verify initial credential is injected
curlCmd := fmt.Sprintf(
"NO_PROXY= no_proxy= curl -k -sS https://%s:%s",
targetHost, targetPort,
)
output, exitCode, err := execCommand(ctx, inst, "sh", "-lc", curlCmd)
require.NoError(t, err)
require.Equal(t, 0, exitCode, "curl output: %s", output)
require.Contains(t, output, "Bearer original-key-111")
t.Log("Initial credential injection verified")

// Step 2: Rotate the secret via UpdateInstance
updated, err := manager.UpdateInstance(ctx, inst.Id, UpdateInstanceRequest{
Env: map[string]string{
"OUTBOUND_OPENAI_KEY": "rotated-key-222",
},
})
require.NoError(t, err)
require.Equal(t, "rotated-key-222", updated.Env["OUTBOUND_OPENAI_KEY"])
t.Log("Secret updated via UpdateInstance")

// Step 3: Verify the rotated credential is now injected by the proxy
output, exitCode, err = execCommand(ctx, inst, "sh", "-lc", curlCmd)
require.NoError(t, err)
require.Equal(t, 0, exitCode, "curl output: %s", output)
require.Contains(t, output, "Bearer rotated-key-222")
t.Log("Rotated credential injection verified")

// Step 4: Verify the guest still sees the mock value (not the real secret)
envOutput, envExitCode, err := execCommand(ctx, inst, "sh", "-lc", "printf '%s' \"$OUTBOUND_OPENAI_KEY\"")
require.NoError(t, err)
require.Equal(t, 0, envExitCode)
require.Equal(t, "mock-OUTBOUND_OPENAI_KEY", envOutput)
t.Log("Guest env still shows mock value after rotation")

require.NoError(t, manager.DeleteInstance(ctx, inst.Id))
deleted = true
}

func mustGenerateTLSChain(t *testing.T, dnsNames []string) (string, tls.Certificate) {
t.Helper()

Expand Down
10 changes: 10 additions & 0 deletions lib/instances/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ type Manager interface {
StandbyInstance(ctx context.Context, id string) (*Instance, error)
RestoreInstance(ctx context.Context, id string) (*Instance, error)
RestoreSnapshot(ctx context.Context, id string, snapshotID string, req RestoreSnapshotRequest) (*Instance, error)
UpdateInstance(ctx context.Context, id string, req UpdateInstanceRequest) (*Instance, error)
StopInstance(ctx context.Context, id string) (*Instance, error)
StartInstance(ctx context.Context, id string, req StartInstanceRequest) (*Instance, error)
StreamInstanceLogs(ctx context.Context, id string, tail int, follow bool, source LogSource) (<-chan string, error)
Expand Down Expand Up @@ -297,6 +298,15 @@ func (m *manager) RestoreSnapshot(ctx context.Context, id string, snapshotID str
return m.restoreSnapshot(ctx, id, snapshotID, req)
}

// UpdateInstance updates a running instance's env values and refreshes egress
// proxy inject rules. Used for credential key rotation without instance restart.
func (m *manager) UpdateInstance(ctx context.Context, id string, req UpdateInstanceRequest) (*Instance, error) {
lock := m.getInstanceLock(id)
lock.Lock()
defer lock.Unlock()
return m.updateInstance(ctx, id, req)
}

// StopInstance gracefully stops a running instance
func (m *manager) StopInstance(ctx context.Context, id string) (*Instance, error) {
lock := m.getInstanceLock(id)
Expand Down
6 changes: 6 additions & 0 deletions lib/instances/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,12 @@ type CreateInstanceRequest struct {
SkipGuestAgent bool // Skip guest-agent installation (disables exec/stat API)
}

// UpdateInstanceRequest is the domain request for updating a running instance.
// Currently supports updating env values for credential key rotation.
type UpdateInstanceRequest struct {
Env map[string]string // Env values to merge into existing env (only credential-source keys are required)
}

// StartInstanceRequest is the domain request for starting a stopped instance
type StartInstanceRequest struct {
Entrypoint []string // Override entrypoint (nil = keep previous/image default)
Expand Down
Loading
Loading