Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
55c44d2
Add configurable snapshot compression with async standby support
sjmiller609 Mar 18, 2026
a61b5da
Merge origin/main into codex/snapshot-compression-defaults
sjmiller609 Mar 19, 2026
47a3197
Add CH snapshot compression restore coverage
sjmiller609 Mar 19, 2026
0d4d7a0
Skip transient compression temp files during snapshot copy
sjmiller609 Mar 19, 2026
042d0c3
Clarify async snapshot compression restore behavior
sjmiller609 Mar 19, 2026
474bfed
Reduce snapshot compression test races
sjmiller609 Mar 19, 2026
0a9523f
Restore compression test parallelism
sjmiller609 Mar 20, 2026
47fd741
Unify snapshot restore cancellation behavior
sjmiller609 Mar 20, 2026
780fcd7
Apply suggestions from code review
sjmiller609 Mar 20, 2026
78a24d0
Add snapshot compression metrics
sjmiller609 Mar 20, 2026
bc2fd60
Fix snapshot compression review feedback
sjmiller609 Mar 20, 2026
e2d691e
Update Stainless model config
sjmiller609 Mar 20, 2026
3a12b9e
Serialize shared initrd rebuilds
sjmiller609 Mar 20, 2026
84113aa
Fix disabled snapshot defaults fallback
sjmiller609 Mar 20, 2026
e21be0c
Make snapshot compression fully opt-in
sjmiller609 Mar 20, 2026
e55214b
Fix snapshot compression restore races
sjmiller609 Mar 20, 2026
c78830a
Fix optional standby body handling
sjmiller609 Mar 20, 2026
d192ba1
Fix standby snapshot compression races
sjmiller609 Mar 20, 2026
19f4aec
Normalize standby snapshot compression copies
sjmiller609 Mar 20, 2026
2eef65d
Handle optional standby bodies outside generated code
sjmiller609 Mar 20, 2026
1c0eea1
Fix snapshot compression cleanup races
sjmiller609 Mar 20, 2026
da7f421
Clarify snapshot compression metrics state
sjmiller609 Mar 20, 2026
759eb3c
Return bad request for invalid standby input
sjmiller609 Mar 20, 2026
6b42cc5
Use native-first snapshot codecs with Go fallback
sjmiller609 Mar 20, 2026
cde775f
Normalize snapshot compression algorithms case-insensitively
sjmiller609 Mar 20, 2026
c17bec4
Tighten standby compression validation handling
sjmiller609 Mar 20, 2026
54df2a1
Merge origin/main into codex/snapshot-compression-defaults
sjmiller609 Mar 20, 2026
e78c657
Reduce compression levels in integration tests to avoid CI timeout
sjmiller609 Mar 21, 2026
2a9c9bc
Reduce compression integration test cycles to fit CI timeout
sjmiller609 Mar 21, 2026
393b80a
Address PR review feedback: add OpenAPI descriptions, fix dst.Close()…
sjmiller609 Mar 21, 2026
460c265
Address review feedback: add server-side compression validation, log …
sjmiller609 Mar 23, 2026
3b9264e
Fix snapshot compression review follow-ups
sjmiller609 Mar 23, 2026
fa7d99b
Merge origin/main into codex/snapshot-compression-defaults
sjmiller609 Mar 23, 2026
8638cce
Fix standby fork compression race
sjmiller609 Mar 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/api/api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func newTestService(t *testing.T) *ApiService {
limits := instances.ResourceLimits{
MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB
}
instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, "", nil, nil)
instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, "", instances.SnapshotPolicy{}, nil, nil)

// Initialize network manager (creates bridge for network-enabled tests)
if err := networkMgr.Initialize(ctx(), nil); err != nil {
Expand Down
104 changes: 103 additions & 1 deletion cmd/api/api/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/kernel/hypeman/lib/network"
"github.com/kernel/hypeman/lib/oapi"
"github.com/kernel/hypeman/lib/resources"
"github.com/kernel/hypeman/lib/snapshot"
"github.com/kernel/hypeman/lib/vm_metrics"
"github.com/samber/lo"
)
Expand Down Expand Up @@ -301,6 +302,16 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst
SkipKernelHeaders: request.Body.SkipKernelHeaders != nil && *request.Body.SkipKernelHeaders,
SkipGuestAgent: request.Body.SkipGuestAgent != nil && *request.Body.SkipGuestAgent,
}
if request.Body.SnapshotPolicy != nil {
snapshotPolicy, err := toInstanceSnapshotPolicy(*request.Body.SnapshotPolicy)
if err != nil {
return oapi.CreateInstance400JSONResponse{
Code: "invalid_snapshot_policy",
Message: err.Error(),
}, nil
}
domainReq.SnapshotPolicy = snapshotPolicy
}

inst, err := s.InstanceManager.CreateInstance(ctx, domainReq)
if err != nil {
Expand Down Expand Up @@ -438,9 +449,26 @@ func (s *ApiService) StandbyInstance(ctx context.Context, request oapi.StandbyIn
}
log := logger.FromContext(ctx)

result, err := s.InstanceManager.StandbyInstance(ctx, inst.Id)
standbyReq := instances.StandbyInstanceRequest{}
if request.Body != nil && request.Body.Compression != nil {
compression, err := toDomainSnapshotCompressionConfig(*request.Body.Compression)
if err != nil {
return oapi.StandbyInstance400JSONResponse{
Code: "invalid_snapshot_compression",
Message: err.Error(),
}, nil
}
standbyReq.Compression = compression
}

result, err := s.InstanceManager.StandbyInstance(ctx, inst.Id, standbyReq)
if err != nil {
switch {
case errors.Is(err, instances.ErrInvalidRequest):
return oapi.StandbyInstance400JSONResponse{
Code: "invalid_request",
Message: err.Error(),
}, nil
case errors.Is(err, instances.ErrInvalidState):
return oapi.StandbyInstance409JSONResponse{
Code: "invalid_state",
Expand Down Expand Up @@ -951,6 +979,10 @@ func instanceToOAPI(inst instances.Instance) oapi.Instance {
if len(inst.Tags) > 0 {
oapiInst.Tags = toOAPITags(inst.Tags)
}
if inst.SnapshotPolicy != nil {
oapiPolicy := toOAPISnapshotPolicy(*inst.SnapshotPolicy)
oapiInst.SnapshotPolicy = &oapiPolicy
}

// Convert volume attachments
if len(inst.Volumes) > 0 {
Expand Down Expand Up @@ -985,3 +1017,73 @@ func instanceToOAPI(inst instances.Instance) oapi.Instance {

return oapiInst
}

// toDomainSnapshotCompressionConfig translates an API compression config into
// its domain form, validating it along the way.
//
// The algorithm, when present, is lower-cased and must be zstd or lz4. The
// level, when present, must lie inside the range of the selected algorithm;
// when no algorithm was supplied the zstd range is used for the check. The
// returned config holds its own copy of the level rather than aliasing the
// request's pointer.
func toDomainSnapshotCompressionConfig(cfg oapi.SnapshotCompressionConfig) (*snapshot.SnapshotCompressionConfig, error) {
	result := &snapshot.SnapshotCompressionConfig{Enabled: cfg.Enabled}

	if cfg.Algorithm != nil {
		normalized := snapshot.SnapshotCompressionAlgorithm(strings.ToLower(string(*cfg.Algorithm)))
		isZstd := normalized == snapshot.SnapshotCompressionAlgorithmZstd
		isLz4 := normalized == snapshot.SnapshotCompressionAlgorithmLz4
		if !isZstd && !isLz4 {
			return nil, fmt.Errorf("algorithm must be one of zstd or lz4, got %q", *cfg.Algorithm)
		}
		result.Algorithm = normalized
	}

	// Nothing further to validate when the caller did not pick a level.
	if cfg.Level == nil {
		return result, nil
	}

	requested := *cfg.Level

	// Pick the algorithm whose bounds apply; an unset algorithm is checked
	// against the zstd limits.
	effective := result.Algorithm
	if effective == "" {
		effective = snapshot.SnapshotCompressionAlgorithmZstd
	}

	switch effective {
	case snapshot.SnapshotCompressionAlgorithmLz4:
		if requested < snapshot.MinSnapshotCompressionLz4Level || requested > snapshot.MaxSnapshotCompressionLz4Level {
			return nil, fmt.Errorf("level must be between %d and %d for lz4, got %d", snapshot.MinSnapshotCompressionLz4Level, snapshot.MaxSnapshotCompressionLz4Level, requested)
		}
	case snapshot.SnapshotCompressionAlgorithmZstd:
		if requested < snapshot.MinSnapshotCompressionZstdLevel || requested > snapshot.MaxSnapshotCompressionZstdLevel {
			return nil, fmt.Errorf("level must be between %d and %d for zstd, got %d", snapshot.MinSnapshotCompressionZstdLevel, snapshot.MaxSnapshotCompressionZstdLevel, requested)
		}
	}

	result.Level = &requested
	return result, nil
}

// toInstanceSnapshotPolicy converts an API snapshot policy into the instances
// package representation. A policy without a compression section yields an
// empty, non-nil policy; a compression section that fails validation yields
// the validation error and a nil policy.
func toInstanceSnapshotPolicy(policy oapi.SnapshotPolicy) (*instances.SnapshotPolicy, error) {
	if policy.Compression == nil {
		return &instances.SnapshotPolicy{}, nil
	}

	domainCompression, err := toDomainSnapshotCompressionConfig(*policy.Compression)
	if err != nil {
		return nil, err
	}
	return &instances.SnapshotPolicy{Compression: domainCompression}, nil
}

// toOAPISnapshotCompressionConfig renders a domain compression config as its
// API counterpart. The algorithm is only emitted when it is non-empty, and the
// level is copied so the response does not share a pointer with the domain
// value.
func toOAPISnapshotCompressionConfig(cfg snapshot.SnapshotCompressionConfig) oapi.SnapshotCompressionConfig {
	var result oapi.SnapshotCompressionConfig
	result.Enabled = cfg.Enabled

	if algorithm := oapi.SnapshotCompressionConfigAlgorithm(cfg.Algorithm); algorithm != "" {
		result.Algorithm = &algorithm
	}

	if cfg.Level != nil {
		levelCopy := *cfg.Level
		result.Level = &levelCopy
	}

	return result
}

// toOAPISnapshotPolicy renders a domain snapshot policy as its API
// counterpart. The compression section is included only when the domain
// policy carries one.
func toOAPISnapshotPolicy(policy instances.SnapshotPolicy) oapi.SnapshotPolicy {
	if policy.Compression == nil {
		return oapi.SnapshotPolicy{}
	}

	converted := toOAPISnapshotCompressionConfig(*policy.Compression)
	return oapi.SnapshotPolicy{Compression: &converted}
}
58 changes: 58 additions & 0 deletions cmd/api/api/instances_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,14 @@ type captureForkManager struct {
err error
}

// captureStandbyManager is a test double that embeds a real instances.Manager
// and overrides StandbyInstance to record its arguments and return canned
// values.
type captureStandbyManager struct {
	instances.Manager
	// lastID is the instance id passed to the most recent StandbyInstance call.
	lastID string
	// lastReq points at a copy of the request passed to the most recent
	// StandbyInstance call.
	lastReq *instances.StandbyInstanceRequest
	// result is returned from StandbyInstance when err is nil.
	result *instances.Instance
	// err, when non-nil, is returned from StandbyInstance instead of result.
	err error
}

type captureUpdateManager struct {
instances.Manager
lastID string
Expand All @@ -222,6 +230,16 @@ func (m *captureForkManager) ForkInstance(ctx context.Context, id string, req in
return m.result, nil
}

// StandbyInstance records the id and a private copy of the request, then
// returns the configured error if one is set, otherwise the configured result.
func (m *captureStandbyManager) StandbyInstance(ctx context.Context, id string, req instances.StandbyInstanceRequest) (*instances.Instance, error) {
	captured := req
	m.lastID, m.lastReq = id, &captured

	if m.err == nil {
		return m.result, nil
	}
	return nil, m.err
}

func (m *captureUpdateManager) UpdateInstance(ctx context.Context, id string, req instances.UpdateInstanceRequest) (*instances.Instance, error) {
reqCopy := req
m.lastID = id
Expand Down Expand Up @@ -677,6 +695,46 @@ func TestForkInstance_InvalidRequest(t *testing.T) {
assert.Equal(t, "invalid_request", badReq.Code)
}

// TestStandbyInstance_InvalidRequest checks that an error wrapping
// instances.ErrInvalidRequest returned by the instance manager is surfaced by
// the StandbyInstance handler as a 400 response with code "invalid_request"
// and the manager's message.
func TestStandbyInstance_InvalidRequest(t *testing.T) {
	t.Parallel()
	svc := newTestService(t)

	// Instance that the middleware-resolved context will hand to the handler.
	source := instances.Instance{
		StoredMetadata: instances.StoredMetadata{
			Id:             "standby-src",
			Name:           "standby-src",
			Image:          "docker.io/library/alpine:latest",
			CreatedAt:      time.Now(),
			HypervisorType: hypervisor.TypeCloudHypervisor,
		},
		State: instances.StateStopped,
	}

	// Wrap the real manager so only StandbyInstance is stubbed; it always
	// fails with an ErrInvalidRequest-wrapped error.
	mockMgr := &captureStandbyManager{
		Manager: svc.InstanceManager,
		err:     fmt.Errorf("%w: invalid snapshot compression level", instances.ErrInvalidRequest),
	}
	svc.InstanceManager = mockMgr

	resp, err := svc.StandbyInstance(
		mw.WithResolvedInstance(ctx(), source.Id, source),
		oapi.StandbyInstanceRequestObject{
			Id: source.Id,
			Body: &oapi.StandbyInstanceRequest{
				Compression: &oapi.SnapshotCompressionConfig{
					Enabled: true,
				},
			},
		},
	)
	// The handler reports the failure through the response, not the Go error.
	require.NoError(t, err)

	badReq, ok := resp.(oapi.StandbyInstance400JSONResponse)
	require.True(t, ok, "expected 400 response")
	assert.Equal(t, "invalid_request", badReq.Code)
	assert.Contains(t, badReq.Message, "invalid snapshot compression level")
}

func TestForkInstance_FromRunningFlagForwarded(t *testing.T) {
t.Parallel()
svc := newTestService(t)
Expand Down
47 changes: 47 additions & 0 deletions cmd/api/api/optional_standby_body.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package api

import (
"io"
"net/http"
"strings"
)

// NormalizeOptionalStandbyBody is HTTP middleware that makes the standby
// request body optional. When a POST to a standby route arrives with an empty
// body, the body is replaced with "{}" (and a JSON Content-Type is filled in
// if none was sent) so the generated strict handler can decode it without
// special casing. Every other request is forwarded unchanged.
func NormalizeOptionalStandbyBody(next http.Handler) http.Handler {
	// substituteEmptyObject swaps in a two-byte JSON object body and keeps the
	// request's length and content-type metadata consistent with it.
	substituteEmptyObject := func(r *http.Request) {
		r.Body = io.NopCloser(strings.NewReader(`{}`))
		r.ContentLength = 2
		if r.Header.Get("Content-Type") == "" {
			r.Header.Set("Content-Type", "application/json")
		}
	}

	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method == http.MethodPost && isStandbyRoutePath(r.URL.Path) && requestBodyIsEmpty(r) {
			substituteEmptyObject(r)
		}
		next.ServeHTTP(w, r)
	})
}

// isStandbyRoutePath reports whether path has the exact shape
// "/instances/{id}/standby", where {id} is a single, non-empty path segment.
//
// The prefix is stripped before the suffix is checked so the two cannot
// overlap: "/instances/standby" both starts with "/instances/" and ends with
// "/standby", yet contains no id segment and must not match.
func isStandbyRoutePath(path string) bool {
	const (
		prefix = "/instances/"
		suffix = "/standby"
	)

	if !strings.HasPrefix(path, prefix) {
		return false
	}
	rest := path[len(prefix):]

	// The suffix must be found in what remains after the prefix.
	if !strings.HasSuffix(rest, suffix) {
		return false
	}
	instanceID := rest[:len(rest)-len(suffix)]

	return instanceID != "" && !strings.Contains(instanceID, "/")
}

// requestBodyIsEmpty reports whether r carries no body. A nil request, a nil
// body, and http.NoBody all count as empty. Otherwise the body is treated as
// empty only when the declared content length is zero and no transfer
// encoding is set.
func requestBodyIsEmpty(r *http.Request) bool {
	switch {
	case r == nil:
		return true
	case r.Body == nil, r.Body == http.NoBody:
		return true
	default:
		return len(r.TransferEncoding) == 0 && r.ContentLength == 0
	}
}
108 changes: 108 additions & 0 deletions cmd/api/api/optional_standby_body_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package api

import (
"bytes"
"io"
"net/http"
"net/http/httptest"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestNormalizeOptionalStandbyBody exercises the NormalizeOptionalStandbyBody
// middleware and its route matcher. Each subtest wraps a capturing handler
// that records what the downstream handler would read from the request.
func TestNormalizeOptionalStandbyBody(t *testing.T) {
	t.Parallel()

	// POST to a standby route with no body: downstream must see "{}" and a
	// JSON content type.
	t.Run("empty standby body becomes empty JSON object", func(t *testing.T) {
		t.Parallel()

		var gotBody []byte
		var gotContentType string
		next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			var err error
			gotBody, err = io.ReadAll(r.Body)
			require.NoError(t, err)
			gotContentType = r.Header.Get("Content-Type")
			w.WriteHeader(http.StatusNoContent)
		})

		req := httptest.NewRequest(http.MethodPost, "/instances/test/standby", nil)
		rec := httptest.NewRecorder()

		NormalizeOptionalStandbyBody(next).ServeHTTP(rec, req)

		assert.Equal(t, http.StatusNoContent, rec.Code)
		assert.Equal(t, []byte(`{}`), gotBody)
		assert.Equal(t, "application/json", gotContentType)
	})

	// POST to a standby route that already has a body: the body must pass
	// through byte-for-byte.
	t.Run("existing standby body is preserved", func(t *testing.T) {
		t.Parallel()

		var gotBody []byte
		next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			var err error
			gotBody, err = io.ReadAll(r.Body)
			require.NoError(t, err)
			w.WriteHeader(http.StatusNoContent)
		})

		req := httptest.NewRequest(http.MethodPost, "/instances/test/standby", bytes.NewBufferString(`{"compression":{"enabled":true}}`))
		rec := httptest.NewRecorder()

		NormalizeOptionalStandbyBody(next).ServeHTTP(rec, req)

		assert.Equal(t, http.StatusNoContent, rec.Code)
		assert.Equal(t, []byte(`{"compression":{"enabled":true}}`), gotBody)
	})

	// POST with no body to a route that is not standby: no body is injected.
	t.Run("non-standby route is untouched", func(t *testing.T) {
		t.Parallel()

		var gotBody []byte
		next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			var err error
			gotBody, err = io.ReadAll(r.Body)
			require.NoError(t, err)
			w.WriteHeader(http.StatusNoContent)
		})

		req := httptest.NewRequest(http.MethodPost, "/instances/test/start", nil)
		rec := httptest.NewRecorder()

		NormalizeOptionalStandbyBody(next).ServeHTTP(rec, req)

		assert.Equal(t, http.StatusNoContent, rec.Code)
		assert.Empty(t, gotBody)
	})

	// GET on the standby path: the method check short-circuits and no body is
	// injected.
	t.Run("non-post request skips standby normalization", func(t *testing.T) {
		t.Parallel()

		var gotBody []byte
		next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			var err error
			gotBody, err = io.ReadAll(r.Body)
			require.NoError(t, err)
			w.WriteHeader(http.StatusNoContent)
		})

		req := httptest.NewRequest(http.MethodGet, "/instances/test/standby", nil)
		rec := httptest.NewRecorder()

		NormalizeOptionalStandbyBody(next).ServeHTTP(rec, req)

		assert.Equal(t, http.StatusNoContent, rec.Code)
		assert.Empty(t, gotBody)
	})

	// Direct checks of the path matcher: exactly one id segment between the
	// "/instances/" prefix and the "/standby" suffix.
	t.Run("standby route matcher only accepts single path segment ids", func(t *testing.T) {
		t.Parallel()

		assert.True(t, isStandbyRoutePath("/instances/test/standby"))
		assert.False(t, isStandbyRoutePath("/instances/test/start"))
		assert.False(t, isStandbyRoutePath("/instances/test/standby/extra"))
		assert.False(t, isStandbyRoutePath("/instances/test/nested/standby"))
	})
}
Loading
Loading