diff --git a/Makefile b/Makefile index 7264de7f..30fa33f2 100644 --- a/Makefile +++ b/Makefile @@ -217,7 +217,9 @@ lib/system/init/init: lib/system/init/*.go @echo "Building init binary for Linux..." cd lib/system/init && CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o init . -build-embedded: lib/system/guest_agent/guest-agent lib/system/init/init +build-embedded: + @$(MAKE) -B lib/system/guest_agent/guest-agent + @$(MAKE) -B lib/system/init/init # Build the binary build: @@ -305,15 +307,18 @@ test-guestmemory-linux: ensure-ch-binaries ensure-firecracker-binaries ensure-ca @TEST_PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:$$PATH"; \ GUESTMEM_TIMEOUT="$${GUESTMEMORY_TEST_TIMEOUT:-15m}"; \ echo "Running manual guest memory integration tests (CloudHypervisor, QEMU, Firecracker)"; \ - sudo env "PATH=$$TEST_PATH" "DOCKER_CONFIG=$${DOCKER_CONFIG:-$$HOME/.docker}" "HYPEMAN_RUN_GUESTMEMORY_TESTS=1" \ - go test -tags containers_image_openpgp -run='^TestGuestMemoryPolicy(CloudHypervisor|QEMU|Firecracker)$$' -timeout="$$GUESTMEM_TIMEOUT" ./lib/instances + for TEST_NAME in TestGuestMemoryPolicyCloudHypervisor TestGuestMemoryPolicyQEMU TestGuestMemoryPolicyFirecracker; do \ + echo "Running $$TEST_NAME"; \ + sudo env "PATH=$$TEST_PATH" "DOCKER_CONFIG=$${DOCKER_CONFIG:-$$HOME/.docker}" "HYPEMAN_RUN_GUESTMEMORY_TESTS=1" \ + go test -count=1 -tags containers_image_openpgp -run="^$$TEST_NAME$$" -timeout="$$GUESTMEM_TIMEOUT" ./lib/instances || exit $$?; \ + done # Manual-only guest memory policy integration test (macOS VZ). 
test-guestmemory-vz: build-embedded sign-vz-shim @echo "Running manual guest memory integration test (VZ)"; \ PATH="/opt/homebrew/opt/e2fsprogs/sbin:$(PATH)" \ HYPEMAN_RUN_GUESTMEMORY_TESTS=1 \ - go test -tags containers_image_openpgp -run='^TestGuestMemoryPolicyVZ$$' -timeout=$(TEST_TIMEOUT) ./lib/instances + go test -count=1 -tags containers_image_openpgp -run='^TestGuestMemoryPolicyVZ$$' -timeout=$(TEST_TIMEOUT) ./lib/instances # Generate JWT token for testing # Usage: make gen-jwt [USER_ID=test-user] diff --git a/cmd/api/api/api.go b/cmd/api/api/api.go index efb65eb1..47f828ed 100644 --- a/cmd/api/api/api.go +++ b/cmd/api/api/api.go @@ -4,6 +4,7 @@ import ( "github.com/kernel/hypeman/cmd/api/config" "github.com/kernel/hypeman/lib/builds" "github.com/kernel/hypeman/lib/devices" + "github.com/kernel/hypeman/lib/guestmemory" "github.com/kernel/hypeman/lib/images" "github.com/kernel/hypeman/lib/ingress" "github.com/kernel/hypeman/lib/instances" @@ -16,16 +17,17 @@ import ( // ApiService implements the oapi.StrictServerInterface type ApiService struct { - Config *config.Config - ImageManager images.Manager - InstanceManager instances.Manager - VolumeManager volumes.Manager - NetworkManager network.Manager - DeviceManager devices.Manager - IngressManager ingress.Manager - BuildManager builds.Manager - ResourceManager *resources.Manager - VMMetricsManager *vm_metrics.Manager + Config *config.Config + ImageManager images.Manager + InstanceManager instances.Manager + VolumeManager volumes.Manager + NetworkManager network.Manager + DeviceManager devices.Manager + IngressManager ingress.Manager + BuildManager builds.Manager + ResourceManager *resources.Manager + GuestMemoryController guestmemory.Controller + VMMetricsManager *vm_metrics.Manager } var _ oapi.StrictServerInterface = (*ApiService)(nil) @@ -41,18 +43,20 @@ func New( ingressManager ingress.Manager, buildManager builds.Manager, resourceManager *resources.Manager, + guestMemoryController guestmemory.Controller, 
vmMetricsManager *vm_metrics.Manager, ) *ApiService { return &ApiService{ - Config: config, - ImageManager: imageManager, - InstanceManager: instanceManager, - VolumeManager: volumeManager, - NetworkManager: networkManager, - DeviceManager: deviceManager, - IngressManager: ingressManager, - BuildManager: buildManager, - ResourceManager: resourceManager, - VMMetricsManager: vmMetricsManager, + Config: config, + ImageManager: imageManager, + InstanceManager: instanceManager, + VolumeManager: volumeManager, + NetworkManager: networkManager, + DeviceManager: deviceManager, + IngressManager: ingressManager, + BuildManager: buildManager, + ResourceManager: resourceManager, + GuestMemoryController: guestMemoryController, + VMMetricsManager: vmMetricsManager, } } diff --git a/cmd/api/api/api_test.go b/cmd/api/api/api_test.go index a6c2edcf..a763b630 100644 --- a/cmd/api/api/api_test.go +++ b/cmd/api/api/api_test.go @@ -112,6 +112,13 @@ func ctx() context.Context { return context.Background() } +func integrationTestTimeout(timeout time.Duration) time.Duration { + if os.Getenv("CI") == "true" && timeout < 45*time.Second { + return 45 * time.Second + } + return timeout +} + // ctxWithInstance creates a context with a resolved instance (simulates ResolveResource middleware) func ctxWithInstance(svc *ApiService, idOrName string) context.Context { inst, err := svc.InstanceManager.GetInstance(ctx(), idOrName) diff --git a/cmd/api/api/cp_test.go b/cmd/api/api/cp_test.go index 0fee9eff..766cbd33 100644 --- a/cmd/api/api/cp_test.go +++ b/cmd/api/api/cp_test.go @@ -67,7 +67,7 @@ func TestCpToAndFromInstance(t *testing.T) { // Wait for guest-agent to be ready t.Log("Waiting for guest-agent to start...") agentReady := false - agentTimeout := time.After(15 * time.Second) + agentTimeout := time.After(integrationTestTimeout(15 * time.Second)) agentTicker := time.NewTicker(500 * time.Millisecond) defer agentTicker.Stop() @@ -207,7 +207,7 @@ func TestCpDirectoryToInstance(t *testing.T) { // 
Wait for guest-agent t.Log("Waiting for guest-agent...") agentReady := false - agentTimeout := time.After(15 * time.Second) + agentTimeout := time.After(integrationTestTimeout(15 * time.Second)) agentTicker := time.NewTicker(500 * time.Millisecond) defer agentTicker.Stop() diff --git a/cmd/api/api/exec_test.go b/cmd/api/api/exec_test.go index b460e2fa..c679fa81 100644 --- a/cmd/api/api/exec_test.go +++ b/cmd/api/api/exec_test.go @@ -68,7 +68,7 @@ func TestExecInstanceNonTTY(t *testing.T) { // Wait for nginx to be fully started (poll console logs) t.Log("Waiting for nginx to start...") nginxReady := false - nginxTimeout := time.After(15 * time.Second) + nginxTimeout := time.After(integrationTestTimeout(15 * time.Second)) nginxTicker := time.NewTicker(500 * time.Millisecond) defer nginxTicker.Stop() @@ -227,7 +227,7 @@ func TestExecWithDebianMinimal(t *testing.T) { // This is the key difference: we wait for guest-agent, not the app (which exits immediately) t.Log("Waiting for guest-agent to start...") execAgentReady := false - agentTimeout := time.After(15 * time.Second) + agentTimeout := time.After(integrationTestTimeout(15 * time.Second)) agentTicker := time.NewTicker(500 * time.Millisecond) defer agentTicker.Stop() diff --git a/cmd/api/api/resources_reclaim.go b/cmd/api/api/resources_reclaim.go new file mode 100644 index 00000000..3a66343a --- /dev/null +++ b/cmd/api/api/resources_reclaim.go @@ -0,0 +1,180 @@ +package api + +import ( + "context" + "errors" + "time" + + "github.com/kernel/hypeman/lib/guestmemory" + "github.com/kernel/hypeman/lib/logger" + "github.com/kernel/hypeman/lib/oapi" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" +) + +const ( + defaultMemoryReclaimHold = 5 * time.Minute + maxMemoryReclaimHold = 1 * time.Hour +) + +// ReclaimMemory triggers proactive guest memory reclaim via runtime ballooning. 
+func (s *ApiService) ReclaimMemory(ctx context.Context, request oapi.ReclaimMemoryRequestObject) (oapi.ReclaimMemoryResponseObject, error) { + log := logger.FromContext(ctx) + if request.Body == nil { + return oapi.ReclaimMemory400JSONResponse{ + Code: "bad_request", + Message: "request body is required", + }, nil + } + if s.GuestMemoryController == nil { + return oapi.ReclaimMemory500JSONResponse{ + Code: "internal_error", + Message: "guest memory controller not initialized", + }, nil + } + + holdFor, err := parseMemoryReclaimHold(request.Body) + if err != nil { + return oapi.ReclaimMemory400JSONResponse{ + Code: "bad_request", + Message: err.Error(), + }, nil + } + + tracer := otel.Tracer("hypeman/guestmemory") + ctx, span := tracer.Start(ctx, "guestmemory.manual_reclaim", + traceAttrsForManualReclaim(request.Body.ReclaimBytes, holdFor, request.Body.DryRun != nil && *request.Body.DryRun, request.Body.Reason != nil)) + defer span.End() + + log.InfoContext(ctx, + "manual guest memory reclaim requested", + "operation", "manual_reclaim", + "requested_reclaim_bytes", request.Body.ReclaimBytes, + "hold_for_seconds", holdFor.Seconds(), + "dry_run", request.Body.DryRun != nil && *request.Body.DryRun, + "reason_present", request.Body.Reason != nil, + ) + + resp, err := s.GuestMemoryController.TriggerReclaim(ctx, guestmemory.ManualReclaimRequest{ + ReclaimBytes: request.Body.ReclaimBytes, + HoldFor: holdFor, + DryRun: request.Body.DryRun != nil && *request.Body.DryRun, + Reason: derefString(request.Body.Reason), + }) + if err != nil { + switch { + case errors.Is(err, guestmemory.ErrGuestMemoryDisabled), errors.Is(err, guestmemory.ErrActiveBallooningDisabled): + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + log.WarnContext(ctx, "manual guest memory reclaim rejected", "operation", "manual_reclaim", "error", err) + return oapi.ReclaimMemory400JSONResponse{ + Code: "feature_disabled", + Message: err.Error(), + }, nil + default: + span.RecordError(err) + 
span.SetStatus(codes.Error, err.Error()) + log.ErrorContext(ctx, "manual guest memory reclaim failed", "operation", "manual_reclaim", "error", err) + return oapi.ReclaimMemory500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + } + + span.SetAttributes( + attribute.Int64("planned_reclaim_bytes", resp.PlannedReclaimBytes), + attribute.Int64("applied_reclaim_bytes", resp.AppliedReclaimBytes), + attribute.Int64("host_available_bytes", resp.HostAvailableBytes), + attribute.String("host_pressure_state", string(resp.HostPressureState)), + attribute.Int("action_count", len(resp.Actions)), + ) + span.SetStatus(codes.Ok, "") + log.InfoContext(ctx, + "manual guest memory reclaim completed", + "operation", "manual_reclaim", + "planned_reclaim_bytes", resp.PlannedReclaimBytes, + "applied_reclaim_bytes", resp.AppliedReclaimBytes, + "host_available_bytes", resp.HostAvailableBytes, + "host_pressure_state", resp.HostPressureState, + "action_count", len(resp.Actions), + ) + + return oapi.ReclaimMemory200JSONResponse(memoryReclaimResponseToOAPI(resp)), nil +} + +func traceAttrsForManualReclaim(reclaimBytes int64, holdFor time.Duration, dryRun bool, reasonPresent bool) trace.SpanStartOption { + return trace.WithAttributes( + attribute.Int64("requested_reclaim_bytes", reclaimBytes), + attribute.Float64("hold_for_seconds", holdFor.Seconds()), + attribute.Bool("dry_run", dryRun), + attribute.Bool("reason_present", reasonPresent), + ) +} + +func parseMemoryReclaimHold(req *oapi.MemoryReclaimRequest) (time.Duration, error) { + if req == nil { + return 0, nil + } + + if req.HoldFor == nil { + if req.ReclaimBytes > 0 { + return defaultMemoryReclaimHold, nil + } + return 0, nil + } + + holdFor, err := time.ParseDuration(*req.HoldFor) + if err != nil { + return 0, errors.New("hold_for must be a valid duration") + } + if holdFor < 0 { + return 0, errors.New("hold_for must be non-negative") + } + if holdFor > maxMemoryReclaimHold { + return 0, errors.New("hold_for 
must be less than or equal to 1h") + } + return holdFor, nil +} + +func memoryReclaimResponseToOAPI(resp guestmemory.ManualReclaimResponse) oapi.MemoryReclaimResponse { + out := oapi.MemoryReclaimResponse{ + RequestedReclaimBytes: resp.RequestedReclaimBytes, + PlannedReclaimBytes: resp.PlannedReclaimBytes, + AppliedReclaimBytes: resp.AppliedReclaimBytes, + HoldUntil: resp.HoldUntil, + HostAvailableBytes: resp.HostAvailableBytes, + HostPressureState: oapi.MemoryReclaimResponseHostPressureState(resp.HostPressureState), + Actions: make([]oapi.MemoryReclaimAction, 0, len(resp.Actions)), + } + + for _, action := range resp.Actions { + item := oapi.MemoryReclaimAction{ + InstanceId: action.InstanceID, + InstanceName: action.InstanceName, + Hypervisor: oapi.MemoryReclaimActionHypervisor(action.Hypervisor), + AssignedMemoryBytes: action.AssignedMemoryBytes, + ProtectedFloorBytes: action.ProtectedFloorBytes, + PreviousTargetGuestMemoryBytes: action.PreviousTargetGuestMemoryBytes, + PlannedTargetGuestMemoryBytes: action.PlannedTargetGuestMemoryBytes, + TargetGuestMemoryBytes: action.TargetGuestMemoryBytes, + AppliedReclaimBytes: action.AppliedReclaimBytes, + Status: action.Status, + } + if action.Error != "" { + item.Error = &action.Error + } + out.Actions = append(out.Actions, item) + } + + return out +} + +func derefString(v *string) string { + if v == nil { + return "" + } + return *v +} diff --git a/cmd/api/api/resources_reclaim_test.go b/cmd/api/api/resources_reclaim_test.go new file mode 100644 index 00000000..f37ff091 --- /dev/null +++ b/cmd/api/api/resources_reclaim_test.go @@ -0,0 +1,117 @@ +package api + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/kernel/hypeman/lib/guestmemory" + "github.com/kernel/hypeman/lib/hypervisor" + "github.com/kernel/hypeman/lib/oapi" + "github.com/stretchr/testify/require" +) + +type stubGuestMemoryController struct { + response guestmemory.ManualReclaimResponse + err error + requests 
[]guestmemory.ManualReclaimRequest +} + +func (s *stubGuestMemoryController) Start(ctx context.Context) error { + <-ctx.Done() + return nil +} + +func (s *stubGuestMemoryController) TriggerReclaim(ctx context.Context, req guestmemory.ManualReclaimRequest) (guestmemory.ManualReclaimResponse, error) { + s.requests = append(s.requests, req) + return s.response, s.err +} + +func TestReclaimMemory_DefaultHoldAndResponse(t *testing.T) { + controller := &stubGuestMemoryController{ + response: guestmemory.ManualReclaimResponse{ + RequestedReclaimBytes: 512 * 1024 * 1024, + PlannedReclaimBytes: 512 * 1024 * 1024, + AppliedReclaimBytes: 256 * 1024 * 1024, + HostAvailableBytes: 2 * 1024 * 1024 * 1024, + HostPressureState: guestmemory.HostPressureStateHealthy, + Actions: []guestmemory.ManualReclaimAction{ + { + InstanceID: "inst-123", + InstanceName: "guestmem-test", + Hypervisor: hypervisor.TypeQEMU, + AssignedMemoryBytes: 4 * 1024 * 1024 * 1024, + ProtectedFloorBytes: 2 * 1024 * 1024 * 1024, + PreviousTargetGuestMemoryBytes: 4 * 1024 * 1024 * 1024, + PlannedTargetGuestMemoryBytes: 3 * 1024 * 1024 * 1024, + TargetGuestMemoryBytes: 3758096384, + AppliedReclaimBytes: 268435456, + Status: "applied", + }, + }, + }, + } + + svc := &ApiService{GuestMemoryController: controller} + resp, err := svc.ReclaimMemory(context.Background(), oapi.ReclaimMemoryRequestObject{ + Body: &oapi.MemoryReclaimRequest{ + ReclaimBytes: 512 * 1024 * 1024, + Reason: ptr("pack host before launch"), + }, + }) + require.NoError(t, err) + + okResp, ok := resp.(oapi.ReclaimMemory200JSONResponse) + require.True(t, ok) + require.Len(t, controller.requests, 1) + require.Equal(t, 5*time.Minute, controller.requests[0].HoldFor) + require.Equal(t, "pack host before launch", controller.requests[0].Reason) + require.Equal(t, int64(512*1024*1024), okResp.RequestedReclaimBytes) + require.Equal(t, oapi.MemoryReclaimResponseHostPressureState(guestmemory.HostPressureStateHealthy), okResp.HostPressureState) + require.Len(t, 
okResp.Actions, 1) + require.Equal(t, oapi.MemoryReclaimActionHypervisor(hypervisor.TypeQEMU), okResp.Actions[0].Hypervisor) +} + +func TestReclaimMemory_ValidationAndFeatureDisabled(t *testing.T) { + svc := &ApiService{GuestMemoryController: &stubGuestMemoryController{err: guestmemory.ErrActiveBallooningDisabled}} + resp, err := svc.ReclaimMemory(context.Background(), oapi.ReclaimMemoryRequestObject{ + Body: &oapi.MemoryReclaimRequest{ + ReclaimBytes: 256 * 1024 * 1024, + HoldFor: ptr("2h"), + }, + }) + require.NoError(t, err) + + badReq, ok := resp.(oapi.ReclaimMemory400JSONResponse) + require.True(t, ok) + require.Equal(t, "bad_request", badReq.Code) + + resp, err = svc.ReclaimMemory(context.Background(), oapi.ReclaimMemoryRequestObject{ + Body: &oapi.MemoryReclaimRequest{ + ReclaimBytes: 256 * 1024 * 1024, + HoldFor: ptr("10m"), + }, + }) + require.NoError(t, err) + + featureDisabled, ok := resp.(oapi.ReclaimMemory400JSONResponse) + require.True(t, ok) + require.Equal(t, "feature_disabled", featureDisabled.Code) +} + +func TestReclaimMemory_InternalError(t *testing.T) { + svc := &ApiService{GuestMemoryController: &stubGuestMemoryController{err: errors.New("boom")}} + resp, err := svc.ReclaimMemory(context.Background(), oapi.ReclaimMemoryRequestObject{ + Body: &oapi.MemoryReclaimRequest{ReclaimBytes: 128 * 1024 * 1024}, + }) + require.NoError(t, err) + + internalErr, ok := resp.(oapi.ReclaimMemory500JSONResponse) + require.True(t, ok) + require.Equal(t, "internal_error", internalErr.Code) +} + +func ptr(v string) *string { + return &v +} diff --git a/cmd/api/config/config.go b/cmd/api/config/config.go index 086df055..2179d97e 100644 --- a/cmd/api/config/config.go +++ b/cmd/api/config/config.go @@ -9,6 +9,7 @@ import ( "strings" "time" + "github.com/c2h5oh/datasize" "github.com/knadh/koanf/parsers/yaml" "github.com/knadh/koanf/providers/env" "github.com/knadh/koanf/providers/file" @@ -170,10 +171,24 @@ type HypervisorConfig struct { // HypervisorMemoryConfig 
holds guest memory management settings. type HypervisorMemoryConfig struct { - Enabled bool `koanf:"enabled"` - KernelPageInitMode string `koanf:"kernel_page_init_mode"` - ReclaimEnabled bool `koanf:"reclaim_enabled"` - VZBalloonRequired bool `koanf:"vz_balloon_required"` + Enabled bool `koanf:"enabled"` + KernelPageInitMode string `koanf:"kernel_page_init_mode"` + ReclaimEnabled bool `koanf:"reclaim_enabled"` + VZBalloonRequired bool `koanf:"vz_balloon_required"` + ActiveBallooning HypervisorActiveBallooningConfig `koanf:"active_ballooning"` +} + +// HypervisorActiveBallooningConfig holds runtime host-driven reclaim settings. +type HypervisorActiveBallooningConfig struct { + Enabled bool `koanf:"enabled"` + PollInterval string `koanf:"poll_interval"` + PressureHighWatermarkAvailablePercent int `koanf:"pressure_high_watermark_available_percent"` + PressureLowWatermarkAvailablePercent int `koanf:"pressure_low_watermark_available_percent"` + ProtectedFloorPercent int `koanf:"protected_floor_percent"` + ProtectedFloorMinBytes string `koanf:"protected_floor_min_bytes"` + MinAdjustmentBytes string `koanf:"min_adjustment_bytes"` + PerVmMaxStepBytes string `koanf:"per_vm_max_step_bytes"` + PerVmCooldown string `koanf:"per_vm_cooldown"` } // GPUConfig holds GPU-related settings. 
@@ -331,6 +346,17 @@ func defaultConfig() *Config { KernelPageInitMode: "hardened", ReclaimEnabled: true, VZBalloonRequired: true, + ActiveBallooning: HypervisorActiveBallooningConfig{ + Enabled: false, + PollInterval: "2s", + PressureHighWatermarkAvailablePercent: 10, + PressureLowWatermarkAvailablePercent: 15, + ProtectedFloorPercent: 50, + ProtectedFloorMinBytes: "536870912", + MinAdjustmentBytes: "67108864", + PerVmMaxStepBytes: "268435456", + PerVmCooldown: "5s", + }, }, }, @@ -449,5 +475,54 @@ func (c *Config) Validate() error { if c.Hypervisor.Memory.KernelPageInitMode != "performance" && c.Hypervisor.Memory.KernelPageInitMode != "hardened" { return fmt.Errorf("hypervisor.memory.kernel_page_init_mode must be one of {performance,hardened}, got %q", c.Hypervisor.Memory.KernelPageInitMode) } + if err := validateDuration("hypervisor.memory.active_ballooning.poll_interval", c.Hypervisor.Memory.ActiveBallooning.PollInterval); err != nil { + return err + } + if err := validateDuration("hypervisor.memory.active_ballooning.per_vm_cooldown", c.Hypervisor.Memory.ActiveBallooning.PerVmCooldown); err != nil { + return err + } + if err := validateByteSize("hypervisor.memory.active_ballooning.protected_floor_min_bytes", c.Hypervisor.Memory.ActiveBallooning.ProtectedFloorMinBytes); err != nil { + return err + } + if err := validateByteSize("hypervisor.memory.active_ballooning.min_adjustment_bytes", c.Hypervisor.Memory.ActiveBallooning.MinAdjustmentBytes); err != nil { + return err + } + if err := validateByteSize("hypervisor.memory.active_ballooning.per_vm_max_step_bytes", c.Hypervisor.Memory.ActiveBallooning.PerVmMaxStepBytes); err != nil { + return err + } + ab := c.Hypervisor.Memory.ActiveBallooning + if ab.PressureHighWatermarkAvailablePercent <= 0 || ab.PressureHighWatermarkAvailablePercent >= 100 { + return fmt.Errorf("hypervisor.memory.active_ballooning.pressure_high_watermark_available_percent must be between 1 and 99, got %d", 
ab.PressureHighWatermarkAvailablePercent) + } + if ab.PressureLowWatermarkAvailablePercent <= 0 || ab.PressureLowWatermarkAvailablePercent >= 100 { + return fmt.Errorf("hypervisor.memory.active_ballooning.pressure_low_watermark_available_percent must be between 1 and 99, got %d", ab.PressureLowWatermarkAvailablePercent) + } + if ab.PressureLowWatermarkAvailablePercent <= ab.PressureHighWatermarkAvailablePercent { + return fmt.Errorf("hypervisor.memory.active_ballooning.pressure_low_watermark_available_percent must be greater than pressure_high_watermark_available_percent") + } + if ab.ProtectedFloorPercent <= 0 || ab.ProtectedFloorPercent >= 100 { + return fmt.Errorf("hypervisor.memory.active_ballooning.protected_floor_percent must be between 1 and 99, got %d", ab.ProtectedFloorPercent) + } + return nil +} + +func validateByteSize(field string, value string) error { + if strings.TrimSpace(value) == "" { + return fmt.Errorf("%s must not be empty", field) + } + var size datasize.ByteSize + if err := size.UnmarshalText([]byte(value)); err != nil { + return fmt.Errorf("%s must be a valid byte size, got %q: %w", field, value, err) + } + return nil +} + +func validateDuration(field string, value string) error { + if strings.TrimSpace(value) == "" { + return fmt.Errorf("%s must not be empty", field) + } + if _, err := time.ParseDuration(value); err != nil { + return fmt.Errorf("%s must be a valid duration, got %q: %w", field, value, err) + } return nil } diff --git a/cmd/api/config/config_test.go b/cmd/api/config/config_test.go index 97828db5..0429ac49 100644 --- a/cmd/api/config/config_test.go +++ b/cmd/api/config/config_test.go @@ -3,7 +3,11 @@ package config import ( "os" "path/filepath" + "strings" "testing" + + "github.com/c2h5oh/datasize" + "github.com/kernel/hypeman/lib/guestmemory" ) func TestDefaultConfigIncludesMetricsSettings(t *testing.T) { @@ -83,3 +87,46 @@ func TestValidateRejectsInvalidVMLabelBudget(t *testing.T) { t.Fatalf("expected validation error for 
invalid vm label budget") } } + +func TestValidateRejectsEmptyActiveBallooningDurations(t *testing.T) { + cfg := defaultConfig() + cfg.Hypervisor.Memory.ActiveBallooning.PollInterval = " " + + err := cfg.Validate() + if err == nil || !strings.Contains(err.Error(), "poll_interval must not be empty") { + t.Fatalf("expected poll_interval empty validation error, got %v", err) + } + + cfg = defaultConfig() + cfg.Hypervisor.Memory.ActiveBallooning.PerVmCooldown = "" + + err = cfg.Validate() + if err == nil || !strings.Contains(err.Error(), "per_vm_cooldown must not be empty") { + t.Fatalf("expected per_vm_cooldown empty validation error, got %v", err) + } +} + +func TestDefaultConfigActiveBallooningMatchesGoDefaults(t *testing.T) { + cfg := defaultConfig() + want := guestmemory.DefaultActiveBallooningConfig() + + parse := func(value string) int64 { + t.Helper() + + var size datasize.ByteSize + if err := size.UnmarshalText([]byte(value)); err != nil { + t.Fatalf("parse default byte size %q: %v", value, err) + } + return int64(size) + } + + if got := parse(cfg.Hypervisor.Memory.ActiveBallooning.ProtectedFloorMinBytes); got != want.ProtectedFloorMinBytes { + t.Fatalf("protected floor default mismatch: got %d want %d", got, want.ProtectedFloorMinBytes) + } + if got := parse(cfg.Hypervisor.Memory.ActiveBallooning.MinAdjustmentBytes); got != want.MinAdjustmentBytes { + t.Fatalf("min adjustment default mismatch: got %d want %d", got, want.MinAdjustmentBytes) + } + if got := parse(cfg.Hypervisor.Memory.ActiveBallooning.PerVmMaxStepBytes); got != want.PerVMMaxStepBytes { + t.Fatalf("per-vm max step default mismatch: got %d want %d", got, want.PerVMMaxStepBytes) + } +} diff --git a/cmd/api/main.go b/cmd/api/main.go index 63fdc284..8057a36f 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -426,6 +426,14 @@ func run() error { return err } + grp.Go(func() error { + if app.GuestMemoryController == nil { + return nil + } + logger.Info("starting guest memory controller") + return 
app.GuestMemoryController.Start(gctx) + }) + // Run the server grp.Go(func() error { logger.Info("starting hypeman API", "port", app.Config.Port) diff --git a/cmd/api/wire.go b/cmd/api/wire.go index 93b307de..b50c27e2 100644 --- a/cmd/api/wire.go +++ b/cmd/api/wire.go @@ -11,6 +11,7 @@ import ( "github.com/kernel/hypeman/cmd/api/config" "github.com/kernel/hypeman/lib/builds" "github.com/kernel/hypeman/lib/devices" + "github.com/kernel/hypeman/lib/guestmemory" "github.com/kernel/hypeman/lib/images" "github.com/kernel/hypeman/lib/ingress" "github.com/kernel/hypeman/lib/instances" @@ -25,21 +26,22 @@ import ( // application struct to hold initialized components type application struct { - Ctx context.Context - Logger *slog.Logger - Config *config.Config - ImageManager images.Manager - SystemManager system.Manager - NetworkManager network.Manager - DeviceManager devices.Manager - InstanceManager instances.Manager - VolumeManager volumes.Manager - IngressManager ingress.Manager - BuildManager builds.Manager - ResourceManager *resources.Manager - VMMetricsManager *vm_metrics.Manager - Registry *registry.Registry - ApiService *api.ApiService + Ctx context.Context + Logger *slog.Logger + Config *config.Config + ImageManager images.Manager + SystemManager system.Manager + NetworkManager network.Manager + DeviceManager devices.Manager + InstanceManager instances.Manager + VolumeManager volumes.Manager + IngressManager ingress.Manager + BuildManager builds.Manager + ResourceManager *resources.Manager + GuestMemoryController guestmemory.Controller + VMMetricsManager *vm_metrics.Manager + Registry *registry.Registry + ApiService *api.ApiService } // initializeApp is the injector function @@ -58,6 +60,7 @@ func initializeApp() (*application, func(), error) { providers.ProvideIngressManager, providers.ProvideBuildManager, providers.ProvideResourceManager, + providers.ProvideGuestMemoryController, providers.ProvideVMMetricsManager, providers.ProvideRegistry, api.New, diff --git 
a/cmd/api/wire_gen.go b/cmd/api/wire_gen.go index 6eb15ce6..a5007a7e 100644 --- a/cmd/api/wire_gen.go +++ b/cmd/api/wire_gen.go @@ -12,6 +12,7 @@ import ( "github.com/kernel/hypeman/cmd/api/config" "github.com/kernel/hypeman/lib/builds" "github.com/kernel/hypeman/lib/devices" + "github.com/kernel/hypeman/lib/guestmemory" "github.com/kernel/hypeman/lib/images" "github.com/kernel/hypeman/lib/ingress" "github.com/kernel/hypeman/lib/instances" @@ -64,6 +65,10 @@ func initializeApp() (*application, func(), error) { if err != nil { return nil, nil, err } + controller, err := providers.ProvideGuestMemoryController(instancesManager, config, logger) + if err != nil { + return nil, nil, err + } vm_metricsManager, err := providers.ProvideVMMetricsManager(instancesManager, config, logger) if err != nil { return nil, nil, err @@ -72,23 +77,24 @@ func initializeApp() (*application, func(), error) { if err != nil { return nil, nil, err } - apiService := api.New(config, manager, instancesManager, volumesManager, networkManager, devicesManager, ingressManager, buildsManager, resourcesManager, vm_metricsManager) + apiService := api.New(config, manager, instancesManager, volumesManager, networkManager, devicesManager, ingressManager, buildsManager, resourcesManager, controller, vm_metricsManager) mainApplication := &application{ - Ctx: context, - Logger: logger, - Config: config, - ImageManager: manager, - SystemManager: systemManager, - NetworkManager: networkManager, - DeviceManager: devicesManager, - InstanceManager: instancesManager, - VolumeManager: volumesManager, - IngressManager: ingressManager, - BuildManager: buildsManager, - ResourceManager: resourcesManager, - VMMetricsManager: vm_metricsManager, - Registry: registry, - ApiService: apiService, + Ctx: context, + Logger: logger, + Config: config, + ImageManager: manager, + SystemManager: systemManager, + NetworkManager: networkManager, + DeviceManager: devicesManager, + InstanceManager: instancesManager, + VolumeManager: 
volumesManager, + IngressManager: ingressManager, + BuildManager: buildsManager, + ResourceManager: resourcesManager, + GuestMemoryController: controller, + VMMetricsManager: vm_metricsManager, + Registry: registry, + ApiService: apiService, } return mainApplication, func() { }, nil @@ -98,19 +104,20 @@ func initializeApp() (*application, func(), error) { // application struct to hold initialized components type application struct { - Ctx context.Context - Logger *slog.Logger - Config *config.Config - ImageManager images.Manager - SystemManager system.Manager - NetworkManager network.Manager - DeviceManager devices.Manager - InstanceManager instances.Manager - VolumeManager volumes.Manager - IngressManager ingress.Manager - BuildManager builds.Manager - ResourceManager *resources.Manager - VMMetricsManager *vm_metrics.Manager - Registry *registry.Registry - ApiService *api.ApiService + Ctx context.Context + Logger *slog.Logger + Config *config.Config + ImageManager images.Manager + SystemManager system.Manager + NetworkManager network.Manager + DeviceManager devices.Manager + InstanceManager instances.Manager + VolumeManager volumes.Manager + IngressManager ingress.Manager + BuildManager builds.Manager + ResourceManager *resources.Manager + GuestMemoryController guestmemory.Controller + VMMetricsManager *vm_metrics.Manager + Registry *registry.Registry + ApiService *api.ApiService } diff --git a/cmd/vz-shim/server.go b/cmd/vz-shim/server.go index 9acf0095..74640b8e 100644 --- a/cmd/vz-shim/server.go +++ b/cmd/vz-shim/server.go @@ -45,6 +45,10 @@ type snapshotRequest struct { DestinationPath string `json:"destination_path"` } +type balloonRequest struct { + TargetGuestMemoryBytes int64 `json:"target_guest_memory_bytes"` +} + // Handler returns the HTTP handler for the control API. 
func (s *ShimServer) Handler() http.Handler { mux := http.NewServeMux() @@ -55,6 +59,8 @@ func (s *ShimServer) Handler() http.Handler { mux.HandleFunc("PUT /api/v1/vm.resume", s.handleResume) mux.HandleFunc("PUT /api/v1/vm.shutdown", s.handleShutdown) mux.HandleFunc("PUT /api/v1/vm.snapshot", s.handleSnapshot) + mux.HandleFunc("GET /api/v1/vm.balloon", s.handleGetBalloon) + mux.HandleFunc("PUT /api/v1/vm.balloon", s.handleSetBalloon) mux.HandleFunc("PUT /api/v1/vm.power-button", s.handlePowerButton) mux.HandleFunc("GET /api/v1/vmm.ping", s.handlePing) mux.HandleFunc("PUT /api/v1/vmm.shutdown", s.handleVMMShutdown) @@ -203,6 +209,46 @@ func (s *ShimServer) handleSnapshot(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNoContent) } +func (s *ShimServer) handleGetBalloon(w http.ResponseWriter, r *http.Request) { + s.mu.RLock() + defer s.mu.RUnlock() + + device, err := s.getTraditionalBalloonDevice() + if err != nil { + http.Error(w, err.Error(), http.StatusNotFound) + return + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(balloonRequest{ + TargetGuestMemoryBytes: int64(device.GetTargetVirtualMachineMemorySize()), + }) +} + +func (s *ShimServer) handleSetBalloon(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + device, err := s.getTraditionalBalloonDevice() + if err != nil { + http.Error(w, err.Error(), http.StatusNotFound) + return + } + + var req balloonRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid balloon request: %v", err), http.StatusBadRequest) + return + } + if req.TargetGuestMemoryBytes < 0 { + http.Error(w, "target_guest_memory_bytes must be non-negative", http.StatusBadRequest) + return + } + + device.SetTargetVirtualMachineMemorySize(uint64(req.TargetGuestMemoryBytes)) + w.WriteHeader(http.StatusNoContent) +} + func (s *ShimServer) handlePowerButton(w http.ResponseWriter, r *http.Request) { s.mu.Lock() defer 
s.mu.Unlock() @@ -219,6 +265,15 @@ func (s *ShimServer) handlePowerButton(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNoContent) } +func (s *ShimServer) getTraditionalBalloonDevice() (*vz.VirtioTraditionalMemoryBalloonDevice, error) { + for _, device := range s.vm.MemoryBalloonDevices() { + if traditional := vz.AsVirtioTraditionalMemoryBalloonDevice(device); traditional != nil { + return traditional, nil + } + } + return nil, fmt.Errorf("no memory balloon device configured") +} + func (s *ShimServer) handlePing(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) w.Write([]byte("OK")) diff --git a/config.example.darwin.yaml b/config.example.darwin.yaml index a67e5ab2..b3ee8813 100644 --- a/config.example.darwin.yaml +++ b/config.example.darwin.yaml @@ -39,6 +39,16 @@ hypervisor: kernel_page_init_mode: hardened reclaim_enabled: true vz_balloon_required: true + active_ballooning: + enabled: false + poll_interval: 2s + pressure_high_watermark_available_percent: 10 + pressure_low_watermark_available_percent: 15 + protected_floor_percent: 50 + protected_floor_min_bytes: 536870912 + min_adjustment_bytes: 67108864 + per_vm_max_step_bytes: 268435456 + per_vm_cooldown: 5s # ============================================================================= # Network Configuration (DIFFERENT ON MACOS) diff --git a/config.example.yaml b/config.example.yaml index 2dff61c5..bfd30a27 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -33,6 +33,16 @@ data_dir: /var/lib/hypeman # kernel_page_init_mode: hardened # reclaim_enabled: true # vz_balloon_required: true +# active_ballooning: +# enabled: false +# poll_interval: 2s +# pressure_high_watermark_available_percent: 10 +# pressure_low_watermark_available_percent: 15 +# protected_floor_percent: 50 +# protected_floor_min_bytes: 536870912 +# min_adjustment_bytes: 67108864 +# per_vm_max_step_bytes: 268435456 +# per_vm_cooldown: 5s # 
============================================================================= # Network Configuration diff --git a/lib/guestmemory/README.md b/lib/guestmemory/README.md index e60e7d81..028b2a6c 100644 --- a/lib/guestmemory/README.md +++ b/lib/guestmemory/README.md @@ -1,102 +1,205 @@ -# Guest Memory Reclaim +# Guest Memory -This feature reduces host RAM waste from guest VMs by combining three behaviors: +Hypeman's guest-memory feature combines passive reclaim and active reclaim. -1. Lazy host allocation preservation: -The VM is configured with requested memory capacity, but host pages should only back guest pages as they are touched. +- Passive reclaim gives pages back to the host when the guest has already freed them. +- Active reclaim asks the guest to give memory back by inflating its virtio balloon target. +- Linux page-init tuning controls whether the guest eagerly scrubs pages on allocation/free. -2. Guest-to-host reclaim: -When the guest frees memory, virtio balloon/reporting/hinting features let the VMM return those pages to the host. +The important distinction is that active ballooning is not `drop_caches`. Balloon inflation makes the guest kernel feel memory pressure, so the guest reclaims memory through its normal LRU and reclaim paths. That lets the guest keep hot working-set cache and evict colder pages first. -3. Guest boot page-touch reduction: -The guest kernel page-init mode controls whether Linux eagerly touches pages: -- `performance` mode sets `init_on_alloc=0 init_on_free=0` for better density and lower memory churn. -- `hardened` mode sets `init_on_alloc=1 init_on_free=1` for stronger memory hygiene at some density/perf cost. 
+## What Happens At Runtime -## Configuration +When `hypervisor.memory.enabled=true`, Hypeman enables the guest-memory features each hypervisor supports: -This feature is controlled by `hypervisor.memory` in server config and is default-off: +- Cloud Hypervisor configures a balloon device with free-page reporting and deflate-on-oom. +- QEMU adds a virtio balloon device and enables free-page reporting when available. +- Firecracker configures ballooning with hinting/reporting and deflate-on-oom. +- VZ attaches a traditional memory balloon device through `vz-shim`. -```yaml -hypervisor: - memory: - enabled: false - kernel_page_init_mode: hardened - reclaim_enabled: true - vz_balloon_required: true -``` +When `kernel_page_init_mode=performance`, Hypeman also adds `init_on_alloc=0 init_on_free=0` to the guest kernel command line. That reduces unnecessary guest page touching during boot and steady-state reclaim. `hardened` keeps both flags enabled. + +## Automatic Active Ballooning -To enable reclaim behavior and density-oriented kernel args, set: +Automatic ballooning is controlled by `hypervisor.memory.active_ballooning`. ```yaml hypervisor: memory: enabled: true + reclaim_enabled: true kernel_page_init_mode: performance + active_ballooning: + enabled: true + poll_interval: 2s + pressure_high_watermark_available_percent: 10 + pressure_low_watermark_available_percent: 15 + protected_floor_percent: 50 + protected_floor_min_bytes: 512MB + min_adjustment_bytes: 64MB + per_vm_max_step_bytes: 256MB + per_vm_cooldown: 5s ``` -## Runtime Flow +The automatic loop is pressure-driven by default: + +1. Hypeman samples host memory pressure. +2. If the host is under pressure, it computes a global reclaim target. +3. Eligible VMs are asked to give back memory proportionally to their reclaimable headroom. +4. Each hypervisor gets a new runtime balloon target. +5. When the host is healthy again, Hypeman gradually deflates balloons back toward full guest memory. 
+ +The controller uses hysteresis so it does not flap when available memory hovers near the threshold: + +- `pressure_high_watermark_available_percent` enters pressure mode. +- `pressure_low_watermark_available_percent` exits pressure mode. + +### Host Pressure Signals + +Linux uses: + +- `/proc/meminfo` `MemAvailable` as the primary available-memory signal +- `/proc/pressure/memory` PSI as a secondary stress signal + +macOS uses: + +- `vm_stat` free/speculative pages to estimate available memory +- `memory_pressure -Q` as a secondary stress signal + +## Protected Floors And Allocation Rules + +Active reclaim never shrinks a guest below its protected floor: + +- `protected_floor_percent` reserves a percentage of assigned guest RAM +- `protected_floor_min_bytes` reserves an absolute minimum +- the larger of the two becomes the guest's floor + +Example: + +- a 4 GiB guest with `protected_floor_percent=50` has a 2 GiB floor +- if `protected_floor_min_bytes=512MB`, the effective floor is still 2 GiB +- Hypeman can reclaim at most 2 GiB from that guest + +Reclaim is also rate-limited: + +- `min_adjustment_bytes` skips tiny target changes +- `per_vm_max_step_bytes` caps how much one reconcile can change a guest +- `per_vm_cooldown` prevents frequent small oscillations + +## Manual Reclaim API + +Hypeman also exposes a proactive reclaim endpoint: + +- `POST /resources/memory/reclaim` + +Request fields: -- Operator config (`hypervisor.memory`) is normalized into one policy. -- The instances layer applies policy generically: - - merges kernel args with the selected page-init mode; - - sets generic memory feature toggles in `hypervisor.VMConfig.GuestMemory`. -- Each hypervisor backend maps generic toggles to native mechanisms: - - Cloud Hypervisor: `balloon` config with free page reporting and deflate-on-oom. - - QEMU: `virtio-balloon-pci` device options. - - Firecracker: `/balloon` API with free page hinting/reporting. - - VZ: attach `VirtioTraditionalMemoryBalloon` device. 
+- `reclaim_bytes`: required total reclaim target across eligible guests +- `hold_for`: optional duration, default `5m`, max `1h` +- `dry_run`: optional, computes the plan without applying it +- `reason`: optional operator note for logs/traces -## Backend Behavior Matrix +Manual reclaim uses the same planner and protected floors as automatic reclaim. When `hold_for` is set, Hypeman keeps at least that much reclaim in place until the hold expires, even if host pressure clears sooner. Sending `reclaim_bytes=0` with `hold_for=0s` clears the hold and allows full deflation immediately. -| Hypervisor | Lazy allocation | Balloon | Free page reporting/hinting | Deflate on OOM | -|---|---|---|---|---| -| Cloud Hypervisor | Yes | Yes | Reporting | Yes | -| QEMU | Yes | Yes | Reporting (+ hinting when enabled) | Yes | -| Firecracker | Yes | Yes | Hinting + reporting | Yes | -| VZ | macOS-managed | Yes | Host-managed + guest cooperation | Host-managed | +By design, Hypeman does not reclaim memory without a reason. Automatic reclaim only happens under real host pressure. Proactive reclaim without host pressure is only done when an operator explicitly asks for it through the API. + +## Observability + +Active ballooning emits structured logs, metrics, and traces so operators can tell whether reclaim is healthy and effective. 
+ +Logs: + +- manual reclaim requests log start, success, and failure +- pressure state transitions log the old and new state plus current host availability +- per-VM apply failures log the affected `instance_id`, hypervisor, and requested target +- automatic reconcile summaries log when pressure changes, reclaim is applied, or errors occur + +Metrics: + +- `hypeman_guestmemory_reconcile_total` and `hypeman_guestmemory_reconcile_duration_seconds` +- `hypeman_guestmemory_reclaim_actions_total` +- `hypeman_guestmemory_pressure_transitions_total` +- `hypeman_guestmemory_sampler_errors_total` +- `hypeman_guestmemory_reclaim_bytes` +- `hypeman_guestmemory_host_available_bytes` +- `hypeman_guestmemory_target_reclaim_bytes` +- `hypeman_guestmemory_applied_reclaim_bytes` +- `hypeman_guestmemory_manual_hold_active` +- `hypeman_guestmemory_eligible_vms_total` +- `hypeman_guestmemory_pressure_state` + +Traces: + +- manual API calls create a `guestmemory.manual_reclaim` span +- each reconcile creates a `guestmemory.reconcile` span +- child spans capture host pressure sampling, VM enumeration, and balloon target application + +## Passive Reclaim vs Active Ballooning + +Passive reclaim and active reclaim are complementary: + +- free-page reporting/hinting handles "the guest freed this already" +- active ballooning handles "the host needs memory back now" + +Both are useful. Passive reporting improves density opportunistically. Active ballooning gives Hypeman a control loop for pressure events and explicit operator requests. 
+ +## Hypervisor Expectations + +Cloud Hypervisor: + +- boot-time ballooning plus free-page reporting +- runtime target changes through `/vm.resize` + +QEMU: + +- virtio balloon device on the VM command line +- runtime target changes through QMP `balloon` + +Firecracker: + +- balloon config at boot with hinting/reporting +- runtime target changes through the balloon API +- if a custom or older binary lacks the runtime balloon endpoint, Hypeman skips active reclaim for that VM + +VZ: + +- traditional memory balloon device attached through `vz-shim` +- runtime target changes through `vz-shim` balloon endpoints ## Failure Behavior -- If policy is disabled, memory features are not applied. -- If reclaim is disabled, balloon/reporting/hinting are not applied. -- For VZ, balloon attachment is attempted when enabled. - - If `vz_balloon_required=true`, startup fails if balloon cannot be configured. - - If `vz_balloon_required=false`, startup continues without balloon and logs a warning. +- If `hypervisor.memory.enabled=false`, none of the guest-memory features are configured. +- If `reclaim_enabled=false`, passive reclaim and active ballooning are both disabled. +- If `active_ballooning.enabled=false`, the background pressure loop stays off and the manual reclaim endpoint returns a feature-disabled error. +- If a specific VM or hypervisor backend does not support runtime balloon control, Hypeman skips that VM and continues with the rest. +- `deflate_on_oom` stays enabled where supported so guests can recover memory quickly during real guest-side pressure. 
+ +## Manual Integration Tests + +The guest-memory integration tests are manual by default and cover one test per hypervisor: -## Quick CLI Experiment +- Linux: `TestGuestMemoryPolicyCloudHypervisor` +- Linux: `TestGuestMemoryPolicyQEMU` +- Linux: `TestGuestMemoryPolicyFirecracker` +- macOS: `TestGuestMemoryPolicyVZ` -Use this A/B check to compare host memory footprint with policy enabled vs disabled: +All of them live in the existing `lib/instances` guest-memory test files and are gated by: ```bash -# 1) Start API with config A (hypervisor.memory.enabled=true), then run: -ID=$(hypeman run --hypervisor qemu --network=false --memory 1GB \ - --entrypoint /bin/sh --entrypoint -c \ - --cmd 'sleep 5; dd if=/dev/zero of=/dev/shm/hype-mem bs=1M count=256; sleep 5; rm -f /dev/shm/hype-mem; sleep 90' \ - docker.io/library/alpine:latest | tail -n1) -PID=$(jq -r '.HypervisorPID' "/guests/$ID/metadata.json") -awk '/^Pss:/ {print $2 " kB"}' "/proc/$PID/smaps_rollup" # Linux (preferred) -awk '/^VmRSS:/ {print $2 " kB"}' "/proc/$PID/status" # Linux fallback -ps -o rss= -p "$PID" # macOS -hypeman rm --force "$ID" - -# 2) Restart API with config B (hypervisor.memory.enabled=false) and run the same command. -# 3) Compare final/steady host memory between A and B. +HYPEMAN_RUN_GUESTMEMORY_TESTS=1 ``` -In one startup-focused sample run, absolute host footprint stayed far below guest memory size (for example, ~4GB guest with low host PSS on Cloud Hypervisor/Firecracker), while QEMU showed a larger fixed process overhead. 
+Run them with: -Sample probe results (4GB idle guest, rounded MB): - -| Hypervisor | Host RSS (MB) | Host PSS (MB) | Notes | -|---|---:|---:|---| -| Cloud Hypervisor (Linux) | ~345 | ~29 | Low actual host pressure when idle | -| Firecracker (Linux) | ~295 | ~27 | Low actual host pressure when idle | -| QEMU (Linux) | ~400 | ~116 | Higher fixed process overhead | -| VZ (macOS) | ~23 | N/A | RSS sampled with `ps` | +```bash +make test-guestmemory-linux +make test-guestmemory-vz +``` -## Out of Scope +The tests verify: -- No API surface changes. -- No scheduler/admission logic changes. -- No automatic background tuning loops outside hypervisor-supported reclaim mechanisms. +- boot-time guest-memory configuration is present +- runtime balloon target starts at full assigned memory +- manual reclaim changes the target in the expected direction +- protected floors prevent over-reclaim +- clearing the manual hold deflates back to full guest memory diff --git a/lib/guestmemory/active_ballooning.go b/lib/guestmemory/active_ballooning.go new file mode 100644 index 00000000..c9d1c8b5 --- /dev/null +++ b/lib/guestmemory/active_ballooning.go @@ -0,0 +1,228 @@ +package guestmemory + +import ( + "context" + "errors" + "log/slog" + "time" + + "github.com/kernel/hypeman/lib/hypervisor" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/trace" +) + +var ( + ErrActiveBallooningDisabled = errors.New("active ballooning is disabled") + ErrGuestMemoryDisabled = errors.New("guest memory reclaim is disabled") +) + +// ActiveBallooningConfig controls host-driven balloon reclaim behavior. 
+type ActiveBallooningConfig struct { + Enabled bool + + PollInterval time.Duration + + PressureHighWatermarkAvailablePercent int + PressureLowWatermarkAvailablePercent int + + ProtectedFloorPercent int + ProtectedFloorMinBytes int64 + + MinAdjustmentBytes int64 + PerVMMaxStepBytes int64 + PerVMCooldown time.Duration +} + +// DefaultActiveBallooningConfig returns conservative defaults for active reclaim. +func DefaultActiveBallooningConfig() ActiveBallooningConfig { + return ActiveBallooningConfig{ + Enabled: false, + PollInterval: 2 * time.Second, + PressureHighWatermarkAvailablePercent: 10, + PressureLowWatermarkAvailablePercent: 15, + ProtectedFloorPercent: 50, + ProtectedFloorMinBytes: 512 * 1024 * 1024, + MinAdjustmentBytes: 64 * 1024 * 1024, + PerVMMaxStepBytes: 256 * 1024 * 1024, + PerVMCooldown: 5 * time.Second, + } +} + +// Normalize applies defaults and clamps invalid values. +func (c ActiveBallooningConfig) Normalize() ActiveBallooningConfig { + d := DefaultActiveBallooningConfig() + + if c.PollInterval <= 0 { + c.PollInterval = d.PollInterval + } + if c.PressureHighWatermarkAvailablePercent <= 0 || c.PressureHighWatermarkAvailablePercent >= 100 { + c.PressureHighWatermarkAvailablePercent = d.PressureHighWatermarkAvailablePercent + } + if c.PressureLowWatermarkAvailablePercent <= 0 || c.PressureLowWatermarkAvailablePercent >= 100 { + c.PressureLowWatermarkAvailablePercent = d.PressureLowWatermarkAvailablePercent + } + if c.PressureLowWatermarkAvailablePercent <= c.PressureHighWatermarkAvailablePercent { + c.PressureLowWatermarkAvailablePercent = c.PressureHighWatermarkAvailablePercent + 1 + if c.PressureLowWatermarkAvailablePercent >= 100 { + c.PressureHighWatermarkAvailablePercent = d.PressureHighWatermarkAvailablePercent + c.PressureLowWatermarkAvailablePercent = d.PressureLowWatermarkAvailablePercent + } + } + if c.ProtectedFloorPercent <= 0 || c.ProtectedFloorPercent >= 100 { + c.ProtectedFloorPercent = d.ProtectedFloorPercent + } + if 
c.ProtectedFloorMinBytes <= 0 { + c.ProtectedFloorMinBytes = d.ProtectedFloorMinBytes + } + if c.MinAdjustmentBytes <= 0 { + c.MinAdjustmentBytes = d.MinAdjustmentBytes + } + if c.PerVMMaxStepBytes <= 0 { + c.PerVMMaxStepBytes = d.PerVMMaxStepBytes + } + if c.PerVMCooldown <= 0 { + c.PerVMCooldown = d.PerVMCooldown + } + + return c +} + +// BalloonVM describes a running VM that may participate in reclaim. +type BalloonVM struct { + ID string + Name string + HypervisorType hypervisor.Type + SocketPath string + AssignedMemoryBytes int64 +} + +// Source lists reclaim-eligible VMs. +type Source interface { + ListBalloonVMs(ctx context.Context) ([]BalloonVM, error) +} + +// Controller coordinates automatic and manual reclaim. +type Controller interface { + Start(ctx context.Context) error + TriggerReclaim(ctx context.Context, req ManualReclaimRequest) (ManualReclaimResponse, error) +} + +// ManualReclaimRequest triggers a proactive reclaim cycle. +type ManualReclaimRequest struct { + ReclaimBytes int64 + HoldFor time.Duration + DryRun bool + Reason string +} + +// HostPressureState summarizes host memory pressure. +type HostPressureState string + +const ( + HostPressureStateHealthy HostPressureState = "healthy" + HostPressureStatePressure HostPressureState = "pressure" +) + +// ManualReclaimAction captures one VM's reclaim plan/result. +type ManualReclaimAction struct { + InstanceID string + InstanceName string + Hypervisor hypervisor.Type + AssignedMemoryBytes int64 + ProtectedFloorBytes int64 + PreviousTargetGuestMemoryBytes int64 + PlannedTargetGuestMemoryBytes int64 + TargetGuestMemoryBytes int64 + AppliedReclaimBytes int64 + Status string + Error string +} + +// ManualReclaimResponse summarizes the last reconcile result. 
+type ManualReclaimResponse struct { + RequestedReclaimBytes int64 + PlannedReclaimBytes int64 + AppliedReclaimBytes int64 + HoldUntil *time.Time + HostAvailableBytes int64 + HostPressureState HostPressureState + Actions []ManualReclaimAction +} + +// HostPressureSample captures the host memory snapshot used for reclaim decisions. +type HostPressureSample struct { + TotalBytes int64 + AvailableBytes int64 + AvailablePercent float64 + Stressed bool +} + +// PressureSampler provides host memory pressure samples. +type PressureSampler interface { + Sample(ctx context.Context) (HostPressureSample, error) +} + +type controller struct { + policy Policy + config ActiveBallooningConfig + source Source + sampler PressureSampler + log *slog.Logger + metrics *Metrics + tracer trace.Tracer + + reconcileMu syncState +} + +type syncState struct { + mu chan struct{} + pressureState HostPressureState + manualHold *manualHold + lastApplied map[string]time.Time + newClient func(hypervisor.Type, string) (hypervisor.Hypervisor, error) +} + +type manualHold struct { + reclaimBytes int64 + until time.Time +} + +// NewController creates the active ballooning controller. +func NewController(policy Policy, cfg ActiveBallooningConfig, source Source, log *slog.Logger) Controller { + return NewControllerWithSampler(policy, cfg, source, newHostPressureSampler(), log) +} + +// NewControllerWithSampler creates the active ballooning controller with an injected +// host pressure sampler. This is primarily useful for tests that need deterministic +// reclaim behavior. 
+func NewControllerWithSampler(policy Policy, cfg ActiveBallooningConfig, source Source, sampler PressureSampler, log *slog.Logger) Controller { + if log == nil { + log = slog.Default() + } + if sampler == nil { + sampler = newHostPressureSampler() + } + + metrics, err := NewMetrics(otel.GetMeterProvider().Meter("hypeman")) + if err != nil { + log.Warn("failed to initialize guest memory metrics", "error", err) + } + + c := &controller{ + policy: policy.Normalize(), + config: cfg.Normalize(), + source: source, + sampler: sampler, + log: log, + metrics: metrics, + tracer: otel.Tracer("hypeman/guestmemory"), + reconcileMu: syncState{ + mu: make(chan struct{}, 1), + pressureState: HostPressureStateHealthy, + lastApplied: make(map[string]time.Time), + newClient: hypervisor.NewClient, + }, + } + c.reconcileMu.mu <- struct{}{} + return c +} diff --git a/lib/guestmemory/controller.go b/lib/guestmemory/controller.go new file mode 100644 index 00000000..f685697c --- /dev/null +++ b/lib/guestmemory/controller.go @@ -0,0 +1,369 @@ +package guestmemory + +import ( + "context" + "fmt" + "time" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" +) + +func (c *controller) Start(ctx context.Context) error { + if !c.policy.Enabled || !c.policy.ReclaimEnabled { + <-ctx.Done() + return nil + } + if !c.config.Enabled { + <-ctx.Done() + return nil + } + + ticker := time.NewTicker(c.config.PollInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return nil + case <-ticker.C: + if _, err := c.reconcile(ctx, reconcileRequest{}); err != nil { + logFromContext(ctx, c.log).WarnContext(ctx, "active ballooning reconcile failed", "operation", "active_ballooning_reconcile", "trigger", "auto", "error", err) + } + } + } +} + +func (c *controller) TriggerReclaim(ctx context.Context, req ManualReclaimRequest) (ManualReclaimResponse, error) { + if !c.policy.Enabled || !c.policy.ReclaimEnabled { + return ManualReclaimResponse{}, ErrGuestMemoryDisabled + } 
+ if !c.config.Enabled { + return ManualReclaimResponse{}, ErrActiveBallooningDisabled + } + if req.ReclaimBytes < 0 { + return ManualReclaimResponse{}, fmt.Errorf("reclaim_bytes must be non-negative") + } + return c.reconcile(ctx, reconcileRequest{ + force: true, + dryRun: req.DryRun, + requestedReclaim: req.ReclaimBytes, + holdFor: req.HoldFor, + reason: req.Reason, + }) +} + +type reconcileRequest struct { + force bool + dryRun bool + requestedReclaim int64 + holdFor time.Duration + reason string +} + +func (c *controller) reconcile(ctx context.Context, req reconcileRequest) (ManualReclaimResponse, error) { + trigger := reconcileTrigger(req) + start := time.Now() + ctx, span := c.startReconcileSpan(ctx, req) + defer span.End() + + state := &c.reconcileMu + select { + case <-ctx.Done(): + err := ctx.Err() + c.recordReconcileError(ctx, trigger, start, span, err) + return ManualReclaimResponse{}, err + case <-state.mu: + } + defer func() { state.mu <- struct{}{} }() + + now := time.Now() + sampleCtx, sampleSpan := c.startChildSpan(ctx, "guestmemory.sample_host_pressure") + sample, err := c.sampler.Sample(sampleCtx) + if err != nil { + c.metrics.RecordSamplerError(ctx, "host_pressure") + sampleSpan.RecordError(err) + sampleSpan.SetStatus(codes.Error, err.Error()) + sampleSpan.End() + c.recordReconcileError(ctx, trigger, start, span, err) + return ManualReclaimResponse{}, fmt.Errorf("sample host pressure: %w", err) + } + sampleSpan.SetAttributes( + attribute.Int64("host_available_bytes", sample.AvailableBytes), + attribute.Float64("host_available_percent", sample.AvailablePercent), + attribute.Bool("stressed", sample.Stressed), + ) + sampleSpan.SetStatus(codes.Ok, "") + sampleSpan.End() + + summary := reconcileSummary{ + hostAvailable: sample.AvailableBytes, + hostAvailablePerc: sample.AvailablePercent, + } + + if state.manualHold != nil && !state.manualHold.until.IsZero() && now.After(state.manualHold.until) { + logFromContext(ctx, c.log).InfoContext(ctx, + "guest 
memory manual reclaim hold expired", + "operation", "manual_reclaim", + ) + state.manualHold = nil + } + + if req.force && !req.dryRun { + if req.holdFor <= 0 { + if state.manualHold != nil { + logFromContext(ctx, c.log).InfoContext(ctx, + "guest memory manual reclaim hold cleared", + "operation", "manual_reclaim", + ) + } + state.manualHold = nil + } else { + state.manualHold = &manualHold{ + reclaimBytes: req.requestedReclaim, + until: now.Add(req.holdFor), + } + logFromContext(ctx, c.log).InfoContext(ctx, + "guest memory manual reclaim hold set", + "operation", "manual_reclaim", + "requested_reclaim_bytes", req.requestedReclaim, + "hold_for_seconds", req.holdFor.Seconds(), + ) + } + } + + listCtx, listSpan := c.startChildSpan(ctx, "guestmemory.list_balloon_vms") + vms, err := c.source.ListBalloonVMs(listCtx) + if err != nil { + listSpan.RecordError(err) + listSpan.SetStatus(codes.Error, err.Error()) + listSpan.End() + c.recordReconcileError(ctx, trigger, start, span, err) + return ManualReclaimResponse{}, err + } + listSpan.SetAttributes(attribute.Int("vm_count", len(vms))) + listSpan.SetStatus(codes.Ok, "") + listSpan.End() + + candidates := make([]candidateState, 0, len(vms)) + actions := make([]ManualReclaimAction, 0, len(vms)) + var currentTotalReclaim int64 + for _, vm := range vms { + hv, err := state.newClient(vm.HypervisorType, vm.SocketPath) + if err != nil { + actions = append(actions, skippedAction(vm, "error", fmt.Sprintf("create hypervisor client: %v", err))) + continue + } + if !hv.Capabilities().SupportsBalloonControl { + actions = append(actions, skippedAction(vm, "unsupported", "runtime balloon control is not supported")) + continue + } + + currentTarget, err := hv.GetTargetGuestMemoryBytes(ctx) + if err != nil { + actions = append(actions, skippedAction(vm, "error", fmt.Sprintf("read balloon target: %v", err))) + continue + } + + currentTarget = clampInt64(currentTarget, 0, vm.AssignedMemoryBytes) + protectedFloor := 
protectedFloorBytes(c.config, vm.AssignedMemoryBytes) + if protectedFloor > vm.AssignedMemoryBytes { + protectedFloor = vm.AssignedMemoryBytes + } + + currentReclaim := vm.AssignedMemoryBytes - currentTarget + if currentReclaim < 0 { + currentReclaim = 0 + } + currentTotalReclaim += currentReclaim + + candidates = append(candidates, candidateState{ + vm: vm, + hv: hv, + currentTargetGuestBytes: currentTarget, + protectedFloorBytes: protectedFloor, + maxReclaimBytes: maxInt64(0, vm.AssignedMemoryBytes-protectedFloor), + }) + } + summary.eligibleVMs = len(candidates) + + previousPressure := state.pressureState + state.pressureState = nextPressureState(state.pressureState, c.config, sample) + summary.previousPressure = previousPressure + summary.currentPressure = state.pressureState + summary.pressureChanged = previousPressure != state.pressureState + if summary.pressureChanged { + c.metrics.RecordPressureTransition(ctx, previousPressure, state.pressureState) + } + autoTarget := automaticTargetBytes(state.pressureState, c.config, sample, currentTotalReclaim) + + manualTarget := int64(0) + if req.dryRun { + manualTarget = req.requestedReclaim + } else if req.force && req.requestedReclaim > 0 { + manualTarget = req.requestedReclaim + } else if state.manualHold != nil { + manualTarget = state.manualHold.reclaimBytes + } + totalTarget := maxInt64(autoTarget, manualTarget) + summary.autoTarget = autoTarget + summary.manualTarget = manualTarget + summary.effectiveTarget = totalTarget + summary.manualHoldActive = state.manualHold != nil + + plannedTargets := planGuestTargets(c.config, candidates, totalTarget) + + resp := ManualReclaimResponse{ + RequestedReclaimBytes: req.requestedReclaim, + HoldUntil: holdUntil(state.manualHold), + HostAvailableBytes: sample.AvailableBytes, + HostPressureState: state.pressureState, + Actions: make([]ManualReclaimAction, 0, len(actions)+len(candidates)), + } + resp.Actions = append(resp.Actions, actions...) 
+ for _, action := range actions { + switch action.Status { + case "error": + summary.errorCount++ + case "unsupported": + summary.unsupportedCount++ + default: + summary.unchangedCount++ + } + } + + applyCtx, applySpan := c.startChildSpan(ctx, "guestmemory.apply_balloon_targets") + for _, candidate := range candidates { + plannedTarget, ok := plannedTargets[candidate.vm.ID] + if !ok { + plannedTarget = candidate.vm.AssignedMemoryBytes + } + + appliedTarget := plannedTarget + if absInt64(appliedTarget-candidate.currentTargetGuestBytes) < c.config.MinAdjustmentBytes { + appliedTarget = candidate.currentTargetGuestBytes + } + if !req.force { + if lastAppliedAt, ok := state.lastApplied[candidate.vm.ID]; ok && now.Sub(lastAppliedAt) < c.config.PerVMCooldown { + appliedTarget = candidate.currentTargetGuestBytes + } + } + delta := appliedTarget - candidate.currentTargetGuestBytes + if appliedTarget != candidate.currentTargetGuestBytes { + if delta > 0 { + appliedTarget = candidate.currentTargetGuestBytes + minInt64(delta, c.config.PerVMMaxStepBytes) + } else { + appliedTarget = candidate.currentTargetGuestBytes - minInt64(-delta, c.config.PerVMMaxStepBytes) + } + } + + appliedTarget = clampInt64(appliedTarget, candidate.protectedFloorBytes, candidate.vm.AssignedMemoryBytes) + plannedTarget = clampInt64(plannedTarget, candidate.protectedFloorBytes, candidate.vm.AssignedMemoryBytes) + + action := ManualReclaimAction{ + InstanceID: candidate.vm.ID, + InstanceName: candidate.vm.Name, + Hypervisor: candidate.vm.HypervisorType, + AssignedMemoryBytes: candidate.vm.AssignedMemoryBytes, + ProtectedFloorBytes: candidate.protectedFloorBytes, + PreviousTargetGuestMemoryBytes: candidate.currentTargetGuestBytes, + PlannedTargetGuestMemoryBytes: plannedTarget, + TargetGuestMemoryBytes: candidate.currentTargetGuestBytes, + Status: "unchanged", + } + + resp.PlannedReclaimBytes += candidate.vm.AssignedMemoryBytes - plannedTarget + + if !req.dryRun && appliedTarget != 
candidate.currentTargetGuestBytes { + if err := candidate.hv.SetTargetGuestMemoryBytes(applyCtx, appliedTarget); err != nil { + action.Status = "error" + action.Error = err.Error() + logFromContext(ctx, c.log).WarnContext(ctx, + "guest memory reclaim action failed", + "operation", "active_ballooning_apply", + "trigger", trigger, + "instance_id", candidate.vm.ID, + "hypervisor", candidate.vm.HypervisorType, + "previous_target_guest_memory_bytes", candidate.currentTargetGuestBytes, + "target_guest_memory_bytes", appliedTarget, + "error", err, + ) + resp.Actions = append(resp.Actions, action) + continue + } + state.lastApplied[candidate.vm.ID] = now + action.Status = "applied" + action.TargetGuestMemoryBytes = appliedTarget + } + if req.dryRun && appliedTarget != candidate.currentTargetGuestBytes { + action.Status = "planned" + action.TargetGuestMemoryBytes = appliedTarget + } + action.AppliedReclaimBytes = candidate.vm.AssignedMemoryBytes - action.TargetGuestMemoryBytes + resp.AppliedReclaimBytes += action.AppliedReclaimBytes + resp.Actions = append(resp.Actions, action) + + switch action.Status { + case "applied": + summary.appliedCount++ + case "planned": + summary.plannedCount++ + case "error": + summary.errorCount++ + case "unsupported": + summary.unsupportedCount++ + default: + summary.unchangedCount++ + } + } + // Prune lastApplied entries for VMs no longer in the candidate list. 
+ activeVMs := make(map[string]struct{}, len(candidates)) + for _, candidate := range candidates { + activeVMs[candidate.vm.ID] = struct{}{} + } + for vmID := range state.lastApplied { + if _, ok := activeVMs[vmID]; !ok { + delete(state.lastApplied, vmID) + } + } + + applySpan.SetAttributes( + attribute.Int("eligible_vms", summary.eligibleVMs), + attribute.Int("applied_vms", summary.appliedCount), + attribute.Int("planned_vms", summary.plannedCount), + attribute.Int("error_vms", summary.errorCount), + ) + if summary.errorCount > 0 { + applySpan.SetStatus(codes.Error, "one or more balloon target updates failed") + } else { + applySpan.SetStatus(codes.Ok, "") + } + applySpan.End() + + summary.plannedReclaim = resp.PlannedReclaimBytes + summary.appliedReclaim = resp.AppliedReclaimBytes + c.recordReconcileSuccess(ctx, trigger, req, span, start, summary, resp.Actions) + c.logPressureTransition(ctx, summary) + c.logReconcileSummary(ctx, req, summary, reconcileStatus(summary)) + + return resp, nil +} + +func holdUntil(hold *manualHold) *time.Time { + if hold == nil || hold.until.IsZero() { + return nil + } + until := hold.until + return &until +} + +func skippedAction(vm BalloonVM, status, err string) ManualReclaimAction { + return ManualReclaimAction{ + InstanceID: vm.ID, + InstanceName: vm.Name, + Hypervisor: vm.HypervisorType, + AssignedMemoryBytes: vm.AssignedMemoryBytes, + Status: status, + Error: err, + } +} diff --git a/lib/guestmemory/controller_test.go b/lib/guestmemory/controller_test.go new file mode 100644 index 00000000..2b20427f --- /dev/null +++ b/lib/guestmemory/controller_test.go @@ -0,0 +1,242 @@ +package guestmemory + +import ( + "context" + "errors" + "io" + "log/slog" + "testing" + "time" + + "github.com/kernel/hypeman/lib/hypervisor" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type stubSource struct { + vms []BalloonVM + err error +} + +func (s *stubSource) ListBalloonVMs(ctx context.Context) ([]BalloonVM, 
error) {
	_ = ctx
	if s.err != nil {
		return nil, s.err
	}
	return s.vms, nil
}

// stubSampler is a test double for the host pressure sampler; it returns a
// fixed sample (and optional error) on every call.
type stubSampler struct {
	sample HostPressureSample
	err    error
}

func (s *stubSampler) Sample(ctx context.Context) (HostPressureSample, error) {
	_ = ctx
	return s.sample, s.err
}

// stubHypervisor is a minimal hypervisor.Hypervisor test double: only the
// balloon-target getter/setter carry state; every other method is a no-op.
type stubHypervisor struct {
	target       int64
	capabilities hypervisor.Capabilities
	setErr       error
}

func (s *stubHypervisor) DeleteVM(ctx context.Context) error { return nil }
func (s *stubHypervisor) Shutdown(ctx context.Context) error { return nil }
func (s *stubHypervisor) GetVMInfo(ctx context.Context) (*hypervisor.VMInfo, error) {
	return &hypervisor.VMInfo{State: hypervisor.StateRunning}, nil
}
func (s *stubHypervisor) Pause(ctx context.Context) error                     { return nil }
func (s *stubHypervisor) Resume(ctx context.Context) error                    { return nil }
func (s *stubHypervisor) Snapshot(ctx context.Context, destPath string) error { return nil }
func (s *stubHypervisor) ResizeMemory(ctx context.Context, bytes int64) error { return nil }
func (s *stubHypervisor) ResizeMemoryAndWait(ctx context.Context, bytes int64, timeout time.Duration) error {
	return nil
}
func (s *stubHypervisor) Capabilities() hypervisor.Capabilities { return s.capabilities }

// SetTargetGuestMemoryBytes records the requested balloon target, or fails
// with setErr when configured.
func (s *stubHypervisor) SetTargetGuestMemoryBytes(ctx context.Context, bytes int64) error {
	_ = ctx
	if s.setErr != nil {
		return s.setErr
	}
	s.target = bytes
	return nil
}
func (s *stubHypervisor) GetTargetGuestMemoryBytes(ctx context.Context) (int64, error) {
	_ = ctx
	return s.target, nil
}

// TestTriggerReclaimDistributesProportionally verifies that a manual reclaim
// is split across two VMs and that planned, applied, per-action, and actual
// hypervisor balloon deltas all account for the full 768 MiB request.
func TestTriggerReclaimDistributesProportionally(t *testing.T) {
	const mib = int64(1024 * 1024)
	src := &stubSource{
		vms: []BalloonVM{
			{ID: "a", Name: "a", HypervisorType: hypervisor.TypeCloudHypervisor, SocketPath: "a", AssignedMemoryBytes: 1024 * mib},
			{ID: "b", Name: "b", HypervisorType: hypervisor.TypeCloudHypervisor, SocketPath: "b", AssignedMemoryBytes: 2048 * mib},
		},
	}
	hvA := &stubHypervisor{target: 1024 * mib, capabilities: hypervisor.Capabilities{SupportsBalloonControl: true}}
	hvB := &stubHypervisor{target: 2048 * mib, capabilities: hypervisor.Capabilities{SupportsBalloonControl: true}}

	c := NewController(Policy{Enabled: true, ReclaimEnabled: true}, ActiveBallooningConfig{
		Enabled:                true,
		ProtectedFloorPercent:  50,
		ProtectedFloorMinBytes: 0,
		MinAdjustmentBytes:     1,
		PerVMMaxStepBytes:      4096 * mib,
		PerVMCooldown:          time.Second,
	}, src, slog.New(slog.NewTextHandler(io.Discard, nil))).(*controller)
	c.sampler = &stubSampler{sample: HostPressureSample{TotalBytes: 4096 * mib, AvailableBytes: 4096 * mib, AvailablePercent: 100}}
	// Route hypervisor client creation to the stubs by socket path.
	c.reconcileMu.newClient = func(t hypervisor.Type, socket string) (hypervisor.Hypervisor, error) {
		switch socket {
		case "a":
			return hvA, nil
		case "b":
			return hvB, nil
		default:
			return nil, errors.New("unknown")
		}
	}

	resp, err := c.TriggerReclaim(context.Background(), ManualReclaimRequest{ReclaimBytes: 768 * mib, HoldFor: time.Minute})
	require.NoError(t, err)
	assert.Equal(t, int64(768*mib), resp.PlannedReclaimBytes)
	assert.Equal(t, int64(768*mib), resp.AppliedReclaimBytes)
	assert.Equal(t, int64(768*mib), 1024*mib-hvA.target+2048*mib-hvB.target)
	assert.Equal(t, int64(768*mib), resp.Actions[0].AppliedReclaimBytes+resp.Actions[1].AppliedReclaimBytes)
}

// TestPressureStateUsesHysteresis pins the watermark hysteresis: entry into
// pressure is strictly below the high watermark; exit requires reaching the
// (higher) low watermark.
func TestPressureStateUsesHysteresis(t *testing.T) {
	cfg := DefaultActiveBallooningConfig()
	cfg.PressureHighWatermarkAvailablePercent = 10
	cfg.PressureLowWatermarkAvailablePercent = 15

	assert.Equal(t, HostPressureStatePressure, nextPressureState(HostPressureStateHealthy, cfg, HostPressureSample{AvailablePercent: 9}))
	assert.Equal(t, HostPressureStateHealthy, nextPressureState(HostPressureStateHealthy, cfg, HostPressureSample{AvailablePercent: 10}))
	assert.Equal(t, HostPressureStateHealthy, nextPressureState(HostPressureStateHealthy, cfg, HostPressureSample{AvailablePercent: 10.9}))
	assert.Equal(t, HostPressureStatePressure, nextPressureState(HostPressureStatePressure, cfg, HostPressureSample{AvailablePercent: 12}))
	assert.Equal(t, HostPressureStatePressure, nextPressureState(HostPressureStatePressure, cfg, HostPressureSample{AvailablePercent: 14.9}))
	assert.Equal(t, HostPressureStateHealthy, nextPressureState(HostPressureStatePressure, cfg, HostPressureSample{AvailablePercent: 16}))
}

// TestTriggerReclaimReturnsWhenContextIsCanceledWhileWaitingForLock drains
// the controller's channel-based reconcile lock so TriggerReclaim must block,
// then checks a canceled context unblocks it with context.Canceled.
func TestTriggerReclaimReturnsWhenContextIsCanceledWhileWaitingForLock(t *testing.T) {
	const mib = int64(1024 * 1024)

	src := &stubSource{
		vms: []BalloonVM{
			{ID: "a", Name: "a", HypervisorType: hypervisor.TypeCloudHypervisor, SocketPath: "a", AssignedMemoryBytes: 1024 * mib},
		},
	}

	c := NewController(Policy{Enabled: true, ReclaimEnabled: true}, ActiveBallooningConfig{
		Enabled:                true,
		ProtectedFloorPercent:  50,
		ProtectedFloorMinBytes: 0,
		MinAdjustmentBytes:     1,
		PerVMMaxStepBytes:      4096 * mib,
		PerVMCooldown:          time.Second,
	}, src, slog.New(slog.NewTextHandler(io.Discard, nil))).(*controller)
	c.sampler = &stubSampler{sample: HostPressureSample{TotalBytes: 1024 * mib, AvailableBytes: 1024 * mib, AvailablePercent: 100}}

	// Hold the lock token so the call below has to wait.
	<-c.reconcileMu.mu
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	_, err := c.TriggerReclaim(ctx, ManualReclaimRequest{ReclaimBytes: 128 * mib})
	require.ErrorIs(t, err, context.Canceled)

	// Return the token so other tests are unaffected.
	c.reconcileMu.mu <- struct{}{}
}

// TestTriggerReclaimMinAdjustmentKeepsCurrentTarget checks that a request
// smaller than MinAdjustmentBytes leaves the balloon target unchanged.
func TestTriggerReclaimMinAdjustmentKeepsCurrentTarget(t *testing.T) {
	const mib = int64(1024 * 1024)

	src := &stubSource{
		vms: []BalloonVM{
			{ID: "a", Name: "a", HypervisorType: hypervisor.TypeCloudHypervisor, SocketPath: "a", AssignedMemoryBytes: 1024 * mib},
		},
	}
	hv := &stubHypervisor{target: 1024 * mib, capabilities: hypervisor.Capabilities{SupportsBalloonControl: true}}

	c := NewController(Policy{Enabled: true, ReclaimEnabled: true}, ActiveBallooningConfig{
		Enabled:                true,
		ProtectedFloorPercent:  50,
		ProtectedFloorMinBytes: 0,
		MinAdjustmentBytes:     64 * mib,
		PerVMMaxStepBytes:      64 * mib,
		PerVMCooldown:          time.Minute,
	}, src, slog.New(slog.NewTextHandler(io.Discard, nil))).(*controller)
	c.sampler = &stubSampler{sample: HostPressureSample{TotalBytes: 1024 * mib, AvailableBytes: 1024 * mib, AvailablePercent: 100}}
	c.reconcileMu.newClient = func(t hypervisor.Type, socket string) (hypervisor.Hypervisor, error) {
		return hv, nil
	}

	resp, err := c.TriggerReclaim(context.Background(), ManualReclaimRequest{ReclaimBytes: 32 * mib})
	require.NoError(t, err)
	require.Len(t, resp.Actions, 1)
	assert.Equal(t, "unchanged", resp.Actions[0].Status)
	assert.Equal(t, int64(1024*mib), resp.Actions[0].TargetGuestMemoryBytes)
}

// TestTriggerReclaimRespectsProtectedFloor asks for a full-assignment reclaim
// and verifies the target is clamped at the 75% protected floor (768 MiB),
// so only 256 MiB is actually reclaimed.
func TestTriggerReclaimRespectsProtectedFloor(t *testing.T) {
	const mib = int64(1024 * 1024)
	src := &stubSource{
		vms: []BalloonVM{
			{ID: "a", Name: "a", HypervisorType: hypervisor.TypeCloudHypervisor, SocketPath: "a", AssignedMemoryBytes: 1024 * mib},
		},
	}
	hv := &stubHypervisor{target: 1024 * mib, capabilities: hypervisor.Capabilities{SupportsBalloonControl: true}}
	c := NewController(Policy{Enabled: true, ReclaimEnabled: true}, ActiveBallooningConfig{
		Enabled:                true,
		ProtectedFloorPercent:  75,
		ProtectedFloorMinBytes: 0,
		MinAdjustmentBytes:     1,
		PerVMMaxStepBytes:      4096 * mib,
		PerVMCooldown:          time.Second,
	}, src, slog.New(slog.NewTextHandler(io.Discard, nil))).(*controller)
	c.sampler = &stubSampler{sample: HostPressureSample{TotalBytes: 1024 * mib, AvailableBytes: 1024 * mib, AvailablePercent: 100}}
	c.reconcileMu.newClient = func(t hypervisor.Type, socket string) (hypervisor.Hypervisor, error) {
		return hv, nil
	}

	resp, err := c.TriggerReclaim(context.Background(), ManualReclaimRequest{ReclaimBytes: 1024 * mib, HoldFor: time.Minute})
	require.NoError(t, err)
	require.Len(t, resp.Actions, 1)
	assert.Equal(t, int64(768*mib), resp.Actions[0].TargetGuestMemoryBytes)
	assert.Equal(t, int64(256*mib), resp.AppliedReclaimBytes)
}

// TestTriggerReclaimWithoutHoldAppliesRequestedReclaim verifies a zero-hold
// reclaim is applied once (HoldUntil nil) and that a follow-up reconcile with
// no manual target restores the balloon to the assigned size.
func TestTriggerReclaimWithoutHoldAppliesRequestedReclaim(t *testing.T) {
	const mib = int64(1024 * 1024)
	src := &stubSource{
		vms: []BalloonVM{
			{ID: "a", Name: "a", HypervisorType: hypervisor.TypeCloudHypervisor, SocketPath: "a", AssignedMemoryBytes: 1024 * mib},
		},
	}
	hv := &stubHypervisor{target: 1024 * mib, capabilities: hypervisor.Capabilities{SupportsBalloonControl: true}}
	c := NewController(Policy{Enabled: true, ReclaimEnabled: true}, ActiveBallooningConfig{
		Enabled:                true,
		ProtectedFloorPercent:  50,
		ProtectedFloorMinBytes: 0,
		MinAdjustmentBytes:     1,
		PerVMMaxStepBytes:      4096 * mib,
		PerVMCooldown:          time.Second,
	}, src, slog.New(slog.NewTextHandler(io.Discard, nil))).(*controller)
	c.sampler = &stubSampler{sample: HostPressureSample{TotalBytes: 1024 * mib, AvailableBytes: 1024 * mib, AvailablePercent: 100}}
	c.reconcileMu.newClient = func(t hypervisor.Type, socket string) (hypervisor.Hypervisor, error) {
		return hv, nil
	}

	resp, err := c.TriggerReclaim(context.Background(), ManualReclaimRequest{ReclaimBytes: 256 * mib, HoldFor: 0})
	require.NoError(t, err)
	require.Len(t, resp.Actions, 1)
	assert.Equal(t, int64(768*mib), resp.Actions[0].TargetGuestMemoryBytes)
	assert.Equal(t, int64(256*mib), resp.AppliedReclaimBytes)
	assert.Nil(t, resp.HoldUntil)

	followup, err := c.TriggerReclaim(context.Background(), ManualReclaimRequest{})
	require.NoError(t, err)
	assert.Equal(t, int64(0), followup.AppliedReclaimBytes)
	assert.Equal(t, int64(1024*mib), hv.target)
}
diff --git a/lib/guestmemory/helpers.go b/lib/guestmemory/helpers.go
new file mode 100644
index 00000000..a83811e5
--- /dev/null
+++ b/lib/guestmemory/helpers.go
@@ -0,0 +1,8 @@
package guestmemory

// percentage returns part as a percentage of total (0-100 scale).
// It returns 0 when total is zero or negative to avoid division by zero.
func percentage(part, total int64) float64 {
	if total <= 0 {
		return 0
	}
	return (float64(part) / float64(total)) * 100
}
diff --git a/lib/guestmemory/metrics.go b/lib/guestmemory/metrics.go
new file mode 100644
index 00000000..9bba181c
--- /dev/null
+++
b/lib/guestmemory/metrics.go
@@ -0,0 +1,247 @@
package guestmemory

import (
	"context"
	"time"

	"github.com/kernel/hypeman/lib/hypervisor"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/metric"
)

// Metrics bundles the OpenTelemetry instruments for guest memory
// reconciliation. A nil *Metrics is valid: every Record* method is a no-op
// when the receiver is nil (see NewMetrics with a nil meter).
type Metrics struct {
	reconcileTotal           metric.Int64Counter
	reconcileDuration        metric.Float64Histogram
	reclaimActionsTotal      metric.Int64Counter
	pressureTransitionsTotal metric.Int64Counter
	samplerErrorsTotal       metric.Int64Counter
	reclaimBytes             metric.Int64Histogram

	hostAvailableBytes  metric.Int64Gauge
	targetReclaimBytes  metric.Int64Gauge
	appliedReclaimBytes metric.Int64Gauge
	manualHoldActive    metric.Int64Gauge
	eligibleVMsTotal    metric.Int64Gauge
	pressureState       metric.Int64Gauge
}

// GaugeObservation carries one reconcile cycle's gauge values for
// RecordGaugeState.
type GaugeObservation struct {
	HostAvailableBytes int64
	AutoTargetBytes    int64
	ManualTargetBytes  int64
	EffectiveTarget    int64
	AppliedReclaim     int64
	EligibleVMs        int
	PressureState      HostPressureState
	ManualHoldActive   bool
}

// NewMetrics creates all guest memory instruments on meter. A nil meter
// yields (nil, nil); the nil *Metrics then disables recording. Any
// instrument-creation error is returned as-is.
func NewMetrics(meter metric.Meter) (*Metrics, error) {
	if meter == nil {
		return nil, nil
	}

	reconcileTotal, err := meter.Int64Counter(
		"hypeman_guestmemory_reconcile_total",
		metric.WithDescription("Total number of guest memory reconcile cycles"),
	)
	if err != nil {
		return nil, err
	}

	reconcileDuration, err := meter.Float64Histogram(
		"hypeman_guestmemory_reconcile_duration_seconds",
		metric.WithDescription("Guest memory reconcile duration"),
		metric.WithUnit("s"),
	)
	if err != nil {
		return nil, err
	}

	reclaimActionsTotal, err := meter.Int64Counter(
		"hypeman_guestmemory_reclaim_actions_total",
		metric.WithDescription("Total number of guest memory reclaim actions"),
	)
	if err != nil {
		return nil, err
	}

	pressureTransitionsTotal, err := meter.Int64Counter(
		"hypeman_guestmemory_pressure_transitions_total",
		metric.WithDescription("Total number of guest memory pressure state transitions"),
	)
	if err != nil {
		return nil, err
	}

	samplerErrorsTotal, err := meter.Int64Counter(
		"hypeman_guestmemory_sampler_errors_total",
		metric.WithDescription("Total number of guest memory host pressure sampler errors"),
	)
	if err != nil {
		return nil, err
	}

	reclaimBytes, err := meter.Int64Histogram(
		"hypeman_guestmemory_reclaim_bytes",
		metric.WithDescription("Guest memory reclaim bytes observed per reconcile"),
		metric.WithUnit("By"),
	)
	if err != nil {
		return nil, err
	}

	hostAvailableBytes, err := meter.Int64Gauge(
		"hypeman_guestmemory_host_available_bytes",
		metric.WithDescription("Last observed host available memory"),
		metric.WithUnit("By"),
	)
	if err != nil {
		return nil, err
	}

	targetReclaimBytes, err := meter.Int64Gauge(
		"hypeman_guestmemory_target_reclaim_bytes",
		metric.WithDescription("Current guest memory reclaim target"),
		metric.WithUnit("By"),
	)
	if err != nil {
		return nil, err
	}

	appliedReclaimBytes, err := meter.Int64Gauge(
		"hypeman_guestmemory_applied_reclaim_bytes",
		metric.WithDescription("Current applied guest memory reclaim"),
		metric.WithUnit("By"),
	)
	if err != nil {
		return nil, err
	}

	manualHoldActive, err := meter.Int64Gauge(
		"hypeman_guestmemory_manual_hold_active",
		metric.WithDescription("Whether a manual guest memory reclaim hold is active"),
	)
	if err != nil {
		return nil, err
	}

	eligibleVMsTotal, err := meter.Int64Gauge(
		"hypeman_guestmemory_eligible_vms_total",
		metric.WithDescription("Number of guest VMs eligible for active ballooning"),
	)
	if err != nil {
		return nil, err
	}

	pressureState, err := meter.Int64Gauge(
		"hypeman_guestmemory_pressure_state",
		metric.WithDescription("Current guest memory host pressure state (0 healthy, 1 pressure)"),
	)
	if err != nil {
		return nil, err
	}

	return &Metrics{
		reconcileTotal:           reconcileTotal,
		reconcileDuration:        reconcileDuration,
		reclaimActionsTotal:      reclaimActionsTotal,
		pressureTransitionsTotal: pressureTransitionsTotal,
		samplerErrorsTotal:       samplerErrorsTotal,
		reclaimBytes:             reclaimBytes,
		hostAvailableBytes:       hostAvailableBytes,
		targetReclaimBytes:       targetReclaimBytes,
		appliedReclaimBytes:      appliedReclaimBytes,
		manualHoldActive:         manualHoldActive,
		eligibleVMsTotal:         eligibleVMsTotal,
		pressureState:            pressureState,
	}, nil
}

// RecordReconcile counts one reconcile cycle and records its duration,
// attributed by trigger ("manual"/"auto") and status.
func (m *Metrics) RecordReconcile(ctx context.Context, trigger, status string, duration time.Duration) {
	if m == nil {
		return
	}

	opts := metric.WithAttributes(
		attribute.String("trigger", trigger),
		attribute.String("status", status),
	)
	m.reconcileTotal.Add(ctx, 1, opts)
	m.reconcileDuration.Record(ctx, duration.Seconds(), opts)
}

// RecordReclaimAction counts one per-VM reclaim action, attributed by
// trigger, action status, and hypervisor type.
func (m *Metrics) RecordReclaimAction(ctx context.Context, trigger, status string, hvType hypervisor.Type) {
	if m == nil {
		return
	}

	m.reclaimActionsTotal.Add(ctx, 1,
		metric.WithAttributes(
			attribute.String("trigger", trigger),
			attribute.String("status", status),
			attribute.String("hypervisor", string(hvType)),
		))
}

// RecordPressureTransition counts a host pressure state change from -> to.
func (m *Metrics) RecordPressureTransition(ctx context.Context, from, to HostPressureState) {
	if m == nil {
		return
	}

	m.pressureTransitionsTotal.Add(ctx, 1,
		metric.WithAttributes(
			attribute.String("from", string(from)),
			attribute.String("to", string(to)),
		))
}

// RecordSamplerError counts one host pressure sampler failure.
func (m *Metrics) RecordSamplerError(ctx context.Context, sampler string) {
	if m == nil {
		return
	}

	m.samplerErrorsTotal.Add(ctx, 1,
		metric.WithAttributes(attribute.String("sampler", sampler)))
}

// RecordReclaimBytes records one reclaim-byte observation for the given
// trigger and kind. Negative values are dropped (histograms expect >= 0).
func (m *Metrics) RecordReclaimBytes(ctx context.Context, trigger, kind string, bytes int64) {
	if m == nil || bytes < 0 {
		return
	}

	m.reclaimBytes.Record(ctx, bytes,
		metric.WithAttributes(
			attribute.String("trigger", trigger),
			attribute.String("kind", kind),
		))
}

// RecordGaugeState publishes the current reconcile gauges. The reclaim target
// gauge is recorded three times with a "source" attribute (auto/manual/
// effective) so all three targets share one instrument.
func (m *Metrics) RecordGaugeState(ctx context.Context, obs GaugeObservation) {
	if m == nil {
		return
	}

	m.hostAvailableBytes.Record(ctx, obs.HostAvailableBytes)
	m.targetReclaimBytes.Record(ctx, obs.AutoTargetBytes, metric.WithAttributes(attribute.String("source", "auto")))
	m.targetReclaimBytes.Record(ctx, obs.ManualTargetBytes, metric.WithAttributes(attribute.String("source", "manual")))
	m.targetReclaimBytes.Record(ctx, obs.EffectiveTarget, metric.WithAttributes(attribute.String("source", "effective")))
	m.appliedReclaimBytes.Record(ctx, obs.AppliedReclaim)
	m.manualHoldActive.Record(ctx, boolToInt64(obs.ManualHoldActive))
	m.eligibleVMsTotal.Record(ctx, int64(obs.EligibleVMs))
	m.pressureState.Record(ctx, pressureStateMetricValue(obs.PressureState))
}

// pressureStateMetricValue maps the pressure state to the gauge encoding
// documented on the instrument: 1 for pressure, 0 otherwise.
func pressureStateMetricValue(state HostPressureState) int64 {
	if state == HostPressureStatePressure {
		return 1
	}
	return 0
}

// boolToInt64 encodes a bool as 1/0 for gauge recording.
func boolToInt64(v bool) int64 {
	if v {
		return 1
	}
	return 0
}
diff --git a/lib/guestmemory/observability.go b/lib/guestmemory/observability.go
new file mode 100644
index 00000000..965b5304
--- /dev/null
+++ b/lib/guestmemory/observability.go
@@ -0,0 +1,168 @@
package guestmemory

import (
	"context"
	"log/slog"
	"time"

	"github.com/kernel/hypeman/lib/logger"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

// reconcileSummary accumulates per-cycle counters and targets for metrics,
// span attributes, and summary logging.
type reconcileSummary struct {
	eligibleVMs       int
	appliedCount      int
	plannedCount      int
	unchangedCount    int
	errorCount        int
	unsupportedCount  int
	plannedReclaim    int64
	appliedReclaim    int64
	effectiveTarget   int64
	autoTarget        int64
	manualTarget      int64
	manualHoldActive  bool
	pressureChanged   bool
	previousPressure  HostPressureState
	currentPressure   HostPressureState
	hostAvailable     int64
	hostAvailablePerc float64
}

// reconcileTrigger maps a request to its metric/log trigger label:
// forced requests are "manual", everything else "auto".
func reconcileTrigger(req reconcileRequest) string {
	if req.force {
		return "manual"
	}
	return "auto"
}

// logFromContext prefers a request-scoped logger from ctx (ignoring the
// process default), then the supplied fallback, then slog.Default().
func logFromContext(ctx context.Context, fallback *slog.Logger) *slog.Logger {
	if log := logger.FromContext(ctx); log != nil && log != slog.Default() {
		return log
	}
	if fallback != nil {
		return fallback
	}
	return slog.Default()
}

func (c
*controller) startReconcileSpan(ctx context.Context, req reconcileRequest) (context.Context, trace.Span) {
	// Root span for one reconcile cycle; request parameters are attached as
	// attributes so manual/forced/dry-run cycles are distinguishable.
	return c.tracer.Start(ctx, "guestmemory.reconcile",
		trace.WithAttributes(
			attribute.String("trigger", reconcileTrigger(req)),
			attribute.Bool("force", req.force),
			attribute.Bool("dry_run", req.dryRun),
			attribute.Int64("requested_reclaim_bytes", req.requestedReclaim),
		))
}

// startChildSpan starts a child span under ctx with the given name.
func (c *controller) startChildSpan(ctx context.Context, name string) (context.Context, trace.Span) {
	return c.tracer.Start(ctx, name)
}

// reconcileStatus derives the cycle status label: "error" when every outcome
// failed, "partial" when failures mixed with successes, else "success".
func reconcileStatus(summary reconcileSummary) string {
	if summary.errorCount > 0 {
		if summary.appliedCount > 0 || summary.unchangedCount > 0 || summary.plannedCount > 0 {
			return "partial"
		}
		return "error"
	}
	return "success"
}

// recordReconcileError records a failed cycle on the metrics and marks the
// span as errored. A nil err is ignored.
func (c *controller) recordReconcileError(ctx context.Context, trigger string, start time.Time, span trace.Span, err error) {
	if err == nil {
		return
	}

	c.metrics.RecordReconcile(ctx, trigger, "error", time.Since(start))
	span.RecordError(err)
	span.SetStatus(codes.Error, err.Error())
}

// recordReconcileSuccess emits the per-cycle metrics, per-action counters,
// and span attributes for a completed (possibly partial) reconcile.
// Reclaim-byte and gauge metrics are skipped for dry runs, since nothing was
// actually applied.
func (c *controller) recordReconcileSuccess(ctx context.Context, trigger string, req reconcileRequest, span trace.Span, start time.Time, summary reconcileSummary, actions []ManualReclaimAction) {
	status := reconcileStatus(summary)
	c.metrics.RecordReconcile(ctx, trigger, status, time.Since(start))
	for _, action := range actions {
		c.metrics.RecordReclaimAction(ctx, trigger, action.Status, action.Hypervisor)
	}

	if !req.dryRun {
		c.metrics.RecordReclaimBytes(ctx, trigger, "auto_target", summary.autoTarget)
		c.metrics.RecordReclaimBytes(ctx, trigger, "manual_target", summary.manualTarget)
		c.metrics.RecordReclaimBytes(ctx, trigger, "effective_target", summary.effectiveTarget)
		c.metrics.RecordReclaimBytes(ctx, trigger, "planned", summary.plannedReclaim)
		c.metrics.RecordReclaimBytes(ctx, trigger, "applied", summary.appliedReclaim)
		c.metrics.RecordGaugeState(ctx, GaugeObservation{
			HostAvailableBytes: summary.hostAvailable,
			AutoTargetBytes:    summary.autoTarget,
			ManualTargetBytes:  summary.manualTarget,
			EffectiveTarget:    summary.effectiveTarget,
			AppliedReclaim:     summary.appliedReclaim,
			EligibleVMs:        summary.eligibleVMs,
			PressureState:      summary.currentPressure,
			ManualHoldActive:   summary.manualHoldActive,
		})
	}

	span.SetAttributes(
		attribute.String("status", status),
		attribute.Int("eligible_vms", summary.eligibleVMs),
		attribute.Int("applied_vms", summary.appliedCount),
		attribute.Int("planned_vms", summary.plannedCount),
		attribute.Int("error_vms", summary.errorCount),
		attribute.Int("unsupported_vms", summary.unsupportedCount),
		attribute.Int64("auto_target_reclaim_bytes", summary.autoTarget),
		attribute.Int64("manual_target_reclaim_bytes", summary.manualTarget),
		attribute.Int64("effective_target_reclaim_bytes", summary.effectiveTarget),
		attribute.Int64("planned_reclaim_bytes", summary.plannedReclaim),
		attribute.Int64("applied_reclaim_bytes", summary.appliedReclaim),
		attribute.Int64("host_available_bytes", summary.hostAvailable),
		attribute.Float64("host_available_percent", summary.hostAvailablePerc),
		attribute.String("pressure_state", string(summary.currentPressure)),
		attribute.Bool("manual_hold_active", summary.manualHoldActive),
	)
	span.SetStatus(codes.Ok, "")
}

// logPressureTransition logs a host pressure state change; it is a no-op
// when the state did not change this cycle.
func (c *controller) logPressureTransition(ctx context.Context, summary reconcileSummary) {
	if !summary.pressureChanged {
		return
	}

	logFromContext(ctx, c.log).InfoContext(ctx,
		"guest memory pressure state changed",
		"operation", "active_ballooning_reconcile",
		"from", summary.previousPressure,
		"to", summary.currentPressure,
		"host_available_bytes", summary.hostAvailable,
		"host_available_percent", summary.hostAvailablePerc,
	)
}

// logReconcileSummary logs a cycle summary. Quiet auto cycles (no force, no
// pressure change,
func (c *controller) logReconcileSummary(ctx context.Context, req reconcileRequest, summary reconcileSummary, status string) {
	if !req.force && !summary.pressureChanged &&
summary.appliedCount == 0 && summary.errorCount == 0 {
		// No applies, errors, force, or pressure change: skip to avoid
		// logging every idle auto cycle.
		return
	}

	logFromContext(ctx, c.log).InfoContext(ctx,
		"guest memory reconcile completed",
		"operation", "active_ballooning_reconcile",
		"trigger", reconcileTrigger(req),
		"dry_run", req.dryRun,
		"status", status,
		"eligible_vms", summary.eligibleVMs,
		"applied_vms", summary.appliedCount,
		"planned_vms", summary.plannedCount,
		"error_vms", summary.errorCount,
		"unsupported_vms", summary.unsupportedCount,
		"host_available_bytes", summary.hostAvailable,
		"host_available_percent", summary.hostAvailablePerc,
		"pressure_state", summary.currentPressure,
		"planned_reclaim_bytes", summary.plannedReclaim,
		"applied_reclaim_bytes", summary.appliedReclaim,
		"manual_hold_active", summary.manualHoldActive,
	)
}
diff --git a/lib/guestmemory/planner.go b/lib/guestmemory/planner.go
new file mode 100644
index 00000000..55550a09
--- /dev/null
+++ b/lib/guestmemory/planner.go
@@ -0,0 +1,128 @@
package guestmemory

import "github.com/kernel/hypeman/lib/hypervisor"

// candidateState is one balloon-capable VM considered by the planner,
// with its current target, protected floor, and reclaimable headroom.
type candidateState struct {
	vm                      BalloonVM
	hv                      hypervisor.Hypervisor
	currentTargetGuestBytes int64
	protectedFloorBytes     int64
	maxReclaimBytes         int64
}

// planGuestTargets distributes totalReclaim across candidates in proportion
// to each VM's reclaimable headroom (maxReclaimBytes), then hands out any
// integer-division remainder to candidates that still have headroom. The
// result maps VM ID to its planned target guest memory; VMs default to their
// full assigned size. The total reclaim is clamped to [0, total headroom].
func planGuestTargets(cfg ActiveBallooningConfig, candidates []candidateState, totalReclaim int64) map[string]int64 {
	targets := make(map[string]int64, len(candidates))
	if len(candidates) == 0 {
		return targets
	}

	var totalHeadroom int64
	for _, candidate := range candidates {
		totalHeadroom += candidate.maxReclaimBytes
		targets[candidate.vm.ID] = candidate.vm.AssignedMemoryBytes
	}
	if totalHeadroom <= 0 {
		return targets
	}

	totalReclaim = clampInt64(totalReclaim, 0, totalHeadroom)
	if totalReclaim == 0 {
		return targets
	}

	// First pass: proportional share, rounded down by integer division.
	remainder := totalReclaim
	for _, candidate := range candidates {
		reclaim := (totalReclaim * candidate.maxReclaimBytes) / totalHeadroom
		if reclaim > candidate.maxReclaimBytes {
			reclaim = candidate.maxReclaimBytes
		}
		targets[candidate.vm.ID] = candidate.vm.AssignedMemoryBytes - reclaim
		remainder -= reclaim
	}

	// Second pass: spend the rounding remainder on whoever still has headroom.
	for _, candidate := range candidates {
		if remainder <= 0 {
			break
		}
		currentReclaim := candidate.vm.AssignedMemoryBytes - targets[candidate.vm.ID]
		headroomLeft := candidate.maxReclaimBytes - currentReclaim
		if headroomLeft <= 0 {
			continue
		}
		extra := minInt64(headroomLeft, remainder)
		targets[candidate.vm.ID] -= extra
		remainder -= extra
	}

	return targets
}

// protectedFloorBytes returns the minimum guest memory a VM may be ballooned
// down to: the larger of the configured absolute floor and the configured
// percentage of the assigned size.
func protectedFloorBytes(cfg ActiveBallooningConfig, assigned int64) int64 {
	percentFloor := (assigned * int64(cfg.ProtectedFloorPercent)) / 100
	return maxInt64(cfg.ProtectedFloorMinBytes, percentFloor)
}

// nextPressureState applies hysteresis between the high and low available-
// percent watermarks: entry into pressure requires dropping strictly below
// the high watermark (or a stressed sample); exit requires reaching the low
// watermark with no stress signal.
func nextPressureState(current HostPressureState, cfg ActiveBallooningConfig, sample HostPressureSample) HostPressureState {
	availablePercent := sample.AvailablePercent
	highWatermark := float64(cfg.PressureHighWatermarkAvailablePercent)
	lowWatermark := float64(cfg.PressureLowWatermarkAvailablePercent)

	switch current {
	case HostPressureStatePressure:
		if availablePercent >= lowWatermark && !sample.Stressed {
			return HostPressureStateHealthy
		}
		return HostPressureStatePressure
	default:
		if availablePercent < highWatermark || sample.Stressed {
			return HostPressureStatePressure
		}
		return HostPressureStateHealthy
	}
}

// automaticTargetBytes computes the automatic reclaim target: zero outside
// the pressure state; otherwise the shortfall to the low watermark, or —
// when available memory already meets the watermark but the host is still
// stressed — the current total reclaim (held, not grown).
func automaticTargetBytes(state HostPressureState, cfg ActiveBallooningConfig, sample HostPressureSample, currentTotalReclaim int64) int64 {
	if state != HostPressureStatePressure || sample.TotalBytes <= 0 {
		return 0
	}
	lowWatermarkBytes := (sample.TotalBytes * int64(cfg.PressureLowWatermarkAvailablePercent)) / 100
	needed := lowWatermarkBytes - sample.AvailableBytes
	if needed > 0 {
		return needed
	}
	if sample.Stressed {
		return currentTotalReclaim
	}
	return 0
}

// absInt64 returns the absolute value of v.
func absInt64(v int64) int64 {
	if v < 0 {
		return -v
	}
	return v
}

// clampInt64 clamps v into [minV, maxV].
func clampInt64(v, minV, maxV int64) int64 {
	if v < minV {
		return minV
	}
	if v > maxV {
		return maxV
	}
	return v
}

// minInt64 returns the smaller of a and b.
func minInt64(a, b int64) int64 {
	if a < b {
		return a
	}
	return b
}

// maxInt64 returns the larger of a and b.
func maxInt64(a, b int64) int64 {
	if a > b {
		return a
	}
	return b
}
diff --git a/lib/guestmemory/pressure_darwin.go b/lib/guestmemory/pressure_darwin.go
new file mode 100644
index 00000000..ff7d1b59
--- /dev/null
+++ b/lib/guestmemory/pressure_darwin.go
@@ -0,0 +1,64 @@
//go:build darwin

package guestmemory

import (
	"context"
	"fmt"
	"os/exec"
)

// darwinPressureSampler samples host memory via the vm_stat, sysctl, and
// memory_pressure command-line tools.
type darwinPressureSampler struct{}

func newHostPressureSampler() PressureSampler {
	return &darwinPressureSampler{}
}

// Sample gathers total/available memory from vm_stat + sysctl and the
// stressed flag from memory_pressure, returning a combined sample.
func (s *darwinPressureSampler) Sample(ctx context.Context) (HostPressureSample, error) {
	total, available, err := readDarwinVMStat(ctx)
	if err != nil {
		return HostPressureSample{}, err
	}
	stressed, err := readDarwinMemoryPressure(ctx)
	if err != nil {
		return HostPressureSample{}, err
	}

	return HostPressureSample{
		TotalBytes:       total,
		AvailableBytes:   available,
		AvailablePercent: percentage(available, total),
		Stressed:         stressed,
	}, nil
}

// readDarwinVMStat runs `vm_stat` and `sysctl -n hw.memsize` and parses
// their output into (total, available) bytes.
func readDarwinVMStat(ctx context.Context) (int64, int64, error) {
	out, err := exec.CommandContext(ctx, "vm_stat").Output()
	if err != nil {
		return 0, 0, fmt.Errorf("run vm_stat: %w", err)
	}

	memsizeOut, err := exec.CommandContext(ctx, "sysctl", "-n", "hw.memsize").Output()
	if err != nil {
		return 0, 0, fmt.Errorf("run sysctl hw.memsize: %w", err)
	}

	total, available, err := parseDarwinVMStatOutput(string(out), string(memsizeOut))
	if err != nil {
		return 0, 0, err
	}
	return total, available, nil
}

// readDarwinMemoryPressure runs `memory_pressure -Q` and parses whether the
// host is currently under memory pressure.
func readDarwinMemoryPressure(ctx context.Context) (bool, error) {
	out, err := exec.CommandContext(ctx, "memory_pressure", "-Q").Output()
	if err != nil {
		return false, fmt.Errorf("run memory_pressure -Q: %w", err)
	}

	stressed, err := parseDarwinMemoryPressureOutput(string(out))
	if err != nil {
		return false, err
	}
	return stressed, nil
}
diff --git
a/lib/guestmemory/pressure_linux.go b/lib/guestmemory/pressure_linux.go
new file mode 100644
index 00000000..7de4416e
--- /dev/null
+++ b/lib/guestmemory/pressure_linux.go
@@ -0,0 +1,61 @@
//go:build linux

package guestmemory

import (
	"context"
	"fmt"
	"os"
)

// linuxPressureSampler samples host memory from /proc/meminfo and PSI
// (/proc/pressure/memory).
type linuxPressureSampler struct{}

func newHostPressureSampler() PressureSampler {
	return &linuxPressureSampler{}
}

// Sample reads total/available memory from /proc/meminfo and the stressed
// flag from PSI, returning a combined sample. ctx is unused (file reads).
func (s *linuxPressureSampler) Sample(ctx context.Context) (HostPressureSample, error) {
	_ = ctx

	total, available, err := readLinuxMeminfo()
	if err != nil {
		return HostPressureSample{}, err
	}
	stressed, err := readLinuxPSI()
	if err != nil {
		return HostPressureSample{}, err
	}

	return HostPressureSample{
		TotalBytes:       total,
		AvailableBytes:   available,
		AvailablePercent: percentage(available, total),
		Stressed:         stressed,
	}, nil
}

// readLinuxMeminfo reads and parses /proc/meminfo into (total, available)
// bytes.
func readLinuxMeminfo() (int64, int64, error) {
	data, err := os.ReadFile("/proc/meminfo")
	if err != nil {
		return 0, 0, fmt.Errorf("read /proc/meminfo: %w", err)
	}

	total, available, err := parseLinuxMeminfo(string(data))
	if err != nil {
		return 0, 0, err
	}
	return total, available, nil
}

// readLinuxPSI reads /proc/pressure/memory and reports whether the host is
// memory-stressed per the PSI avg10 threshold.
func readLinuxPSI() (bool, error) {
	data, err := os.ReadFile("/proc/pressure/memory")
	if err != nil {
		return false, fmt.Errorf("read /proc/pressure/memory: %w", err)
	}

	stressed, err := parseLinuxPSI(string(data))
	if err != nil {
		return false, err
	}
	return stressed, nil
}
diff --git a/lib/guestmemory/pressure_parse.go b/lib/guestmemory/pressure_parse.go
new file mode 100644
index 00000000..f781a3da
--- /dev/null
+++ b/lib/guestmemory/pressure_parse.go
@@ -0,0 +1,149 @@
package guestmemory

import (
	"bufio"
	"fmt"
	"strconv"
	"strings"
)

// linuxPSIStressAvg10Threshold is the PSI "some" avg10 value at or above
// which the host is considered memory-stressed.
const linuxPSIStressAvg10Threshold = 0.1

// parseLinuxMeminfo extracts MemTotal and MemAvailable from /proc/meminfo
// text (kB values, converted to bytes). It errors when either field is
// missing or invalid.
func parseLinuxMeminfo(data string) (int64, int64, error) {
	var total, available int64
	var sawTotal, sawAvailable bool

	scanner := bufio.NewScanner(strings.NewReader(data))
	for scanner.Scan() {
line := scanner.Text() + fields := strings.Fields(line) + if len(fields) < 2 { + continue + } + + switch fields[0] { + case "MemTotal:": + value, err := strconv.ParseInt(fields[1], 10, 64) + if err != nil { + return 0, 0, fmt.Errorf("parse MemTotal: %w", err) + } + total = value * 1024 + sawTotal = true + case "MemAvailable:": + value, err := strconv.ParseInt(fields[1], 10, 64) + if err != nil { + return 0, 0, fmt.Errorf("parse MemAvailable: %w", err) + } + available = value * 1024 + sawAvailable = true + } + } + if err := scanner.Err(); err != nil { + return 0, 0, fmt.Errorf("scan meminfo: %w", err) + } + if !sawTotal || !sawAvailable || total <= 0 || available < 0 { + return 0, 0, fmt.Errorf("missing memory totals from /proc/meminfo") + } + + return total, available, nil +} + +func parseLinuxPSI(data string) (bool, error) { + for _, line := range strings.Split(data, "\n") { + if !strings.HasPrefix(line, "some ") { + continue + } + + fields := strings.Fields(line) + for _, field := range fields[1:] { + if !strings.HasPrefix(field, "avg10=") { + continue + } + + value, err := strconv.ParseFloat(strings.TrimPrefix(field, "avg10="), 64) + if err != nil { + return false, fmt.Errorf("parse psi avg10: %w", err) + } + return value >= linuxPSIStressAvg10Threshold, nil + } + } + + return false, nil +} + +func parseDarwinVMStatOutput(vmStatOut, memsizeOut string) (int64, int64, error) { + lines := strings.Split(vmStatOut, "\n") + pageSize := int64(4096) + var freePages, speculativePages int64 + + for _, line := range lines { + line = strings.TrimSpace(line) + if strings.Contains(line, "page size of") { + parts := strings.Fields(line) + for i := 0; i < len(parts); i++ { + if parts[i] == "of" && i+1 < len(parts) { + n, err := strconv.ParseInt(parts[i+1], 10, 64) + if err == nil && n > 0 { + pageSize = n + } + break + } + } + } + if strings.HasPrefix(line, "Pages free:") { + n, err := parseDarwinPageCount(line) + if err != nil { + return 0, 0, err + } + freePages = n + } + if 
strings.HasPrefix(line, "Pages speculative:") { + n, err := parseDarwinPageCount(line) + if err != nil { + return 0, 0, err + } + speculativePages = n + } + } + + total, err := strconv.ParseInt(strings.TrimSpace(memsizeOut), 10, 64) + if err != nil { + return 0, 0, fmt.Errorf("parse hw.memsize: %w", err) + } + + available := (freePages + speculativePages) * pageSize + return total, available, nil +} + +func parseDarwinPageCount(line string) (int64, error) { + parts := strings.Split(line, ":") + if len(parts) != 2 { + return 0, fmt.Errorf("parse vm_stat line %q", line) + } + + value := strings.TrimSpace(strings.TrimSuffix(parts[1], ".")) + return strconv.ParseInt(value, 10, 64) +} + +func parseDarwinMemoryPressureOutput(out string) (bool, error) { + for _, line := range strings.Split(out, "\n") { + line = strings.TrimSpace(line) + if !strings.HasPrefix(line, "System-wide memory free percentage:") { + continue + } + + fields := strings.Fields(line) + if len(fields) == 0 { + break + } + + last := strings.TrimSuffix(fields[len(fields)-1], "%") + value, err := strconv.ParseInt(last, 10, 64) + if err != nil { + return false, fmt.Errorf("parse memory_pressure free percentage: %w", err) + } + return value <= 10, nil + } + + return false, nil +} diff --git a/lib/guestmemory/pressure_parse_test.go b/lib/guestmemory/pressure_parse_test.go new file mode 100644 index 00000000..76b777b7 --- /dev/null +++ b/lib/guestmemory/pressure_parse_test.go @@ -0,0 +1,108 @@ +package guestmemory + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParseLinuxMeminfo(t *testing.T) { + t.Parallel() + + total, available, err := parseLinuxMeminfo(` +MemTotal: 16384256 kB +MemFree: 1122334 kB +MemAvailable: 9988776 kB +Buffers: 123456 kB +`) + require.NoError(t, err) + assert.Equal(t, int64(16384256*1024), total) + assert.Equal(t, int64(9988776*1024), available) +} + +func TestParseLinuxMeminfoRequiresTotalAndAvailable(t 
*testing.T) { + t.Parallel() + + _, _, err := parseLinuxMeminfo("MemTotal: 1024 kB\n") + require.Error(t, err) + assert.Contains(t, err.Error(), "missing memory totals") +} + +func TestParseLinuxPSIAboveThresholdIsStressed(t *testing.T) { + t.Parallel() + + stressed, err := parseLinuxPSI(` +some avg10=0.25 avg60=0.12 avg300=0.05 total=12345 +full avg10=0.00 avg60=0.00 avg300=0.00 total=0 +`) + require.NoError(t, err) + assert.True(t, stressed) +} + +func TestParseLinuxPSIBelowThresholdIsHealthy(t *testing.T) { + t.Parallel() + + stressed, err := parseLinuxPSI(` +some avg10=0.09 avg60=0.01 avg300=0.10 total=12345 +full avg10=0.00 avg60=0.00 avg300=0.00 total=0 +`) + require.NoError(t, err) + assert.False(t, stressed) +} + +func TestParseLinuxPSIZeroAvg10IsHealthy(t *testing.T) { + t.Parallel() + + stressed, err := parseLinuxPSI(` +some avg10=0.00 avg60=0.01 avg300=0.10 total=12345 +full avg10=0.00 avg60=0.00 avg300=0.00 total=0 +`) + require.NoError(t, err) + assert.False(t, stressed) +} + +func TestParseDarwinVMStatOutput(t *testing.T) { + t.Parallel() + + total, available, err := parseDarwinVMStatOutput(` +Mach Virtual Memory Statistics: (page size of 16384 bytes) +Pages free: 100. +Pages active: 10000. +Pages inactive: 2000. +Pages speculative: 50. +`, "17179869184\n") + require.NoError(t, err) + assert.Equal(t, int64(17179869184), total) + assert.Equal(t, int64(150*16384), available) +} + +func TestParseDarwinPageCountRejectsMalformedLine(t *testing.T) { + t.Parallel() + + _, err := parseDarwinPageCount("Pages free 100") + require.Error(t, err) + assert.Contains(t, err.Error(), "parse vm_stat line") +} + +func TestParseDarwinMemoryPressureOutput(t *testing.T) { + t.Parallel() + + stressed, err := parseDarwinMemoryPressureOutput(` +The system has 1234 pages wired down. 
+System-wide memory free percentage: 8% +`) + require.NoError(t, err) + assert.True(t, stressed) +} + +func TestParseDarwinMemoryPressureOutputHealthy(t *testing.T) { + t.Parallel() + + stressed, err := parseDarwinMemoryPressureOutput(` +The system has 1234 pages wired down. +System-wide memory free percentage: 21% +`) + require.NoError(t, err) + assert.False(t, stressed) +} diff --git a/lib/hypervisor/balloon_target_cache.go b/lib/hypervisor/balloon_target_cache.go new file mode 100644 index 00000000..ccfd43a6 --- /dev/null +++ b/lib/hypervisor/balloon_target_cache.go @@ -0,0 +1,90 @@ +package hypervisor + +import ( + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + "sync" +) + +// BalloonTargetCache keeps the last requested guest-memory target warm across +// reconnects and Hypeman restarts. +type BalloonTargetCache struct { + targets sync.Map + keys sync.Map +} + +func (c *BalloonTargetCache) Store(socketPath string, bytes int64) { + key := SocketCacheKey(socketPath) + c.targets.Store(key, bytes) + c.keys.Store(socketPath, key) + _ = os.WriteFile(balloonTargetStatePath(socketPath), []byte(fmt.Sprintf("%s\n%d\n", key, bytes)), 0o600) +} + +func (c *BalloonTargetCache) Load(socketPath string) (int64, bool) { + key := SocketCacheKey(socketPath) + if value, ok := c.loadKey(key); ok { + c.keys.Store(socketPath, key) + return value, true + } + + if indexedKey, ok := c.keys.Load(socketPath); ok { + if keyString, ok := indexedKey.(string); ok { + if value, ok := c.loadKey(keyString); ok { + return value, true + } + } + } + + value, ok := loadBalloonTargetState(socketPath, key) + if !ok { + return 0, false + } + c.targets.Store(key, value) + c.keys.Store(socketPath, key) + return value, true +} + +func (c *BalloonTargetCache) Delete(socketPath string) { + if indexedKey, ok := c.keys.LoadAndDelete(socketPath); ok { + if keyString, ok := indexedKey.(string); ok { + c.targets.Delete(keyString) + } + } + c.targets.Delete(SocketCacheKey(socketPath)) + _ = 
os.Remove(balloonTargetStatePath(socketPath)) +} + +func (c *BalloonTargetCache) loadKey(key string) (int64, bool) { + target, ok := c.targets.Load(key) + if !ok { + return 0, false + } + value, ok := target.(int64) + return value, ok +} + +func balloonTargetStatePath(socketPath string) string { + base := filepath.Base(socketPath) + return filepath.Join(filepath.Dir(socketPath), "."+base+".balloon-target") +} + +func loadBalloonTargetState(socketPath, expectedKey string) (int64, bool) { + data, err := os.ReadFile(balloonTargetStatePath(socketPath)) + if err != nil { + return 0, false + } + + lines := strings.Split(strings.TrimSpace(string(data)), "\n") + if len(lines) != 2 || lines[0] != expectedKey { + return 0, false + } + + value, err := strconv.ParseInt(lines[1], 10, 64) + if err != nil { + return 0, false + } + return value, true +} diff --git a/lib/hypervisor/balloon_target_cache_test.go b/lib/hypervisor/balloon_target_cache_test.go new file mode 100644 index 00000000..7ade8e37 --- /dev/null +++ b/lib/hypervisor/balloon_target_cache_test.go @@ -0,0 +1,63 @@ +package hypervisor + +import ( + "net" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBalloonTargetCachePersistsAcrossProcessRestarts(t *testing.T) { + t.Parallel() + + socketPath := testSocketPath(t) + listener, err := net.Listen("unix", socketPath) + require.NoError(t, err) + defer listener.Close() + + var cache BalloonTargetCache + cache.Store(socketPath, 384) + + var restarted BalloonTargetCache + value, ok := restarted.Load(socketPath) + require.True(t, ok) + assert.Equal(t, int64(384), value) +} + +func TestBalloonTargetCacheDeleteClearsIndexedKeyAfterSocketRemoval(t *testing.T) { + t.Parallel() + + socketPath := testSocketPath(t) + listener, err := net.Listen("unix", socketPath) + require.NoError(t, err) + + var cache BalloonTargetCache + cache.Store(socketPath, 512) + + require.NoError(t, listener.Close()) + if 
err := os.Remove(socketPath); err != nil && !os.IsNotExist(err) { + require.NoError(t, err) + } + + cache.Delete(socketPath) + _, ok := cache.Load(socketPath) + assert.False(t, ok) +} + +func testSocketPath(t *testing.T) string { + t.Helper() + + file, err := os.CreateTemp("", "btc-*.sock") + require.NoError(t, err) + path := file.Name() + require.NoError(t, file.Close()) + require.NoError(t, os.Remove(path)) + t.Cleanup(func() { + _ = os.Remove(path) + _ = os.Remove(balloonTargetStatePath(path)) + }) + return filepath.Clean(path) +} diff --git a/lib/hypervisor/cloudhypervisor/cloudhypervisor.go b/lib/hypervisor/cloudhypervisor/cloudhypervisor.go index 2d3f77bb..95e0792a 100644 --- a/lib/hypervisor/cloudhypervisor/cloudhypervisor.go +++ b/lib/hypervisor/cloudhypervisor/cloudhypervisor.go @@ -13,7 +13,14 @@ import ( // CloudHypervisor implements hypervisor.Hypervisor for Cloud Hypervisor VMM. type CloudHypervisor struct { - client *vmm.VMM + client *vmm.VMM + socketPath string +} + +var balloonTargetCache hypervisor.BalloonTargetCache + +func clearBalloonTargetCache(socketPath string) { + balloonTargetCache.Delete(socketPath) } // New creates a new Cloud Hypervisor client for an existing VMM socket. 
@@ -23,7 +30,8 @@ func New(socketPath string) (*CloudHypervisor, error) { return nil, fmt.Errorf("create vmm client: %w", err) } return &CloudHypervisor{ - client: client, + client: client, + socketPath: socketPath, }, nil } @@ -39,6 +47,7 @@ func capabilities() hypervisor.Capabilities { return hypervisor.Capabilities{ SupportsSnapshot: true, SupportsHotplugMemory: true, + SupportsBalloonControl: true, SupportsPause: true, SupportsVsock: true, SupportsGPUPassthrough: true, @@ -57,6 +66,7 @@ func (c *CloudHypervisor) DeleteVM(ctx context.Context) error { if resp.StatusCode() != 204 { return fmt.Errorf("delete vm failed with status %d: %s", resp.StatusCode(), string(resp.Body)) } + clearBalloonTargetCache(c.socketPath) return nil } @@ -70,6 +80,7 @@ func (c *CloudHypervisor) Shutdown(ctx context.Context) error { if resp.StatusCode() != 204 { return fmt.Errorf("shutdown vmm failed with status %d", resp.StatusCode()) } + clearBalloonTargetCache(c.socketPath) return nil } @@ -207,3 +218,59 @@ func (c *CloudHypervisor) ResizeMemoryAndWait(ctx context.Context, bytes int64, // Timeout reached, but resize was requested successfully return nil } + +func (c *CloudHypervisor) SetTargetGuestMemoryBytes(ctx context.Context, bytes int64) error { + info, err := c.client.GetVmInfoWithResponse(ctx) + if err != nil { + return fmt.Errorf("get vm info for balloon update: %w", err) + } + if info.StatusCode() != 200 || info.JSON200 == nil { + return fmt.Errorf("get vm info for balloon update failed with status %d", info.StatusCode()) + } + if info.JSON200.Config.Balloon == nil { + return hypervisor.ErrNotSupported + } + + assigned := assignedGuestMemoryBytes(info.JSON200) + if bytes < 0 || bytes > assigned { + return fmt.Errorf("target guest memory %d is outside valid range [0,%d]", bytes, assigned) + } + + desiredBalloon := assigned - bytes + resp, err := c.client.PutVmResizeWithResponse(ctx, vmm.VmResize{DesiredBalloon: &desiredBalloon}) + if err != nil { + return fmt.Errorf("set 
balloon target: %w", err) + } + if resp.StatusCode() != 204 { + return fmt.Errorf("set balloon target failed with status %d", resp.StatusCode()) + } + balloonTargetCache.Store(c.socketPath, bytes) + return nil +} + +func (c *CloudHypervisor) GetTargetGuestMemoryBytes(ctx context.Context) (int64, error) { + if target, ok := balloonTargetCache.Load(c.socketPath); ok { + return target, nil + } + + info, err := c.client.GetVmInfoWithResponse(ctx) + if err != nil { + return 0, fmt.Errorf("get vm info for balloon read: %w", err) + } + if info.StatusCode() != 200 || info.JSON200 == nil { + return 0, fmt.Errorf("get vm info for balloon read failed with status %d", info.StatusCode()) + } + if info.JSON200.Config.Balloon == nil { + return 0, hypervisor.ErrNotSupported + } + assigned := assignedGuestMemoryBytes(info.JSON200) + return assigned - info.JSON200.Config.Balloon.Size, nil +} + +func assignedGuestMemoryBytes(info *vmm.VmInfo) int64 { + assigned := info.Config.Memory.Size + if info.MemoryActualSize != nil && info.Config.Balloon != nil { + assigned = *info.MemoryActualSize + info.Config.Balloon.Size + } + return assigned +} diff --git a/lib/hypervisor/cloudhypervisor/cloudhypervisor_test.go b/lib/hypervisor/cloudhypervisor/cloudhypervisor_test.go new file mode 100644 index 00000000..2e2596fb --- /dev/null +++ b/lib/hypervisor/cloudhypervisor/cloudhypervisor_test.go @@ -0,0 +1,51 @@ +package cloudhypervisor + +import ( + "testing" + + "github.com/kernel/hypeman/lib/vmm" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAssignedGuestMemoryBytes(t *testing.T) { + t.Run("uses configured memory without hotplug state", func(t *testing.T) { + info := &vmm.VmInfo{ + Config: vmm.VmConfig{ + Memory: &vmm.MemoryConfig{Size: 512}, + Balloon: &vmm.BalloonConfig{Size: 64}, + }, + } + + assert.Equal(t, int64(512), assignedGuestMemoryBytes(info)) + }) + + t.Run("includes hotplugged memory via actual-plus-balloon", func(t *testing.T) { + 
actual := int64(640) + info := &vmm.VmInfo{ + Config: vmm.VmConfig{ + Memory: &vmm.MemoryConfig{Size: 512}, + Balloon: &vmm.BalloonConfig{Size: 128}, + }, + MemoryActualSize: &actual, + } + + assert.Equal(t, int64(768), assignedGuestMemoryBytes(info)) + }) +} + +func TestGetTargetGuestMemoryBytesUsesWarmCacheBeforeVMInfo(t *testing.T) { + t.Parallel() + + socketPath := t.TempDir() + "/cloud-hypervisor.sock" + balloonTargetCache.Store(socketPath, int64(384)) + t.Cleanup(func() { + clearBalloonTargetCache(socketPath) + }) + + hv := &CloudHypervisor{socketPath: socketPath} + + target, err := hv.GetTargetGuestMemoryBytes(t.Context()) + require.NoError(t, err) + assert.Equal(t, int64(384), target) +} diff --git a/lib/hypervisor/firecracker/firecracker.go b/lib/hypervisor/firecracker/firecracker.go index 4c2b2a69..ad7f5185 100644 --- a/lib/hypervisor/firecracker/firecracker.go +++ b/lib/hypervisor/firecracker/firecracker.go @@ -54,6 +54,7 @@ func capabilities() hypervisor.Capabilities { return hypervisor.Capabilities{ SupportsSnapshot: true, SupportsHotplugMemory: false, + SupportsBalloonControl: true, SupportsPause: true, SupportsVsock: true, SupportsGPUPassthrough: false, @@ -125,6 +126,40 @@ func (f *Firecracker) ResizeMemoryAndWait(ctx context.Context, bytes int64, time return hypervisor.ErrNotSupported } +func (f *Firecracker) SetTargetGuestMemoryBytes(ctx context.Context, bytes int64) error { + cfg, err := f.getVMConfig(ctx) + if err != nil { + return err + } + desiredBalloonMiB := cfg.MachineConfig.MemSizeMiB - guestTargetBytesToMiB(bytes) + if desiredBalloonMiB < 0 { + return fmt.Errorf("target guest memory %d exceeds configured memory %d MiB", bytes, cfg.MachineConfig.MemSizeMiB) + } + + body := map[string]int64{"amount_mib": desiredBalloonMiB} + if _, err := f.do(ctx, http.MethodPatch, "/balloon", body, http.StatusNoContent); err != nil { + if strings.Contains(err.Error(), "Invalid request method and/or path") { + if _, putErr := f.do(ctx, http.MethodPut, 
"/balloon", body, http.StatusNoContent); putErr != nil { + if strings.Contains(putErr.Error(), "Invalid request method and/or path") { + return hypervisor.ErrNotSupported + } + return fmt.Errorf("set balloon target: %w", putErr) + } + return nil + } + return fmt.Errorf("set balloon target: %w", err) + } + return nil +} + +func (f *Firecracker) GetTargetGuestMemoryBytes(ctx context.Context) (int64, error) { + cfg, err := f.getVMConfig(ctx) + if err != nil { + return 0, err + } + return (cfg.MachineConfig.MemSizeMiB - cfg.Balloon.AmountMiB) * 1024 * 1024, nil +} + func (f *Firecracker) configureForBoot(ctx context.Context, cfg hypervisor.VMConfig) error { if cfg.SerialLogPath != "" { if err := os.MkdirAll(filepath.Dir(cfg.SerialLogPath), 0755); err != nil { @@ -200,6 +235,43 @@ func (f *Firecracker) postAction(ctx context.Context, action string) error { return nil } +type firecrackerVMConfig struct { + MachineConfig struct { + MemSizeMiB int64 `json:"mem_size_mib"` + } `json:"machine-config"` + Balloon struct { + AmountMiB int64 `json:"amount_mib"` + } `json:"balloon"` +} + +func (f *Firecracker) getVMConfig(ctx context.Context) (*firecrackerVMConfig, error) { + body, err := f.do(ctx, http.MethodGet, "/vm/config", nil, http.StatusOK) + if err != nil { + if strings.Contains(err.Error(), "Invalid request method and/or path") { + return nil, hypervisor.ErrNotSupported + } + return nil, fmt.Errorf("get vm config: %w", err) + } + + var cfg firecrackerVMConfig + if err := json.Unmarshal(body, &cfg); err != nil { + return nil, fmt.Errorf("decode vm config: %w", err) + } + return &cfg, nil +} + +func guestTargetBytesToMiB(bytes int64) int64 { + if bytes <= 0 { + return 0 + } + const mib = 1024 * 1024 + out := bytes / mib + if bytes%mib != 0 { + out++ + } + return out +} + func (f *Firecracker) do(ctx context.Context, method, path string, reqBody any, expectedStatus ...int) ([]byte, error) { var bodyReader io.Reader if reqBody != nil { diff --git 
a/lib/hypervisor/firecracker/firecracker_test.go b/lib/hypervisor/firecracker/firecracker_test.go index 8f79cb32..920b255c 100644 --- a/lib/hypervisor/firecracker/firecracker_test.go +++ b/lib/hypervisor/firecracker/firecracker_test.go @@ -24,3 +24,11 @@ func TestMapVMState(t *testing.T) { _, err = mapVMState("Shutdown") require.Error(t, err) } + +func TestGuestTargetBytesToMiB(t *testing.T) { + assert.Equal(t, int64(0), guestTargetBytesToMiB(0)) + assert.Equal(t, int64(0), guestTargetBytesToMiB(-1)) + assert.Equal(t, int64(1), guestTargetBytesToMiB(1)) + assert.Equal(t, int64(1), guestTargetBytesToMiB(1024*1024)) + assert.Equal(t, int64(2), guestTargetBytesToMiB(1024*1024+1)) +} diff --git a/lib/hypervisor/hypervisor.go b/lib/hypervisor/hypervisor.go index 5777f381..e3e1dc00 100644 --- a/lib/hypervisor/hypervisor.go +++ b/lib/hypervisor/hypervisor.go @@ -188,6 +188,16 @@ type Hypervisor interface { // Check Capabilities().SupportsHotplugMemory before calling. ResizeMemoryAndWait(ctx context.Context, bytes int64, timeout time.Duration) error + // SetTargetGuestMemoryBytes adjusts the runtime balloon target so the guest + // sees the requested amount of RAM. + // Check Capabilities().SupportsBalloonControl before calling. + SetTargetGuestMemoryBytes(ctx context.Context, bytes int64) error + + // GetTargetGuestMemoryBytes returns the current guest-visible RAM target after + // runtime ballooning is applied. + // Check Capabilities().SupportsBalloonControl before calling. + GetTargetGuestMemoryBytes(ctx context.Context) (int64, error) + // Capabilities returns what features this hypervisor supports. Capabilities() Capabilities } @@ -201,6 +211,9 @@ type Capabilities struct { // SupportsHotplugMemory indicates if ResizeMemory is available SupportsHotplugMemory bool + // SupportsBalloonControl indicates if runtime balloon target changes are available. 
+ SupportsBalloonControl bool + // SupportsPause indicates if Pause/Resume are available SupportsPause bool diff --git a/lib/hypervisor/qemu/qemu.go b/lib/hypervisor/qemu/qemu.go index 4db8a05b..1da70965 100644 --- a/lib/hypervisor/qemu/qemu.go +++ b/lib/hypervisor/qemu/qemu.go @@ -17,6 +17,12 @@ type QEMU struct { socketPath string // for self-removal from pool on error } +var balloonTargetCache hypervisor.BalloonTargetCache + +func clearBalloonTargetCache(socketPath string) { + balloonTargetCache.Delete(socketPath) +} + // New returns a QEMU client for the given socket path. // Uses a connection pool to ensure only one connection per socket exists. func New(socketPath string) (*QEMU, error) { @@ -44,6 +50,7 @@ func capabilities() hypervisor.Capabilities { return hypervisor.Capabilities{ SupportsSnapshot: true, // Uses QMP migrate file:// for snapshot SupportsHotplugMemory: false, // Not implemented - balloon not configured + SupportsBalloonControl: true, SupportsPause: true, SupportsVsock: true, SupportsGPUPassthrough: true, @@ -60,6 +67,7 @@ func (q *QEMU) DeleteVM(ctx context.Context) error { Remove(q.socketPath) return err } + clearBalloonTargetCache(q.socketPath) return nil } @@ -71,6 +79,7 @@ func (q *QEMU) Shutdown(ctx context.Context) error { } // Connection is gone after quit, remove from pool Remove(q.socketPath) + clearBalloonTargetCache(q.socketPath) return nil } @@ -175,3 +184,29 @@ func (q *QEMU) ResizeMemory(ctx context.Context, bytes int64) error { func (q *QEMU) ResizeMemoryAndWait(ctx context.Context, bytes int64, timeout time.Duration) error { return fmt.Errorf("memory resize not supported by QEMU implementation") } + +func (q *QEMU) SetTargetGuestMemoryBytes(ctx context.Context, bytes int64) error { + if bytes < 0 { + return fmt.Errorf("target guest memory %d must be non-negative", bytes) + } + if err := q.client.Balloon(bytes); err != nil { + Remove(q.socketPath) + return fmt.Errorf("set balloon target: %w", err) + } + 
balloonTargetCache.Store(q.socketPath, bytes) + return nil +} + +func (q *QEMU) GetTargetGuestMemoryBytes(ctx context.Context) (int64, error) { + _ = ctx + + if target, ok := balloonTargetCache.Load(q.socketPath); ok { + return target, nil + } + + config, err := loadVMConfig(filepath.Dir(q.socketPath)) + if err != nil { + return 0, fmt.Errorf("read qemu guest memory target: %w", err) + } + return config.MemoryBytes, nil +} diff --git a/lib/hypervisor/qemu/qemu_test.go b/lib/hypervisor/qemu/qemu_test.go new file mode 100644 index 00000000..e010a44a --- /dev/null +++ b/lib/hypervisor/qemu/qemu_test.go @@ -0,0 +1,21 @@ +package qemu + +import ( + "testing" + + "github.com/kernel/hypeman/lib/hypervisor" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGetTargetGuestMemoryBytesUsesSavedConfigOnColdStart(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + require.NoError(t, saveVMConfig(dir, hypervisor.VMConfig{MemoryBytes: 768})) + + hv := &QEMU{socketPath: dir + "/qemu.sock"} + target, err := hv.GetTargetGuestMemoryBytes(t.Context()) + require.NoError(t, err) + assert.Equal(t, int64(768), target) +} diff --git a/lib/hypervisor/qemu/qmp.go b/lib/hypervisor/qemu/qmp.go index f28fcb72..29f9eb40 100644 --- a/lib/hypervisor/qemu/qmp.go +++ b/lib/hypervisor/qemu/qmp.go @@ -105,6 +105,20 @@ func (c *Client) Run(cmd qmp.Command) ([]byte, error) { return c.domain.Run(cmd) } +// Balloon updates the guest-visible target RAM for the balloon device. +func (c *Client) Balloon(bytes int64) error { + return c.raw.Balloon(bytes) +} + +// QueryBalloon returns the current guest-visible RAM target for the balloon device. +func (c *Client) QueryBalloon() (int64, error) { + info, err := c.raw.QueryBalloon() + if err != nil { + return 0, err + } + return info.Actual, nil +} + // Migrate initiates a migration to the given URI (typically "file:///path"). // This is used for saving VM state to a file for snapshot/standby. 
func (c *Client) Migrate(uri string) error { diff --git a/lib/hypervisor/socket_cache_key.go b/lib/hypervisor/socket_cache_key.go new file mode 100644 index 00000000..efd31cd0 --- /dev/null +++ b/lib/hypervisor/socket_cache_key.go @@ -0,0 +1,20 @@ +package hypervisor + +import ( + "fmt" + "os" + "syscall" +) + +// SocketCacheKey returns a cache key that changes when a Unix socket path is +// recreated, preventing stale state from being reused across VM restarts. +func SocketCacheKey(socketPath string) string { + info, err := os.Stat(socketPath) + if err != nil { + return socketPath + } + if stat, ok := info.Sys().(*syscall.Stat_t); ok { + return fmt.Sprintf("%s:%d:%d", socketPath, stat.Dev, stat.Ino) + } + return socketPath +} diff --git a/lib/hypervisor/socket_cache_key_test.go b/lib/hypervisor/socket_cache_key_test.go new file mode 100644 index 00000000..8a7b0eed --- /dev/null +++ b/lib/hypervisor/socket_cache_key_test.go @@ -0,0 +1,30 @@ +package hypervisor + +import ( + "fmt" + "net" + "os" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestSocketCacheKeyChangesWhenSocketIsRecreated(t *testing.T) { + socketPath := fmt.Sprintf("/tmp/hypeman-socket-key-%d.sock", time.Now().UnixNano()) + t.Cleanup(func() { _ = os.Remove(socketPath) }) + + listener, err := net.Listen("unix", socketPath) + require.NoError(t, err) + firstKey := SocketCacheKey(socketPath) + require.NotEmpty(t, firstKey) + require.NoError(t, listener.Close()) + + listener, err = net.Listen("unix", socketPath) + require.NoError(t, err) + defer listener.Close() + secondKey := SocketCacheKey(socketPath) + require.NotEmpty(t, secondKey) + + require.NotEqual(t, firstKey, secondKey) +} diff --git a/lib/hypervisor/socket_pid_linux.go b/lib/hypervisor/socket_pid_linux.go new file mode 100644 index 00000000..7f46ebfa --- /dev/null +++ b/lib/hypervisor/socket_pid_linux.go @@ -0,0 +1,125 @@ +//go:build linux + +package hypervisor + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + 
"strconv" + "strings" +) + +// ResolveProcessPID finds the process currently holding the listening Unix +// socket for the given hypervisor control path. +func ResolveProcessPID(socketPath string) (int, error) { + socketRef, err := socketRefForPath(socketPath) + if err == nil { + if pid, refErr := pidBySocketRef(socketRef); refErr == nil { + return pid, nil + } + } + + if pid, cmdErr := pidByCmdline(socketPath); cmdErr == nil { + return pid, nil + } + + return 0, fmt.Errorf("resolve process pid for socket %s: no owning process found", socketPath) +} + +func pidBySocketRef(socketRef string) (int, error) { + procEntries, err := os.ReadDir("/proc") + if err != nil { + return 0, fmt.Errorf("read /proc: %w", err) + } + + for _, entry := range procEntries { + if !entry.IsDir() { + continue + } + + pid, err := strconv.Atoi(entry.Name()) + if err != nil { + continue + } + + fdEntries, err := os.ReadDir(filepath.Join("/proc", entry.Name(), "fd")) + if err != nil { + continue + } + for _, fdEntry := range fdEntries { + target, err := os.Readlink(filepath.Join("/proc", entry.Name(), "fd", fdEntry.Name())) + if err != nil { + continue + } + if strings.TrimSpace(target) == socketRef { + return pid, nil + } + } + } + + return 0, fmt.Errorf("resolve process pid for %s: no owning process found", socketRef) +} + +func pidByCmdline(socketPath string) (int, error) { + procEntries, err := os.ReadDir("/proc") + if err != nil { + return 0, fmt.Errorf("read /proc: %w", err) + } + + for _, entry := range procEntries { + if !entry.IsDir() { + continue + } + + pid, err := strconv.Atoi(entry.Name()) + if err != nil { + continue + } + + cmdline, err := os.ReadFile(filepath.Join("/proc", entry.Name(), "cmdline")) + if err != nil || len(cmdline) == 0 { + continue + } + for _, arg := range strings.Split(string(cmdline), "\x00") { + if arg == socketPath { + return pid, nil + } + } + } + + return 0, fmt.Errorf("resolve process pid for socket %s: no matching command line found", socketPath) +} + 
+func socketRefForPath(socketPath string) (string, error) { + file, err := os.Open("/proc/net/unix") + if err != nil { + return "", fmt.Errorf("open /proc/net/unix: %w", err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + fields := strings.Fields(scanner.Text()) + if len(fields) < 7 { + continue + } + if fields[0] == "Num" { + continue + } + path := fields[len(fields)-1] + if path != socketPath { + continue + } + inode := fields[6] + if inode == "" { + break + } + return fmt.Sprintf("socket:[%s]", inode), nil + } + if err := scanner.Err(); err != nil { + return "", fmt.Errorf("scan /proc/net/unix: %w", err) + } + return "", fmt.Errorf("resolve process pid for socket %s: socket inode not found", socketPath) +} diff --git a/lib/hypervisor/socket_pid_linux_test.go b/lib/hypervisor/socket_pid_linux_test.go new file mode 100644 index 00000000..27052453 --- /dev/null +++ b/lib/hypervisor/socket_pid_linux_test.go @@ -0,0 +1,25 @@ +//go:build linux + +package hypervisor + +import ( + "net" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestResolveProcessPID(t *testing.T) { + tmpDir := t.TempDir() + socketPath := filepath.Join(tmpDir, "test.sock") + + listener, err := net.Listen("unix", socketPath) + require.NoError(t, err) + defer listener.Close() + + pid, err := ResolveProcessPID(socketPath) + require.NoError(t, err) + require.Equal(t, os.Getpid(), pid) +} diff --git a/lib/hypervisor/socket_pid_other.go b/lib/hypervisor/socket_pid_other.go new file mode 100644 index 00000000..75db657e --- /dev/null +++ b/lib/hypervisor/socket_pid_other.go @@ -0,0 +1,11 @@ +//go:build !linux + +package hypervisor + +import "fmt" + +// ResolveProcessPID is only implemented on Linux, where the project relies on +// /proc socket metadata for runtime PID discovery. 
+func ResolveProcessPID(socketPath string) (int, error) { + return 0, fmt.Errorf("resolve process pid for socket %s: not supported on this platform", socketPath) +} diff --git a/lib/hypervisor/vz/client.go b/lib/hypervisor/vz/client.go index 51c1f283..3d371331 100644 --- a/lib/hypervisor/vz/client.go +++ b/lib/hypervisor/vz/client.go @@ -70,6 +70,10 @@ type snapshotRequest struct { DestinationPath string `json:"destination_path"` } +type balloonResponse struct { + TargetGuestMemoryBytes int64 `json:"target_guest_memory_bytes"` +} + func (c *Client) Capabilities() hypervisor.Capabilities { return capabilities() } @@ -78,6 +82,7 @@ func capabilities() hypervisor.Capabilities { return hypervisor.Capabilities{ SupportsSnapshot: runtime.GOARCH == "arm64", SupportsHotplugMemory: false, + SupportsBalloonControl: true, SupportsPause: true, SupportsVsock: true, SupportsGPUPassthrough: false, @@ -198,3 +203,24 @@ func (c *Client) ResizeMemory(ctx context.Context, bytes int64) error { func (c *Client) ResizeMemoryAndWait(ctx context.Context, bytes int64, timeout time.Duration) error { return hypervisor.ErrNotSupported } + +func (c *Client) SetTargetGuestMemoryBytes(ctx context.Context, targetBytes int64) error { + reqBody, err := json.Marshal(balloonResponse{TargetGuestMemoryBytes: targetBytes}) + if err != nil { + return fmt.Errorf("marshal balloon target: %w", err) + } + return c.doPut(ctx, "/api/v1/vm.balloon", bytes.NewReader(reqBody)) +} + +func (c *Client) GetTargetGuestMemoryBytes(ctx context.Context) (int64, error) { + body, err := c.doGet(ctx, "/api/v1/vm.balloon") + if err != nil { + return 0, fmt.Errorf("get balloon target: %w", err) + } + + var resp balloonResponse + if err := json.Unmarshal(body, &resp); err != nil { + return 0, fmt.Errorf("decode balloon target: %w", err) + } + return resp.TargetGuestMemoryBytes, nil +} diff --git a/lib/instances/create.go b/lib/instances/create.go index d423dc0e..9c9e85d3 100644 --- a/lib/instances/create.go +++ 
b/lib/instances/create.go @@ -3,6 +3,7 @@ package instances import ( "context" "fmt" + "log/slog" "path/filepath" "strings" "time" @@ -641,6 +642,7 @@ func (m *manager) startAndBootVM( if err != nil { return fmt.Errorf("start vm: %w", err) } + pid = resolveRuntimeHypervisorPID(log, stored.SocketPath, pid) // Store the PID for later cleanup stored.HypervisorPID = &pid @@ -659,6 +661,18 @@ func (m *manager) startAndBootVM( return nil } +func resolveRuntimeHypervisorPID(log *slog.Logger, socketPath string, fallbackPID int) int { + if processExists(fallbackPID) { + return fallbackPID + } + pid, err := hypervisor.ResolveProcessPID(socketPath) + if err != nil { + log.Debug("using fallback hypervisor pid", "socket_path", socketPath, "pid", fallbackPID, "error", err) + return fallbackPID + } + return pid +} + // buildHypervisorConfig creates a hypervisor-agnostic VM configuration func (m *manager) buildHypervisorConfig(ctx context.Context, inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) (hypervisor.VMConfig, error) { // Get system file paths diff --git a/lib/instances/exec_test.go b/lib/instances/exec_test.go index a0ff212c..06a07e22 100644 --- a/lib/instances/exec_test.go +++ b/lib/instances/exec_test.go @@ -20,6 +20,7 @@ import ( // waitForExecAgent polls until exec-agent is ready func waitForExecAgent(ctx context.Context, mgr *manager, instanceID string, timeout time.Duration) error { + timeout = integrationTestTimeout(timeout) deadline := time.Now().Add(timeout) lastState := StateUnknown var lastErr error @@ -260,6 +261,12 @@ func TestExecConcurrent(t *testing.T) { // If concurrent, should complete in ~2-4s; if serialized would be ~10s maxExpected := time.Duration(streamDuration+2) * time.Second + if os.Getenv("CI") == "true" { + // GitHub runners can add a bit of scheduling jitter here even when the + // streams are overlapping correctly, but serialized execution is still far + // above this threshold. 
+ maxExpected += time.Second + } require.Less(t, streamElapsed, maxExpected, "streams appear serialized - took %v, expected < %v", streamElapsed, maxExpected) diff --git a/lib/instances/guestmemory_active_ballooning_test_helpers_test.go b/lib/instances/guestmemory_active_ballooning_test_helpers_test.go new file mode 100644 index 00000000..4abec94f --- /dev/null +++ b/lib/instances/guestmemory_active_ballooning_test_helpers_test.go @@ -0,0 +1,159 @@ +package instances + +import ( + "context" + "log/slog" + "os" + "testing" + "time" + + "github.com/kernel/hypeman/lib/guestmemory" + "github.com/kernel/hypeman/lib/hypervisor" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type staticBalloonSource struct { + vms []guestmemory.BalloonVM +} + +type fixedPressureSampler struct { + sample guestmemory.HostPressureSample +} + +func (s *staticBalloonSource) ListBalloonVMs(ctx context.Context) ([]guestmemory.BalloonVM, error) { + _ = ctx + return s.vms, nil +} + +func (s *fixedPressureSampler) Sample(ctx context.Context) (guestmemory.HostPressureSample, error) { + _ = ctx + return s.sample, nil +} + +func newActiveBallooningTestController(t *testing.T, inst *Instance) guestmemory.Controller { + t.Helper() + + cfg := guestmemory.DefaultActiveBallooningConfig() + cfg.Enabled = true + cfg.MinAdjustmentBytes = 1 + cfg.PerVMMaxStepBytes = inst.Size + inst.HotplugSize + cfg.PerVMCooldown = 1 * time.Millisecond + + return guestmemory.NewControllerWithSampler( + guestmemory.Policy{ + Enabled: true, + ReclaimEnabled: true, + }, + cfg, + &staticBalloonSource{ + vms: []guestmemory.BalloonVM{ + { + ID: inst.Id, + Name: inst.Name, + HypervisorType: inst.HypervisorType, + SocketPath: inst.SocketPath, + AssignedMemoryBytes: inst.Size + inst.HotplugSize, + }, + }, + }, + &fixedPressureSampler{ + sample: guestmemory.HostPressureSample{ + TotalBytes: 64 * 1024 * 1024 * 1024, + AvailableBytes: 32 * 1024 * 1024 * 1024, + AvailablePercent: 50, + Stressed: 
false, + }, + }, + slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelWarn})), + ) +} + +func requireRuntimeGuestMemoryTarget(t *testing.T, ctx context.Context, inst *Instance) int64 { + t.Helper() + + hv, err := hypervisor.NewClient(inst.HypervisorType, inst.SocketPath) + require.NoError(t, err) + + target, err := hv.GetTargetGuestMemoryBytes(ctx) + require.NoError(t, err) + return target +} + +func requireRuntimeGuestMemoryTargetEventually(t *testing.T, ctx context.Context, inst *Instance, expected int64) { + t.Helper() + + deadline := time.Now().Add(60 * time.Second) + var last int64 + var lastErr error + + for time.Now().Before(deadline) { + hv, err := hypervisor.NewClient(inst.HypervisorType, inst.SocketPath) + if err == nil { + last, err = hv.GetTargetGuestMemoryBytes(ctx) + lastErr = err + if err == nil && last == expected { + return + } + } else { + lastErr = err + } + time.Sleep(250 * time.Millisecond) + } + + require.NoError(t, lastErr) + require.Equal(t, expected, last) +} + +func requireManualReclaimApplied(t *testing.T, ctx context.Context, controller guestmemory.Controller, inst *Instance, reclaimBytes int64, holdFor time.Duration) guestmemory.ManualReclaimResponse { + t.Helper() + + resp, err := controller.TriggerReclaim(ctx, guestmemory.ManualReclaimRequest{ + ReclaimBytes: reclaimBytes, + HoldFor: holdFor, + Reason: "integration-test", + }) + require.NoError(t, err) + requireRuntimeGuestMemoryTargetEventually(t, ctx, inst, inst.Size+inst.HotplugSize-resp.AppliedReclaimBytes) + return resp +} + +func requireManualReclaimCleared(t *testing.T, ctx context.Context, controller guestmemory.Controller, inst *Instance) guestmemory.ManualReclaimResponse { + t.Helper() + + resp, err := controller.TriggerReclaim(ctx, guestmemory.ManualReclaimRequest{ + ReclaimBytes: 0, + HoldFor: 0, + Reason: "integration-test-clear", + }) + require.NoError(t, err) + requireRuntimeGuestMemoryTargetEventually(t, ctx, inst, inst.Size+inst.HotplugSize) 
+ return resp +} + +func assertActiveBallooningLifecycle(t *testing.T, ctx context.Context, inst *Instance) { + t.Helper() + + assigned := inst.Size + inst.HotplugSize + initialTarget := requireRuntimeGuestMemoryTarget(t, ctx, inst) + assert.Equal(t, assigned, initialTarget, "runtime balloon target should start at full assigned memory") + + controller := newActiveBallooningTestController(t, inst) + + reclaimResp := requireManualReclaimApplied(t, ctx, controller, inst, 1*1024*1024*1024, 5*time.Minute) + require.Len(t, reclaimResp.Actions, 1) + assert.NotNil(t, reclaimResp.HoldUntil) + assert.Equal(t, int64(1*1024*1024*1024), reclaimResp.Actions[0].AppliedReclaimBytes) + assert.Equal(t, assigned-int64(1*1024*1024*1024), reclaimResp.Actions[0].TargetGuestMemoryBytes) + + clearResp := requireManualReclaimCleared(t, ctx, controller, inst) + assert.Nil(t, clearResp.HoldUntil) + + floorResp := requireManualReclaimApplied(t, ctx, controller, inst, assigned, 5*time.Minute) + require.Len(t, floorResp.Actions, 1) + expectedFloor := assigned / 2 + assert.Equal(t, expectedFloor, floorResp.Actions[0].TargetGuestMemoryBytes) + assert.Equal(t, assigned-expectedFloor, floorResp.Actions[0].AppliedReclaimBytes) + + requireManualReclaimCleared(t, ctx, controller, inst) +} diff --git a/lib/instances/guestmemory_darwin_test.go b/lib/instances/guestmemory_darwin_test.go index 560e7735..a7ef8f0f 100644 --- a/lib/instances/guestmemory_darwin_test.go +++ b/lib/instances/guestmemory_darwin_test.go @@ -69,6 +69,7 @@ func TestGuestMemoryPolicyVZ(t *testing.T) { require.NoError(t, err) require.NotNil(t, instMeta.HypervisorPID) assertLowIdleVZHostMemoryFootprint(t, *instMeta.HypervisorPID, 192*1024) + assertActiveBallooningLifecycle(t, ctx, inst) } func forceEnableGuestMemoryPolicyForVZTest(mgr *manager) { diff --git a/lib/instances/guestmemory_linux_test.go b/lib/instances/guestmemory_linux_test.go index 4cb8986a..c206dc19 100644 --- a/lib/instances/guestmemory_linux_test.go +++ 
b/lib/instances/guestmemory_linux_test.go @@ -45,7 +45,10 @@ func TestGuestMemoryPolicyCloudHypervisor(t *testing.T) { Cmd: []string{guestMemoryIdleScript()}, }) require.NoError(t, err) - t.Cleanup(func() { _ = mgr.DeleteInstance(ctx, inst.Id) }) + t.Cleanup(func() { + logInstanceArtifactsOnFailure(t, mgr, inst.Id) + _ = mgr.DeleteInstance(ctx, inst.Id) + }) require.NoError(t, waitForVMReady(ctx, inst.SocketPath, 10*time.Second)) @@ -64,8 +67,8 @@ func TestGuestMemoryPolicyCloudHypervisor(t *testing.T) { assert.True(t, infoResp.JSON200.Config.Balloon.DeflateOnOom != nil && *infoResp.JSON200.Config.Balloon.DeflateOnOom) assert.True(t, infoResp.JSON200.Config.Balloon.FreePageReporting != nil && *infoResp.JSON200.Config.Balloon.FreePageReporting) - pid := requireHypervisorPID(t, ctx, mgr, inst.Id) - assertLowIdleHostMemoryFootprint(t, "cloud-hypervisor", pid, 512*1024) + assertLowIdleHostMemoryFootprint(t, ctx, mgr, inst.Id, "cloud-hypervisor", 512*1024) + assertActiveBallooningLifecycle(t, ctx, inst) } func TestGuestMemoryPolicyQEMU(t *testing.T) { @@ -91,7 +94,10 @@ func TestGuestMemoryPolicyQEMU(t *testing.T) { Cmd: []string{guestMemoryIdleScript()}, }) require.NoError(t, err) - t.Cleanup(func() { _ = mgr.DeleteInstance(ctx, inst.Id) }) + t.Cleanup(func() { + logInstanceArtifactsOnFailure(t, mgr, inst.Id) + _ = mgr.DeleteInstance(ctx, inst.Id) + }) require.NoError(t, waitForQEMUReady(ctx, inst.SocketPath, 10*time.Second)) @@ -103,7 +109,8 @@ func TestGuestMemoryPolicyQEMU(t *testing.T) { assert.Contains(t, joined, "init_on_free=0") assert.Contains(t, joined, "virtio-balloon-pci", "qemu cmdline should include virtio balloon device") - assertLowIdleHostMemoryFootprint(t, "qemu", pid, 640*1024) + assertLowIdleHostMemoryFootprint(t, ctx, mgr, inst.Id, "qemu", 640*1024) + assertActiveBallooningLifecycle(t, ctx, inst) } func TestGuestMemoryPolicyFirecracker(t *testing.T) { @@ -129,7 +136,10 @@ func TestGuestMemoryPolicyFirecracker(t *testing.T) { Cmd: 
[]string{guestMemoryIdleScript()}, }) require.NoError(t, err) - t.Cleanup(func() { _ = mgr.DeleteInstance(ctx, inst.Id) }) + t.Cleanup(func() { + logInstanceArtifactsOnFailure(t, mgr, inst.Id) + _ = mgr.DeleteInstance(ctx, inst.Id) + }) vmCfg, err := getFirecrackerVMConfig(inst.SocketPath) require.NoError(t, err) @@ -139,14 +149,33 @@ func TestGuestMemoryPolicyFirecracker(t *testing.T) { assert.True(t, vmCfg.Balloon.FreePageHinting) assert.True(t, vmCfg.Balloon.FreePageReporting) - pid := requireHypervisorPID(t, ctx, mgr, inst.Id) - assertLowIdleHostMemoryFootprint(t, "firecracker", pid, 512*1024) + assertLowIdleHostMemoryFootprint(t, ctx, mgr, inst.Id, "firecracker", 512*1024) + assertActiveBallooningLifecycle(t, ctx, inst) } func guestMemoryIdleScript() string { return "set -e; sleep 180" } +func logInstanceArtifactsOnFailure(t *testing.T, mgr *manager, instanceID string) { + t.Helper() + if !t.Failed() { + return + } + + for _, path := range []string{ + mgr.paths.InstanceVMMLog(instanceID), + mgr.paths.InstanceAppLog(instanceID), + } { + data, err := os.ReadFile(path) + if err != nil { + t.Logf("failed to read %s: %v", path, err) + continue + } + t.Logf("%s:\n%s", path, string(data)) + } +} + func forceEnableGuestMemoryPolicyForTest(mgr *manager) { mgr.guestMemoryPolicy = guestmemory.Policy{ Enabled: true, @@ -182,11 +211,17 @@ func requireHypervisorPID(t *testing.T, ctx context.Context, mgr *manager, insta t.Helper() inst, err := mgr.GetInstance(ctx, instanceID) require.NoError(t, err) + if inst.HypervisorPID != nil && processExists(*inst.HypervisorPID) { + return *inst.HypervisorPID + } + if pid, err := hypervisor.ResolveProcessPID(inst.SocketPath); err == nil { + return pid + } require.NotNil(t, inst.HypervisorPID) return *inst.HypervisorPID } -func assertLowIdleHostMemoryFootprint(t *testing.T, hypervisorName string, pid int, maxPSSKB int64) { +func assertLowIdleHostMemoryFootprint(t *testing.T, ctx context.Context, mgr *manager, instanceID string, 
hypervisorName string, maxPSSKB int64) { t.Helper() // Give the guest a short settle window, then sample host memory. @@ -194,8 +229,14 @@ func assertLowIdleHostMemoryFootprint(t *testing.T, hypervisorName string, pid i var pssSamplesKB []int64 var rssSamplesKB []int64 for i := 0; i < 6; i++ { - pssSamplesKB = append(pssSamplesKB, mustReadPSSKB(t, pid)) - rssSamplesKB = append(rssSamplesKB, mustReadRSSBytes(t, pid)/1024) + pid := requireHypervisorPID(t, ctx, mgr, instanceID) + pssKB, rssKB, ok := readMemorySampleKB(t, ctx, mgr, instanceID, pid) + if !ok { + t.Logf("skipping host memory footprint assertion for %s: unable to read live PSS sample", hypervisorName) + return + } + pssSamplesKB = append(pssSamplesKB, pssKB) + rssSamplesKB = append(rssSamplesKB, rssKB) time.Sleep(1 * time.Second) } @@ -218,42 +259,83 @@ func assertLowIdleHostMemoryFootprint(t *testing.T, hypervisorName string, pid i ) } +func readMemorySampleKB(t *testing.T, ctx context.Context, mgr *manager, instanceID string, initialPID int) (int64, int64, bool) { + t.Helper() + + pid := initialPID + for attempt := 0; attempt < 3; attempt++ { + pssKB, err := readPSSKB(pid) + if err == nil { + rssBytes, err := readRSSBytes(pid) + if err == nil { + return pssKB, rssBytes / 1024, true + } + } + time.Sleep(100 * time.Millisecond) + pid = requireHypervisorPID(t, ctx, mgr, instanceID) + } + + return 0, 0, false +} + func mustReadRSSBytes(t *testing.T, pid int) int64 { t.Helper() + rssBytes, err := readRSSBytes(pid) + require.NoError(t, err) + return rssBytes +} + +func readRSSBytes(pid int) (int64, error) { statusPath := fmt.Sprintf("/proc/%d/status", pid) data, err := os.ReadFile(statusPath) - require.NoError(t, err) + if err != nil { + return 0, err + } for _, line := range strings.Split(string(data), "\n") { if strings.HasPrefix(line, "VmRSS:") { fields := strings.Fields(line) - require.GreaterOrEqual(t, len(fields), 2) + if len(fields) < 2 { + return 0, fmt.Errorf("VmRSS line malformed in %s", statusPath) 
+ } kb, err := strconv.ParseInt(fields[1], 10, 64) - require.NoError(t, err) - return kb * 1024 + if err != nil { + return 0, err + } + return kb * 1024, nil } } - t.Fatalf("VmRSS not found in %s", statusPath) - return 0 + return 0, fmt.Errorf("VmRSS not found in %s", statusPath) } func mustReadPSSKB(t *testing.T, pid int) int64 { t.Helper() + pssKB, err := readPSSKB(pid) + require.NoError(t, err) + return pssKB +} + +func readPSSKB(pid int) (int64, error) { smapsRollupPath := fmt.Sprintf("/proc/%d/smaps_rollup", pid) data, err := os.ReadFile(smapsRollupPath) - require.NoError(t, err) + if err != nil { + return 0, err + } for _, line := range strings.Split(string(data), "\n") { if strings.HasPrefix(line, "Pss:") { fields := strings.Fields(line) - require.GreaterOrEqual(t, len(fields), 2) + if len(fields) < 2 { + return 0, fmt.Errorf("Pss line malformed in %s", smapsRollupPath) + } kb, err := strconv.ParseInt(fields[1], 10, 64) - require.NoError(t, err) - return kb + if err != nil { + return 0, err + } + return kb, nil } } - t.Fatalf("Pss not found in %s", smapsRollupPath) - return 0 + return 0, fmt.Errorf("Pss not found in %s", smapsRollupPath) } type firecrackerVMConfig struct { diff --git a/lib/instances/manager_test.go b/lib/instances/manager_test.go index 3003781c..7fc7b5c3 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -112,6 +112,7 @@ func waitForVMReady(ctx context.Context, socketPath string, timeout time.Duratio // waitForInstanceState polls GetInstance until the expected state is observed or timeout expires. 
func waitForInstanceState(ctx context.Context, mgr Manager, instanceID string, expected State, timeout time.Duration) (*Instance, error) { + timeout = integrationTestTimeout(timeout) deadline := time.Now().Add(timeout) lastState := StateUnknown lastErr := error(nil) @@ -136,6 +137,13 @@ func waitForInstanceState(ctx context.Context, mgr Manager, instanceID string, e return nil, fmt.Errorf("instance %s did not reach %s within %v (last state: %s)", instanceID, expected, timeout, lastState) } +func integrationTestTimeout(timeout time.Duration) time.Duration { + if os.Getenv("CI") == "true" && timeout < 45*time.Second { + return 45 * time.Second + } + return timeout +} + // waitForLogMessage polls instance logs until the message appears or times out func waitForLogMessage(ctx context.Context, mgr *manager, instanceID, message string, timeout time.Duration) error { deadline := time.Now().Add(timeout) diff --git a/lib/instances/query.go b/lib/instances/query.go index ed59f5e1..96d85f5f 100644 --- a/lib/instances/query.go +++ b/lib/instances/query.go @@ -7,9 +7,11 @@ import ( "io" "os" "path/filepath" + "runtime" "slices" "strconv" "strings" + "syscall" "time" "github.com/kernel/hypeman/lib/hypervisor" @@ -337,6 +339,7 @@ func (m *manager) toInstanceWithStateDerivation(ctx context.Context, meta *metad HasSnapshot: m.hasSnapshot(meta.StoredMetadata.DataDir), BootMarkersHydrated: result.BootMarkersHydrated, } + refreshHypervisorPID(&inst.StoredMetadata, result.State) // If VM is stopped and exit info isn't persisted yet, populate in-memory // from the serial console log. This is read-only -- no metadata writes. 
@@ -351,6 +354,59 @@ func (m *manager) toInstanceWithStateDerivation(ctx context.Context, meta *metad return inst } +func refreshHypervisorPID(stored *StoredMetadata, state State) { + if !state.RequiresVMM() && state != StateUnknown { + return + } + if stored.HypervisorPID != nil && processExists(*stored.HypervisorPID) { + return + } + if stored.SocketPath == "" { + return + } + if pid, err := hypervisor.ResolveProcessPID(stored.SocketPath); err == nil { + stored.HypervisorPID = &pid + return + } +} + +func processExists(pid int) bool { + if pid <= 0 { + return false + } + err := syscall.Kill(pid, 0) + if err != nil && err != syscall.EPERM { + return false + } + if runtime.GOOS != "linux" { + return true + } + state, err := readLinuxProcessState(pid) + if err != nil { + return true + } + return state != "Z" +} + +func readLinuxProcessState(pid int) (string, error) { + statusPath := filepath.Join("/proc", strconv.Itoa(pid), "status") + data, err := os.ReadFile(statusPath) + if err != nil { + return "", err + } + for _, line := range strings.Split(string(data), "\n") { + if !strings.HasPrefix(line, "State:") { + continue + } + fields := strings.Fields(line) + if len(fields) < 2 { + return "", fmt.Errorf("malformed process state in %s", statusPath) + } + return fields[1], nil + } + return "", fmt.Errorf("process state missing from %s", statusPath) +} + // parseExitSentinel reads the last lines of the serial console log to find the // HYPEMAN-EXIT sentinel written by init before shutdown. // Returns the exit code, message, and whether a sentinel was found. 
diff --git a/lib/instances/restore.go b/lib/instances/restore.go index 369525fe..bd6ef5aa 100644 --- a/lib/instances/restore.go +++ b/lib/instances/restore.go @@ -316,6 +316,7 @@ func (m *manager) restoreFromSnapshot( if err != nil { return 0, nil, fmt.Errorf("restore vm: %w", err) } + pid = resolveRuntimeHypervisorPID(log, stored.SocketPath, pid) log.DebugContext(ctx, "VM restored from snapshot successfully", "instance_id", stored.Id, "pid", pid) return pid, hv, nil diff --git a/lib/oapi/oapi.go b/lib/oapi/oapi.go index cb38c56d..25a6f140 100644 --- a/lib/oapi/oapi.go +++ b/lib/oapi/oapi.go @@ -121,6 +121,20 @@ const ( InstanceStateUnknown InstanceState = "Unknown" ) +// Defines values for MemoryReclaimActionHypervisor. +const ( + MemoryReclaimActionHypervisorCloudHypervisor MemoryReclaimActionHypervisor = "cloud-hypervisor" + MemoryReclaimActionHypervisorFirecracker MemoryReclaimActionHypervisor = "firecracker" + MemoryReclaimActionHypervisorQemu MemoryReclaimActionHypervisor = "qemu" + MemoryReclaimActionHypervisorVz MemoryReclaimActionHypervisor = "vz" +) + +// Defines values for MemoryReclaimResponseHostPressureState. +const ( + Healthy MemoryReclaimResponseHostPressureState = "healthy" + Pressure MemoryReclaimResponseHostPressureState = "pressure" +) + // Defines values for RestoreSnapshotRequestTargetHypervisor. const ( RestoreSnapshotRequestTargetHypervisorCloudHypervisor RestoreSnapshotRequestTargetHypervisor = "cloud-hypervisor" @@ -131,10 +145,10 @@ const ( // Defines values for SnapshotSourceHypervisor. 
const ( - CloudHypervisor SnapshotSourceHypervisor = "cloud-hypervisor" - Firecracker SnapshotSourceHypervisor = "firecracker" - Qemu SnapshotSourceHypervisor = "qemu" - Vz SnapshotSourceHypervisor = "vz" + SnapshotSourceHypervisorCloudHypervisor SnapshotSourceHypervisor = "cloud-hypervisor" + SnapshotSourceHypervisorFirecracker SnapshotSourceHypervisor = "firecracker" + SnapshotSourceHypervisorQemu SnapshotSourceHypervisor = "qemu" + SnapshotSourceHypervisorVz SnapshotSourceHypervisor = "vz" ) // Defines values for SnapshotKind. @@ -882,6 +896,59 @@ type InstanceStats struct { NetworkTxBytes int64 `json:"network_tx_bytes"` } +// MemoryReclaimAction defines model for MemoryReclaimAction. +type MemoryReclaimAction struct { + AppliedReclaimBytes int64 `json:"applied_reclaim_bytes"` + AssignedMemoryBytes int64 `json:"assigned_memory_bytes"` + + // Error Error message when status is error or unsupported. + Error *string `json:"error,omitempty"` + Hypervisor MemoryReclaimActionHypervisor `json:"hypervisor"` + InstanceId string `json:"instance_id"` + InstanceName string `json:"instance_name"` + PlannedTargetGuestMemoryBytes int64 `json:"planned_target_guest_memory_bytes"` + PreviousTargetGuestMemoryBytes int64 `json:"previous_target_guest_memory_bytes"` + ProtectedFloorBytes int64 `json:"protected_floor_bytes"` + + // Status Result of this VM's reclaim step. + Status string `json:"status"` + TargetGuestMemoryBytes int64 `json:"target_guest_memory_bytes"` +} + +// MemoryReclaimActionHypervisor defines model for MemoryReclaimAction.Hypervisor. +type MemoryReclaimActionHypervisor string + +// MemoryReclaimRequest defines model for MemoryReclaimRequest. +type MemoryReclaimRequest struct { + // DryRun Calculate a reclaim plan without applying balloon changes or creating a hold. + DryRun *bool `json:"dry_run,omitempty"` + + // HoldFor How long to keep the reclaim hold active (Go duration string). Defaults to 5m when omitted. 
+ HoldFor *string `json:"hold_for,omitempty"` + + // Reason Optional operator-provided reason attached to logs and traces. + Reason *string `json:"reason,omitempty"` + + // ReclaimBytes Total bytes of guest memory to reclaim across eligible VMs. + ReclaimBytes int64 `json:"reclaim_bytes"` +} + +// MemoryReclaimResponse defines model for MemoryReclaimResponse. +type MemoryReclaimResponse struct { + Actions []MemoryReclaimAction `json:"actions"` + AppliedReclaimBytes int64 `json:"applied_reclaim_bytes"` + + // HoldUntil When the current manual reclaim hold expires. + HoldUntil *time.Time `json:"hold_until,omitempty"` + HostAvailableBytes int64 `json:"host_available_bytes"` + HostPressureState MemoryReclaimResponseHostPressureState `json:"host_pressure_state"` + PlannedReclaimBytes int64 `json:"planned_reclaim_bytes"` + RequestedReclaimBytes int64 `json:"requested_reclaim_bytes"` +} + +// MemoryReclaimResponseHostPressureState defines model for MemoryReclaimResponse.HostPressureState. +type MemoryReclaimResponseHostPressureState string + // PassthroughDevice Physical GPU available for passthrough type PassthroughDevice struct { // Available Whether this GPU is available (not attached to an instance) @@ -1290,6 +1357,9 @@ type StartInstanceJSONRequestBody StartInstanceJSONBody // AttachVolumeJSONRequestBody defines body for AttachVolume for application/json ContentType. type AttachVolumeJSONRequestBody = AttachVolumeRequest +// ReclaimMemoryJSONRequestBody defines body for ReclaimMemory for application/json ContentType. +type ReclaimMemoryJSONRequestBody = MemoryReclaimRequest + // ForkSnapshotJSONRequestBody defines body for ForkSnapshot for application/json ContentType. 
type ForkSnapshotJSONRequestBody = ForkSnapshotRequest @@ -1500,6 +1570,11 @@ type ClientInterface interface { // GetResources request GetResources(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) + // ReclaimMemoryWithBody request with any body + ReclaimMemoryWithBody(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) + + ReclaimMemory(ctx context.Context, body ReclaimMemoryJSONRequestBody, reqEditors ...RequestEditorFn) (*http.Response, error) + // ListSnapshots request ListSnapshots(ctx context.Context, params *ListSnapshotsParams, reqEditors ...RequestEditorFn) (*http.Response, error) @@ -2096,6 +2171,30 @@ func (c *Client) GetResources(ctx context.Context, reqEditors ...RequestEditorFn return c.Client.Do(req) } +func (c *Client) ReclaimMemoryWithBody(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewReclaimMemoryRequestWithBody(c.Server, contentType, body) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) ReclaimMemory(ctx context.Context, body ReclaimMemoryJSONRequestBody, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewReclaimMemoryRequest(c.Server, body) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + func (c *Client) ListSnapshots(ctx context.Context, params *ListSnapshotsParams, reqEditors ...RequestEditorFn) (*http.Response, error) { req, err := NewListSnapshotsRequest(c.Server, params) if err != nil { @@ -3784,6 +3883,46 @@ func NewGetResourcesRequest(server string) (*http.Request, error) { return req, nil } +// NewReclaimMemoryRequest calls the generic ReclaimMemory builder 
with application/json body +func NewReclaimMemoryRequest(server string, body ReclaimMemoryJSONRequestBody) (*http.Request, error) { + var bodyReader io.Reader + buf, err := json.Marshal(body) + if err != nil { + return nil, err + } + bodyReader = bytes.NewReader(buf) + return NewReclaimMemoryRequestWithBody(server, "application/json", bodyReader) +} + +// NewReclaimMemoryRequestWithBody generates requests for ReclaimMemory with any type of body +func NewReclaimMemoryRequestWithBody(server string, contentType string, body io.Reader) (*http.Request, error) { + var err error + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/resources/memory/reclaim") + if operationPath[0] == '/' { + operationPath = "." + operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("POST", queryURL.String(), body) + if err != nil { + return nil, err + } + + req.Header.Add("Content-Type", contentType) + + return req, nil +} + // NewListSnapshotsRequest generates requests for ListSnapshots func NewListSnapshotsRequest(server string, params *ListSnapshotsParams) (*http.Request, error) { var err error @@ -4418,6 +4557,11 @@ type ClientWithResponsesInterface interface { // GetResourcesWithResponse request GetResourcesWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*GetResourcesResponse, error) + // ReclaimMemoryWithBodyWithResponse request with any body + ReclaimMemoryWithBodyWithResponse(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*ReclaimMemoryResponse, error) + + ReclaimMemoryWithResponse(ctx context.Context, body ReclaimMemoryJSONRequestBody, reqEditors ...RequestEditorFn) (*ReclaimMemoryResponse, error) + // ListSnapshotsWithResponse request ListSnapshotsWithResponse(ctx context.Context, params *ListSnapshotsParams, reqEditors ...RequestEditorFn) (*ListSnapshotsResponse, error) @@ 
-5359,6 +5503,31 @@ func (r GetResourcesResponse) StatusCode() int { return 0 } +type ReclaimMemoryResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *MemoryReclaimResponse + JSON400 *Error + JSON401 *Error + JSON500 *Error +} + +// Status returns HTTPResponse.Status +func (r ReclaimMemoryResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r ReclaimMemoryResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + type ListSnapshotsResponse struct { Body []byte HTTPResponse *http.Response @@ -5993,6 +6162,23 @@ func (c *ClientWithResponses) GetResourcesWithResponse(ctx context.Context, reqE return ParseGetResourcesResponse(rsp) } +// ReclaimMemoryWithBodyWithResponse request with arbitrary body returning *ReclaimMemoryResponse +func (c *ClientWithResponses) ReclaimMemoryWithBodyWithResponse(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*ReclaimMemoryResponse, error) { + rsp, err := c.ReclaimMemoryWithBody(ctx, contentType, body, reqEditors...) + if err != nil { + return nil, err + } + return ParseReclaimMemoryResponse(rsp) +} + +func (c *ClientWithResponses) ReclaimMemoryWithResponse(ctx context.Context, body ReclaimMemoryJSONRequestBody, reqEditors ...RequestEditorFn) (*ReclaimMemoryResponse, error) { + rsp, err := c.ReclaimMemory(ctx, body, reqEditors...) + if err != nil { + return nil, err + } + return ParseReclaimMemoryResponse(rsp) +} + // ListSnapshotsWithResponse request returning *ListSnapshotsResponse func (c *ClientWithResponses) ListSnapshotsWithResponse(ctx context.Context, params *ListSnapshotsParams, reqEditors ...RequestEditorFn) (*ListSnapshotsResponse, error) { rsp, err := c.ListSnapshots(ctx, params, reqEditors...) 
@@ -7717,6 +7903,53 @@ func ParseGetResourcesResponse(rsp *http.Response) (*GetResourcesResponse, error return response, nil } +// ParseReclaimMemoryResponse parses an HTTP response from a ReclaimMemoryWithResponse call +func ParseReclaimMemoryResponse(rsp *http.Response) (*ReclaimMemoryResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &ReclaimMemoryResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 200: + var dest MemoryReclaimResponse + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON200 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 400: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON400 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 401: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON401 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + // ParseListSnapshotsResponse parses an HTTP response from a ListSnapshotsWithResponse call func ParseListSnapshotsResponse(rsp *http.Response) (*ListSnapshotsResponse, error) { bodyBytes, err := io.ReadAll(rsp.Body) @@ -8225,6 +8458,9 @@ type ServerInterface interface { // Get host resource capacity and allocations // (GET /resources) GetResources(w http.ResponseWriter, r *http.Request) + // Trigger proactive guest memory reclaim + // (POST /resources/memory/reclaim) + ReclaimMemory(w http.ResponseWriter, r *http.Request) // List snapshots // (GET 
/snapshots) ListSnapshots(w http.ResponseWriter, r *http.Request, params ListSnapshotsParams) @@ -8480,6 +8716,12 @@ func (_ Unimplemented) GetResources(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNotImplemented) } +// Trigger proactive guest memory reclaim +// (POST /resources/memory/reclaim) +func (_ Unimplemented) ReclaimMemory(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotImplemented) +} + // List snapshots // (GET /snapshots) func (_ Unimplemented) ListSnapshots(w http.ResponseWriter, r *http.Request, params ListSnapshotsParams) { @@ -9705,6 +9947,26 @@ func (siw *ServerInterfaceWrapper) GetResources(w http.ResponseWriter, r *http.R handler.ServeHTTP(w, r) } +// ReclaimMemory operation middleware +func (siw *ServerInterfaceWrapper) ReclaimMemory(w http.ResponseWriter, r *http.Request) { + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.ReclaimMemory(w, r) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + // ListSnapshots operation middleware func (siw *ServerInterfaceWrapper) ListSnapshots(w http.ResponseWriter, r *http.Request) { @@ -10265,6 +10527,9 @@ func HandlerWithOptions(si ServerInterface, options ChiServerOptions) http.Handl r.Group(func(r chi.Router) { r.Get(options.BaseURL+"/resources", wrapper.GetResources) }) + r.Group(func(r chi.Router) { + r.Post(options.BaseURL+"/resources/memory/reclaim", wrapper.ReclaimMemory) + }) r.Group(func(r chi.Router) { r.Get(options.BaseURL+"/snapshots", wrapper.ListSnapshots) }) @@ -11867,6 +12132,50 @@ func (response GetResources500JSONResponse) VisitGetResourcesResponse(w http.Res return json.NewEncoder(w).Encode(response) } +type ReclaimMemoryRequestObject struct { + Body *ReclaimMemoryJSONRequestBody +} + +type 
ReclaimMemoryResponseObject interface { + VisitReclaimMemoryResponse(w http.ResponseWriter) error +} + +type ReclaimMemory200JSONResponse MemoryReclaimResponse + +func (response ReclaimMemory200JSONResponse) VisitReclaimMemoryResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(200) + + return json.NewEncoder(w).Encode(response) +} + +type ReclaimMemory400JSONResponse Error + +func (response ReclaimMemory400JSONResponse) VisitReclaimMemoryResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(400) + + return json.NewEncoder(w).Encode(response) +} + +type ReclaimMemory401JSONResponse Error + +func (response ReclaimMemory401JSONResponse) VisitReclaimMemoryResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(401) + + return json.NewEncoder(w).Encode(response) +} + +type ReclaimMemory500JSONResponse Error + +func (response ReclaimMemory500JSONResponse) VisitReclaimMemoryResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + type ListSnapshotsRequestObject struct { Params ListSnapshotsParams } @@ -12358,6 +12667,9 @@ type StrictServerInterface interface { // Get host resource capacity and allocations // (GET /resources) GetResources(ctx context.Context, request GetResourcesRequestObject) (GetResourcesResponseObject, error) + // Trigger proactive guest memory reclaim + // (POST /resources/memory/reclaim) + ReclaimMemory(ctx context.Context, request ReclaimMemoryRequestObject) (ReclaimMemoryResponseObject, error) // List snapshots // (GET /snapshots) ListSnapshots(ctx context.Context, request ListSnapshotsRequestObject) (ListSnapshotsResponseObject, error) @@ -13445,6 +13757,37 @@ func (sh *strictHandler) GetResources(w http.ResponseWriter, r *http.Request) { } } +// ReclaimMemory operation 
middleware +func (sh *strictHandler) ReclaimMemory(w http.ResponseWriter, r *http.Request) { + var request ReclaimMemoryRequestObject + + var body ReclaimMemoryJSONRequestBody + if err := json.NewDecoder(r.Body).Decode(&body); err != nil { + sh.options.RequestErrorHandlerFunc(w, r, fmt.Errorf("can't decode JSON body: %w", err)) + return + } + request.Body = &body + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.ReclaimMemory(ctx, request.(ReclaimMemoryRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "ReclaimMemory") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(ReclaimMemoryResponseObject); ok { + if err := validResponse.VisitReclaimMemoryResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} + // ListSnapshots operation middleware func (sh *strictHandler) ListSnapshots(w http.ResponseWriter, r *http.Request, params ListSnapshotsParams) { var request ListSnapshotsRequestObject @@ -13696,222 +14039,233 @@ func (sh *strictHandler) GetVolume(w http.ResponseWriter, r *http.Request, id st // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/+x963IbudHoq6B48tVSCUlRF8syv9r6jizZXmUtW8eylJMsfShwBiSxmgFmAQxl2uW/", - "eYA8Yp7kFBrA3IghR7IlW7FTqTU1g8Gl0d3obvTlYyvgccIZYUq2Bh9bMpiRGMPPA6VwMLvgURqTN+SP", - "lEilHyeCJ0QoSqBRzFOmRglWM/1XSGQgaKIoZ61B6xSrGbqeEUHQHHpBcsbTKERjguA7ErY6LfIex0lE", - "WoPWZszUZogVbnVaapHoR1IJyqatT52WIDjkLFqYYSY4jVRrMMGRJJ3KsCe6a4Ql0p904ZusvzHnEcGs", - "9Ql6/COlgoStwW/FZbzLGvPx7yRQevCDOaYRHkfkiMxpQJbBEKRCEKZGoaBzIpZBcWjeRws05ikLkWmH", - 
"2iyNIkQniHFGNkrAYHMaUg0J3UQP3RookRIPZEKY04iGnh04PEbmNTo+Qu0ZeV8eZPvxeL9V3yXDMVnu", - "9Jc0xqyrgaun5fqHtsW+X+76eqY8jtPRVPA0We75+PXJyTmCl4il8ZiIYo/721l/lCkyJUJ3mAR0hMNQ", - "ECn963cvi3Pr9/v9Ad4e9Pu9vm+Wc8JCLmpBal77QbrVD8mKLhuB1Pa/BNJXF8dHxwfokIuECwzfLo1U", - "QewieIrrKqJNeVd8+P80pVG4jPVj/ZiIEWVSYVaDg8f2pQYXnyA1I8h+hy5OUHvCBQrJOJ1OKZtuNMF3", - "zbAiokg4wmp5OJgqsm0oZ0jRmEiF46TVaU24iPVHrRAr0tVvGg0oCF4znG7RaLBlUkvNTo5iWde7a4Io", - "QzGNIipJwFkoi2NQpvZ26xdTIBgiBPdwqGf6MYqJlHhKUFuzTc27GZIKq1QiKtEE04iEjfbIhwhmMb/z", - "MaIhYYpOaJm+DTp18TjY2t7x8o4YT8kopFN7EpW7P4LnGsV0PwpBa/9CNKEtmq0DhhRksjzec2DdMIgg", - "EyKIxvHPHC4RfE6YphY93p9g3Nb/2syP6E17Pm8CME/z5p86rT9SkpJRwiU1M1ziXPaNRiMANYIv/HOG", - "V6v2uoBRUmGxmj6gxRegRDO/RrA5M00/dVoKT9d+8la3qfJOYI12yBIXqGWRz+aEeYSkgDNlX5Sh85JP", - "UUQZQbaF3QvNE/UAP0ccWOIXgkMG/mXi1/O+BfMyD2p60+86LcLSWAMz4tMiNGcECzUmJWDWHGG2o3x2", - "teA/LZFP5azCkoxWc5BTyhgJkW5pCdu0RKkESXVp+UBFV1SN5kRIL83BtH6lCtkWtV1FPLia0IiMZljO", - "zIxxGAK94ui0tBKPtFYSf3GimaDrEKQIiRRHZ78cbD/aQ3YADwwlT0VgZrC8ksLXunvTFiksxjiKvLhR", - "j243P6OXMcSPAWcZYdSdPRkGOsQ0nK5ld1N332klqZyZX8C79azg7NNsQKNXpH+/8yz6EJiE0RJqdSa/", - "DPg6MZuNphHXMF2glNE/0pKA3UPHWldQSB8UNCRhB2F4oVk2ThXvTgkjQvMpNBE8BmmrIASjNulNex00", - "1HJhV0vBXbzd7fe7/WGrLMZGu91pkmpQYKWI0BP8f7/h7oeD7j/63Sfv8p+jXvfdX/7kQ4CmkrmTCu06", - "2472O8hNtiiuVye6TpS/NfcvTt/HccxWH2s+cdOdPjxeFhzMWkMeXBHRo3wzomOBxWKTTSl7P4iwIlKV", - "V7667ReFBaxjBRDYVIPphmCoKD2Axu2IXxMRaA4cEY14sqOZMFWyg7DWm4F5IX1K/jcKMNO0YIQLLhBh", - "IbqmaoYwtCtDK150cUK71Ey11WnF+P1LwqZq1hrs7SzhuUbytv3Rffdn92jjf7yoLtKIeJD8DU8VZVME", - "r82pPqMS5XOgisRrd8RBN41AzIspOzafbWUzwULgxefvsFvIqp02ylztVgexR/J/PSdC0NCdqocnR6gd", - "0Sti0R2JlKFh2u/vBNAAfhL7JOBxjFlonm300OuYKn2apfkhbaxBveJ2/9YiwYyDnBFFXC8oA3WNEJPD", - "MBAE9BMcrTyGV4HYC6zDrN/lQ/sXLlU3xgxPCWiTtiEaC35F9ERRwiMaUCLRFVloIWWBprrT7pxKqsmH", - "sDmaY2M06A3Z2xmXxDRxr7QiEhA6JyjmwRVKIhyQGQdFfI6jlMgOup5piUEzY0FwZB8jQWJM2ZDN9CRl", - "wBMSah3CNIOloUvC5pcoxglQKRYESBTFWBFBcUQ/kBBx80lMQqoPqCEjgNcowZpkg4ALffrqvSU4mBWg", - 
"8JNEl0beuITuLynTWHlp6Ko3ZMWd/9h6ff726evzV0ej16fPXh0cj3599nf92HzUGvz2sWXsm5mg8ZRg", - "QQT600dY7ycjnYZEtAatg1TNuKAfjLHlU6elYSA1fuGE9nhCGKa9gMetTuvPxT/ffXrn5Ck9FGFzTQae", - "iX3yyjLmKPRwlCNnzJPIGohAtMNgqgUO8+L0fFMfrgmWUs0ET6ezMmHYk/1GJBFSeTWifDROfHOi8god", - "b75GWu5AEdUEmskZW/3+ydNNOWzpPx65PzZ66MhQLUxfsxAurPgjZxp9tBAOKHN4eo5wFPHAmkAmWlea", - "0GkqSNirWN6gdx9/JkyJRcKpTwerMKe86TKP6nbztzdgRZtjyjal3oZucDO4A97cWhN4xuZUcBZrbWyO", - "BdXHrCzTyqvXR89Gz15dtAaaj4dpYI2Kp6/fvG0NWjv9fr/lQ1CNQWt44IvT80PYKUM2KonS6UjSDx5J", - "4CBbH4pJzIXRgO03qD0rCwqGbhFszrC18+KpQa6tF4BXblNCKqG168V0XMaY7RdPfdgyWyREzKn0mcl+", - "yd65nS8c64bdl3FbEjEnIkNawOJeQf0IIp6G3cKQndaEChIIrNGu1Wn9QWIth88/aNTJ5+75zm+9aiR/", - "rhEscZRQRlZIlt+IhHfNxVXEcdjd+sICHiNK9728xFfmRXl/LU6QDCVanSVrBAuvaahmo5BfMz1lD1+1", - "b1DWOGOu7/VKcPTvf/7r4iRXk7ZejBPLabe2H30mp63wVt211wSSLSRN/Ms4T/yLuDj59z//5VbydRdh", - "BJFbCXV2/5+ZHoBla1wPS9eUxppZBsvfZkTNiCic3g5Z9COjD8PnyOFeYSkl82jxTnOJUfM5ERFeFBiv", - "nVNrqw/crzIrQRXQqv1Os9ErpD9ew4Z1b+6Qf1HV0bf7fkbrmZRnTk81r7DnQpOZZBPZ2j6xP7eXp1Qz", - "oyuajEBqHuFpZrJdddt8dkUTK4rDF2Ybo8gwgjAF4X3MueoN2d9mhCHYO9hg8p4EwPOkwgodnB5LdE2j", - "CAw8wFSWjxYt2OdsxTSXSv9XpKyDxqnS0jpXBFm9CQZJYS7QeExQyrC7zq7IznaBVbyyYLkigpFoZGRj", - "2RAy5iNkP6oFDix1gqUiwnD7NCnD6+jXkzPUPlowHNMA/Wp6PeFhGhF0liaaH2yUodcZskSQuVYh2BSM", - "jdSOyyeIp6rLJ10lCHFTjKGzzERm71rnL07P7W293OgN2RuiAUtYSEKYsztxJFIzrFDI2U+aYklY7rY4", - "fgXoflq+iSrfac2DJC3vyHZ1N17Bfbpe+5wKleJIs8qSNOi9XjeOGx6p3/iFFLUPy7Yy5MSqfC/a1N5h", - "egYvjmWZ2G+2MIJOY7NFQRNfMmA4NfFjs8mu6f+YuYmsNNvkmuJnjHVmOqmCyPbdcSu7BZSOM5iUYYW/", - "DHgOZEGzrjWLh0Qqygw66bbICnQStS+1Mm7xWKvflx10+efSA026TjPQ4sE1MtAAdsD0o2L/VZvCWm2/", - "uU5X2Rwsb78fB7LWzwjNt5ASmEl9NGoRKSE99AvwYKRInGhGxKaISiQN7yQhYvz6vxE3Mon7dMj01KTx", - "0rDgyGw+kk4ZZdMNLaXrcwWHoTEMTVKVCt1uTmUOzTLqOONLdQFvzeyIYadxKvWBGkRpSNClM9BclsW6", - "ZfPNskZn7TlLCooBCSgmoKupzThVeni94BirYKbhxFNl3Lbs0mV5AmUj0brrTDuX7KLrFvt/lrGLMlCt", - "uaDC+PXi7BULWPUK5sU6K56VM/wWxiuygC131kS8ZE8sGhL95j5BJI/mxJ6aRVPkGAdX5igxnhPWCmns", - 
"idaEqMm/QqJe49q6rdDwagz+sqS/jEpgwbWLzTHGCu/GfLvIuJBenBmvo/VaSQD4oDkMEEhTlx2j6hAw", - "ICCmkSVCIRUkUEvdUzYdMvDguLRPera3S03kWsTwEaFPV/GKcgVlxXxT2lpU2FkntUE3emk8pkqRsFOW", - "Da4ISeT6RWnp2NqdPcZxQa4FdYzM2nvChtIVYRMuAhJbGf/z9L5nhc68WtjNulh2qDDwLczZ4hPCSRJR", - "EhrvHbMfYCWVdp/ARFr12A0rSpe5wC8PeYmj6BK1baMNJIhei3R7xTjLkf3t4alDgezS+eKkozFSc4HL", - "mVLJSP9HjjQVX1Y7s986Ctfd6TNJov0+qEe7uzt2V63NzEy40m3ZPOZ1SqjfmjOGEznjqvZe64qycB2i", - "uE5+1W1rjWKZQCNt87u2iyWCdNNkKjA4pn5Jq9itbxsBmvWcd43Puc+5MINqkErF44KLIWpXHCNo2YWi", - "DKw5j7ohVhgsiA3NnGa6y+668cJ0ZXSoOgPIaDr2eNvQD5pboimd4vFClc32W32fpva5V79uLr5tqXN7", - "N5ofCUeKr3b8pRPk2jbx84NzYKT4aD6hnp6z4yj3GqESBRUfe6uP6i66SUCtFg+ySTAzfpkGCCDsXZwU", - "r8x6Q9aFY3OAjrIBsm6zLjHIhDg0FxZtLgqToODshcaLDYTRxUkPvc1m+5NEWtGYExcHMMMSjQlhKAWL", - "L5xiXXOGFieQSjjsVPVza7IwIQMbcDPI7bse+mWRkBhb848mhRgrGoCD0ZhW1gPHiNkoexWLWdH41MhY", - "tMpd+g2ZUqlExVkatd88P9zZ2XlSNRtuP+r2t7pbj95u9Qd9/f9/NPer/vJREb6+Dsq8xbpsFbnP4fnx", - "0ba1UZbHUR928ZP99++xerJHr+WTD/FYTH/fwfcSN+FnZUe5rxlqp5KIrmOTGqt8HmYFR64aD7JbO4bd", - "kZ9X7ra6qq2BxFvd8i4CQnyuxtbR9eYhG1WGudZZubC4ZQ18kYC+mFNJQfKyPoEB9Xo/HlF59VQQfBXy", - "a+Y5t2M8JXJkzjO/G0EqjW8LeW+tEoJzNZHmurJsrdzafby7v7O3u9/ve+IglhGeB3QU6BOo0QReHx6j", - "CC+IQPANasM9U4jGER+XEf3Rzt7+4/6Tre2m8zA3K83gkClM7ivUthD5i4upc29Kk9refry3s7PT39vb", - "3m00K2vnbTQpZxMuiSSPdx7vbu1v7zaCgk8Qf+biUqq+86HPY0DrPeaOrysTEtAJDRBEtiD9AWrHcISR", - "7JKoTJNjHI6s0cN/dihMI7nSUcEMZlsaA1mcRoomETHvYEMa2ZBh5UfQk88JhDJGxCgL27lBTzaaZ+3F", - "vFtL1gSVorJKoDuhEqSQXHiiJAoHhkLX8jnYzXxi7+rwwK6hITa81KpTNyJzEhWRwBxderIxFwRleGI2", - "rbQqyuY4ouGIsiT1okQtKJ+nAmRR0ynCY54qc7sHG1YcBHyFQfeYaHbdTD99zsXVWq9LfRKPRMqY7mat", - "NecADOATa2KBUxwj+7Vz7C8IfdktnLmrtO8lemO+MJad/HGSKkSZ4lo7ZeF40YGRrAWIIUGk4sBJraHP", - "dtNUuvTLLWDkdF4XZrycd96Ty0l3Ym7pv6yGLaZEjaTCaq3EojHlLbQ/g+aNnbj1h2sNIA3gzsj1fQAd", - "vNy7Gm27kuHkbiC+ygcsszXkjeAUFjQkPQTUBc4oLqquQmlniicJCTP7T2/IzgypZI+kufnQHxo4qBmh", - "AnFBp7Q8cNkwdpfOZDdBRYdNt0bH4ofLEiq8BK+JeqLHE0WEgaALGC5G/dhNaHVaFvatTstyojJo3EMP", - 
"RHIPx6Upvjg9v6lLWCL4hEae5YILgn1rNTPnLPVyt3/W3fo/xvFR4xuIaJQZt4WYh6RXicmH9s1Onhen", - "56d1c8oSIqDi7JbWlDmaeDhH5o/gIGIvg+xtotVgHPrrgyUbJJe9n/hk2YnAMRmnkwkRo9hjXHuu3yPT", - "wHgUUYZOnpblWS03N9WaT0ubA2rzBAc2nr0Z9D0GucoyOgVovvNv1xtijuG6KDi9VcK2sYFwPfQqS0GB", - "XpyeS5Q7B3ksdeXtrXVTP50tJA1wZHo0Qa2UFQ1sgJyNJeTT/ENrivTIybFXNnSEgNrzaZICGZ696R6/", - "vtiMQzLvlOYEDj0zHhE9740Ct5i7WLjcp77EJOZ1lg6DGLIpARVglVFwYyAV6NUDHcUVjkYy4j4ni7f6", - "JYKXqH3x3MQq6Rl0UFLaSv28AIUSfu95KUZzpLphz2DAqsm0ROBe3bGcucWYVwrLKw3qI5VfCI5Mwpoy", - "Pudh1W7j+VV5o/nVWuq1nfjGPXb+2A1ipg5PjozAEHCmMGVEoJgobNPjFFxTQBxqdVpdfUaFmMTg4Tb5", - "79VeKTUm+GIQVK0R93Ap28WdGHBrorTfGNeBEMWY0QmRykZpl0aWM7z9aG9gckmEZLL7aK/X6900NORZ", - "HgvSaCs2jed8IUqkJ2eftw93EAHSZC0fW6cHb39pDVqbqRSbEQ9wtCnHlA0Kf2d/5i/gh/lzTJk3cqRR", - "+hE6WUo7Ur7S1GeWeT7QK2HWlUvjEgcFfu0VU40+Ax4JEK7mjdJVeKr1E4NxnxuOe+uEHXnWKFVI1FF0", - "5GyQtIN+WG0JdYIRtLFjpkzRKM9nsmwDvVVGGrkyaH8pYD8hLAvTjyLzK+BsrqnCF7NfYuDu3WfdH1jv", - "lFFIPZj8N6vtGecGCGZaT2+tTZwk69HWLyhm/K9prhIbUew5ib4617/NHVt59NfTv/7xf+Xp49+3/nh5", - "cfH3+Yu/Hr2if7+ITl9/VuDS6mDyrxoR/sWCwOFiqRQJ3hSVTrAKPALVjEtVA2H7Bilu/Cx76BAUv8GQ", - "ddFLqojA0QANWxXX3mELtcl7HCjzFeIM6a5sgMGG/vjUmH/0xx+dbvmp2kdoIwmE3ZAsgEim45DHmLKN", - "IRsy2xdyC5Fwp69/hSjAiUoF0bunZdhogcYCB3kEQT54B33ESfJpY8hAwyXvldArSLBQWfYLNwIghZ2V", - "8RmwzUno4rGNhjxk2bmUhWMbG00vM4KAbb7qKekHild94aIcAbPf9wWug7eW3siISkXAoTrDbI1GmRsZ", - "2u+XWMV+f7+/VsDPcGgF+gElLOemdEjZgJYMAsPQhnGDZ1kDW7rmTYZG0C9v355qMOh/z5DrKIdFtsVG", - "yTO+e9LYCFUkC157Gy1/RIje3YYLMkYy+CxqEKzzzLh1vn15hhQRsXO0bwcanBMa6PXB9T+VMtWoSDE6", - "ODx5ttFrkFwTYJvNf8U+vs1WWA3KsEazOltghvEavh10fARutZZCcwEO3Gqec4Eiw2Byuh6gc0nKPqqw", - "VeZW3+xktMgtb+YEGLY2XI9JlVMM0JtMbsTZVDIHyRwZXJc5XUK39uLF+Pws9V7xpwVvJqsXWdYGHj5Y", - "Zc7d+sStZwWryd8DcaB5649dsGnejLaLxlA9mB818r2/c2ll56Y66k3zIpRDFwthr1lqhOY5De4iN8Cy", - "vvaeqlHtJTzSr+2Vu9NKLk7QDEv2k4KXFd1ka+dxoySVetSm19fFi2s+MVPKqMrFQWbXriYi9IpGkfFm", - "kHTKcISeoPbZ8Ytfj1++3EBd9Pr1SXUrVn3h258GKRIcar84PYcoFSxH7gao3ukR547D5D2VSi6HiTa6", - 
"SF2dkuGXUtoEb9ztxhfMpeBun5eWcR9ZEr6mW9+3l6FhZU6Fz02MYIXdO8qLUMtcfTkFynzWPP6yGQ7u", - "ZDqlmB0ffyjKBM7n+tYpBTot6vE3PZCaBZIQHZ/mmQVzo5TrvrKmJ9u9rb393la/39vqNzHRxThYMfbJ", - "wWHzwfvbxhAxwONBEA7I5DNMhBaxjfCGo2u8kGjoxOthy8jzBUG+QLZWBG90/bqcueF2iRqqAsW6VAw3", - "Sb3QLKfCivTAZ+XEwI1ltEf/+KwcwqTpyWxdF+xXo5sYrwkKeBqFWg4aa8ozahUJrfYnicpzLgOxnrMr", - "xq9ZeenGhqnp94+UiAW6ODkpWbwFmdiUsg0WDi4PNfvAkxttw/YaUXntbG6Z3uA+UhpUuWbhtPriCQyK", - "JjfnQmkwtIHpLZcevdfelJmt0XiyYk0Vo0lI5qM09QlF+pULnDg/Pz4qIQfGe1v7/f0n3f3x1l53N+xv", - "dfHWzl53+xHuT3aCxzs1Sd2bu73c3pOlTM31gUoAeDBAmji0cKDpLXNFGacKZW5qmpAPtXSJCmKsCcsB", - "m8AxowoyH1I21d2Aim6lXBMXaZIzUkYVBOJDFhfK9JLBFqI7sc5HA/QC2sIrHEO4kJuE1m3KZgAcLowZ", - "VDMGN3QCf62e8tksVVrsgm/kLFVI/wXL1mCw2sbqLgyPGaBXHL4RzkeU8araYpqD79Vy86qK07ZeQc57", - "FAazDHOAnmdMMmOzlq22JbE/De+2js3gtL1Rcp2zO97S2JLvXMErrNMyEG11Wg5Q4D227Edm5+UNkSii", - "ou9+gOAIWGjup5MqGtncArASKhUNjNKHYXPrKNmmwSLhyJzgdbd9xvnDnvLZR45RXJygNkQj/gVZnVD/", - "tZHdDBapcnf7ye6TvcfbT/YaxRzkE1zP4A/BNWl5cmu5fZCkI1cvo2bph6fncPbpc1WmsVHy7doLLp6J", - "4IEWNilDeQGOfPAnvSfFUIuQp+OoYDSycVngz9+kWkrN9dYfNJrTyYT98SG42v5d0Hjr/Z7cHnt1s2wg", - "vyB7XDR0Lml9ZNw1qQv93vCAUELWBoy8IRJWgM6IQoA/XYQDOKQzjyKLci6sxELci1i7Ozs7+48fbTfC", - "Kzu7AuGMQP1cnuWJnUGBxKAlar85O0ObBYQzfTo3S0jLwKwA56czZLMY90semFr12fFhSY28lGON7Xse", - "14L8wgpBdlEW6OAYlQlIS1TuhfbOTv/x7qP9R83I2CpcI/F+NYdxqTAMeGz2kOLOt8E4/vbgFOnexQQH", - "ZQVja3tn99He4/0bzUrdaFaQ+cZkrLjBxPYf7z3a3dneahb55DOA25i+EsGWeZeH6DxI4dkNDyiWWW+n", - "7rTwCZ7L3pgrHUBzj9Kq++BN/IXzmG8qoVdacFVFbS2XFWXcQtzyRhMzh59F6nHqqnBpCbSpK+9qz91T", - "rGbHbMI9CX1uoG9afyhn+U60HCSh5khIGCWh412Z4mlFK/CwiiRBYUos5IyoJLAFODa3PJC4hzmZjLJp", - "2bd8acAmWqCZw+oIfxjXNmxisJJ+v5y3IgVYGROzRDj30GlkL6dy5FdUljsWZJpGWKCqu/qKKctFHFF2", - "1aR3uYjHPKIB0h9UrQkTHkX8eqRfyZ9hLRuNVqc/GOUXzBXrgJmcdS8wG1IZN1/Cz3qVGxXnJjj5N833", - "m1BmsYn9z3vr9FzrTsaj+5zR9wVEL4fA7m736/zeajotebwtRwPclLdblPVRvHPUP8gy1XpuN839UUUp", - "LsvBpfX6VgsXlKu8/JYlAdR2JkUXYlyGayHUt9FB3OyOtGo8d7PZlCQoj767/+jxXsNY688StVcUovsM", - 
"wXoerxCoa3bqpInUtv9o/8mTnd1HT7ZvJB+5e5aa/am7aynuTyUhdUVme9SH/91oUuamxT+lmtuW8oRK", - "yaVvPaFPK0g3j7Gp0bpXFYHNd9Kp+WUBvJmIu0JaOiiJXIX6CW0ymZBA0TkZGbh188lUfLMazSHACQ6o", - "Wng0QHxt8nRmTSqxIg16r0zWA1Lbtw3305xLpuPcHaDtBkd/NppdBRf2G6dskOm4Tot8XR3V6JA2N1vF", - "QtHAQJBng63eyV9nwETXWJYuFfTvABLt5fUxqrdPpkXzQn4O17Nafvm9ui/eyV+3r7j9le0saB0lIbkK", - "8VVHaD0JaomgcR5fz4nsKw203qejwh/sAXi7r0bjYjKVldlqSplX8lP35uM2q+yx/J05wW4+XsGB4CYf", - "VvNKAD7aOViQ5313SihRg02Ki/VpAO8gOtyYtG8VH26t4fcSIm4f30lY+NJ2nBW8oJr7/Lmv/OWYS/eY", - "e93+Tre/93ZrZ/Bob7C1dRcBCtkdRp0p9/GHrevH0Tae7Eb7i8d/bM0eT7fjHa/Xxx2kn6wUUahko7Rr", - "SIioZgSpZtKRJKKMdGV2/bH+InpF6JExyiV4AULeCo3sJmqAK2y6gmrPyossEi9WOXCqCe3vwz/Nzn6l", - "LlOd/vHR6mnf6j6hOhE/glWnAvjUbDIQMLf1RTONgl0VqMcLyJpF+VCmdB9fQuJ3KzjYr5Zw6ziVdQu3", - "M8yzUzhicveLJazJXy8BysdiVyfQqBxC5o60mK8kc0n9stkz3totrXNgLiR12X60V87qctD9h8nigka9", - "webPf/nf3Xd//pM/k1dJdZREdEMyAYn5iiy6JrW4Rq5eOfrVFOyTCtv8V4rgGLhdcEUMd43x++J8H/Uz", - "W/biFY6XlgCqRkxZ9vfaBfmL6i4h2nkSNqnLeRdxxIqjFEZH7ZiIqUvW4czDUHNBizlXZCFRIVjJXqw4", - "kvtJZp8Us3vbko89yEk/phDzKYcMC4JwEJBEkbBngzYozEVwwOdqjnsbNKWnxlMjM2GIFLW3O83qR7YY", - "ue6aEcKuRp3dR3u2rEcRkltLO+TbM+MgVJdOV0PZczK+pFIhPnHpElChMWqTOFELFxLs7kE2buawdJB1", - "WFMV9YsGW/SffInQ0POVsaDfYTLnoj+Zm9BaT7Kl/a8NwPIbUo+qft2GJm2CyrIfciXtnlTdejtrzFOm", - "RnBdsGwT1e/MVYQNfpym1SwQmzFTmzbUejlCl+AQ8savvHzKqcw5V3Xho/V3KitvegsrK8ykfm+MM+Fy", - "hOMKAJ1q0FzPiCCFjYAP8njRG4LMXgysjyY0zlRae+hWM5iaJDuCwk2DBZABrAZBdnm0fEO12h/6BL/P", - "RgCtAcslDQfWUShN++IpJM564zJZ0onrAqZRLR74dD0WNamhsbwZRaxaXrdp7yU8y6tWcL862qogZz5G", - "CTXf+U4zSYJUULU402zIRm1ABZ6D1KAh8CdYBDzOB4eIWih4TO1td+W2XWvQNEAHp8e2/g4D+QtdnKCI", - "TkiwCCJiAyKXnBBB9Hh9eNw1kdxZXTY9PFUAEJcJ/eD0GBIrC2nG7fe2e1DmFko6JbQ1aO30tiDNtAYD", - "LHETEnDAT3tFqukQTr3j0J7OT00T/ZXAMVFQJe83z1WjIsIk9JAgC+FpQRhNMBVWGk0iuAA1WjTV34IP", - "uGPwA3NKdAzAcVNnaqkW1hxMktd2W99pdJAJZ9Js6Ha/b7KtMmWPA5wn3N38XZr7ynzcRlIGgMfjEL0k", - "bTpJx4L8U6e129+60XzW5sj1DXvOsC3kRGCaj24IhFsNeszMHZWrE0xsw5zOAIWKFPabKS6exjEWCweu", - 
"HFYJl3UiGpEIQ5ZOk03mdz7uIauuQwinnPE0CqFGZGLqEGg2ipHCojf9gLAIZnROhsyeHibfMRYQ4h4j", - "fWoYYbpMGmZos/uG7RCpnvJwUYFu1t2m7g6krTKAq6FikozAZX5Ulyoqs3UllDFIOZtX6nY5U5Y4uskR", - "DmXufeXJGGYqTzltkoNfEXAJnND33g4b+bZqhgfbQqAWRZYyYXvDf6sOEYB+h5Sj7B2y4C0fclpHsEXV", - "MknAXfRgMcZR5M2UNY34GEc2h/oV8QhOL6CFBUoxWNIduYyHxAS+JQs148z8TscpU6n5PRb8WhKhD2Yb", - "AG9h7UpIGdSFYhY0hiB0k15Hj7lpprj58YosPvWG7CCMXeokW1oVR5Lb5PJZkbGsWPCQ1YZo1ti6Dm2x", - "GZPYuZgL10yTpypJVQ+ZhRBlo/ahOaRKljMSDpni6KMwlTEWnzY/5iN+Aoma4FDjSaGJWdLmRxp+qpu1", - "HGG9+hE09egkBAAwbOnTZdjSv6cCa4k6lTNQsCUo1dPilrYNYXMB0spGFcIBZijhSWrr9xFkc+aX+oAM", - "KDiKkAJSct9qGQh2smY91kHCl87TekeY6+wKGUFizwIx9Xf3/fQkSSCIT+3+69nrVwiOKr0HplluzAAY", - "mVKSWTlbPXpvyJ7hYGZrvEHAxrBFw2Erk3nDDZhrKu31TbcLgtfPemo/m2E6NPy519NdGZlugH77aHoZ", - "aFpK4pHiV4QNW586qPBiStUsHWfv3vkBWnfJfFZiBKhteP+Gy18FRdnyY9CcG5iFiFteGy0QRjkHKmr3", - "Y8qwWJl8ywN6C0GtYOKpLALj4xDMcsPWYOgMc8NWZ9gibA7PrPVu2Prkh4BNFlcfHWDyj9lmORLt9fsb", - "672/LHw9InSpoSa/T0vS1/YXEzys0LUseJjFudAmvYMmk5wRt+5B8nmKs3qaP0S8NSKe1acLwht8XzwH", - "DPpGxNj7KxKYVsAjJ4Gt1E4MWkBsH2gczlfTKBzUSXA58hbVj6qSuaxW7NZRWQBTjBz+7d4D/sG4eTUG", - "GPfJfY2LI1M3zOUmf1joCJvlELHj14hfEPUtYFz/vlipKxrzFfH3oeDPC2LlvhxoFW62CYX7i+aWary6", - "IDiWthfTWOuqZzCn7hlhCj2Dpz37r9N4ILz3MuLTywEyIIz4FEWU2Tuiwh2GPhQtLOEjk14z+85mqA1m", - "mE2JRG1zfv77n/+CSVE2/fc//6WlafMLyH3TxGhA9OrljGChxgSrywH6lZCkiyM6J24xEF9G5kQs0E5f", - "2lrA+pUn360csiF7Q1QqWOEuLeJTgInp0JZf0euhLCUSSQAhFBSc2LCBn/NCwX5aNqC8V4ruLOlcdgWF", - "BehT0eEA+IFSE8Jr9a+W33pm1lyyn1UtuEs2/fX8RZH3ymBv10zwhgwGQOyjO3hhF43aZ2fPNnoIdAyD", - "FRAaAhJz3o0Vnns/eNJ6nmQ4SpmhAJQNbyqUOqi1/x7ZNs0MwLbH78kCXFe7od4EbEweRJDQweuHrtDE", - "HOyHmzMN++yzR67UY72B9vbrLQ7hXFQaKcJfbp8d7i3D3NY8zUH2NVRg1LYl6LK0o6XCql8L6e/l1CjU", - "482ODsRNstN7U8sOOZtENFCo6+YCmU1ikqlqZQR5KOzgjZ01wm5d1SDs4vm2WYopqj3psvCi/Mi7+9Oj", - "MuhNjpE8UDzHtR8nyTrUOaIy4PrbArZ0A5zYpKtGfMnotIhF6wxSR/A8O3JWiktHWaVuS5D3Z5qyQ6es", - "ejbcA1M8qjDEr8gIK4kkC6kVHhI2n2e76Mpar7BcfVuo2b8/Kei+rVg+NH9IZqywAjbNBWdZebE69LIF", - 
"yO5wo+0InoWfEeGo2kzUJCXMl2U+RcGMBFdmQbb6+iqJ4NgVaG+i+pr+vifN11R+u4HEYkH+Q0RpoOzm", - "sFql4B7b7Jp3p9/CCDdSb7/cPa9FMA+Qwdlk7CzWJnEllgsWbHxXV733cppVK7w/IEo6TaPI3XjMiVB5", - "/bviGbD5EdyS1sv2jtpWHgfnb152CQs4+KFlPlR+IcqVpfqyEr7ZMLOUH2jSRCcEUDnEqBegP2P/jbsg", - "ymoc/Nf2c1vl4L+2n5s6B/+1c2AqHWzcGbL074s137fE/YCRTwvctAw0YE2meNQ6CTVr1VBIde2/KznV", - "FiK8iaSawfWHsNpEWC2Ca6W8mtWEvEOJ1ZbP+zpXMhmy+aANr5x/4ncmqd6vlc9ipMsiQ2X52sOmCeUi", - "L1ln67Q/PAdKmmFc8dhoaK7OCXLl8eFQ9/ioY6sRmhqCWYDIPRmv3TzuXbi1496/5fogHtNpylNZjD2B", - "4pNE2mCliJQZ8EMTu/PjuVbw/oaxtH+fR8e9y9U/8P6OJP7qhhrmbW6g1sn8rlVTmd+2hzKPpoKIiV17", - "4yqT2NQ3GzVOha52T1M0LpWZWnZ29M3Lp4ugc62o5OoCAg1iMGT/o/WP3xTB8bufXZBM2u9v78Fzwubv", - "fnZxMuzEoQphSlAiERYEHbw6gmu/KUSvQwK7PCSvOg+Tls7U87ZpVf7jFKT85rO5huSw8IeG1EhDKoBr", - "tYaUVb65SxWpnFnp3nUkh28+gNvUGj+0pPvQkmQ6mdCAEqbyrM1LTmI26fsDjC1j9n6o4NxROmgba0l5", - "OarVAmieqvDeHXuywe9fOXJZER+mjzw3UTGhU0fyw7BeH/nW8KF/v8z5/vWQh4xiRuBfBl2iZUpfMQDI", - "PxinphhuniEEvD6zgouuxx7Kc/DLNEm4UNLkMAQBGJIyq5kWgH35DsspDH05CxMe6RNCdoYMCgDr1yaW", - "f/OKLEyGQspZlowwW6nNSuiLvSonePyqZPTlZSx/9spGMtY9k7HJdPkVZayvxjruRdI6ttEPVunOCAMU", - "yjFxlLzxoDxODXPK1lLIX+QRrTYnNg2+X+V5zsVVUybgyQb9AHhBcYXfoLalp0cKtXa/otIF6oihF400", - "984nllJ8f00ndVrlHEGUhvrgzaot28N2Ing8sg9NnklNFbamGShxge31azMZPfo9qNSvuEI0TiKi5RwS", - "oq7BJihxbYQjl6yZykJC/JsxQU02xZABk67L1qLvIFs2AK4f3Ia14SZyebu8XDPi0/VpArLBXUy8J0/A", - "kJlk0sRlnr5EGZNFiiNJIhIodD2jwQxyBuhn0L9JKYCT5DJLErThSnYXcyXB4G1JhBYVA84kj0zV7Mt5", - "HF8OlnNaXpycwEcmXYDJXnk5QC6PZXZASN2qmANAryLCUqFXNrNBW2OS4FFkdvRSy9WF9W3Y7AB5Eqch", - "82UKYOTadkgn6LKQNOCyJmuAY6gv+VR+LdG1U596z6xFcSQAcAY3CQtbdaZqGvnzBWz1+76MUA1zF5hp", - "3HHqgqXJvOTTLO1fCZVxkjRFXztNwOJ5HK/AYdQulNSQKuSp+otUIRECPrbYXYfcqI0D84fCVxpRmS2I", - "6YqSAPp5L2RMHi4vqDRTLVQ1MH/N47jVadn5eGq4f34OiGqHyxcHemcKiR5+2A5uksKhzOwLORwqJ4et", - "nlQvctuiUN+9BcsCKvwe9NCyxT+fBWVOVIG95XlVlAcVC27qhVVlMVPCxUcjWcGxeiopX5Od5cVi/gNV", - "VLPWapW4e1ZSMxD7NLNSkaWvrp1mNZ9+aKiZhsoFClMzXKXq2nerdmYMBaWspHla8fS2umeWNjMDM1TD", - 
"ZSuvOHOet/nR/Ty+hbjwjXDCTm3tsboEbfmivwWWW1OZ81u8InBykj1WCwLCV2TBrkboV7tL0OpexuW+", - "CTZsCC7jxkWeowRmkrryvz+YcckMaCylt2XGTvhcsgUW2DNl3STCdXzZyqm1DNjWIvzu9bVcV/nONbaA", - "C2GcYcG99iEFYxd8AwqqZzvBqSSdjGA6zhPn4uRko45ohFpJMkI9YN+CSnHoOPRX5xc0dGU3Dk+ObJEO", - "KpFIWQ+9jinUwrgiJIEku5Sn0pSN7BUrKNbV4sxKJBKmxCLhlKm1s8ib3s1kPt2q7MA98ymbjuK7NyvZ", - "cu8PjUkB79Cnt13AaqVKmcKh3ms6d21FmakVooUPPOap7n2pwiOa0IjIhVQkNnd2kzQCIoKERTaftf3O", - "eON2EFUSaXrogPdiQkRMpaScySEbk4mWShIi9NhQ9phGpHD94LvZOlM445qnhvV9G1dbUPQRbnOwqoNa", - "ud4jThJX79F3fZKVqLz1lJ7DXRWSi3jMIxqgiLIridoRvTIyOJpLFOkfGysvu0bw3ZfO1n17ytKQPmYT", - "7k1oanA2Q+bvw4GrzNbcZf6DY2svSJFYHP+BjfazNbmWrwmCIyhrnAUOoFTRiH4wrE53QqWigakCl7ut", - "QgEr67k6ZCdECd0GC4ICHkUkUM7WsJkIHmwO035/J0goRHjtEJgcMLz61zGMeHh6Du1Mka3OkOk/oOO3", - "B6eIaphOsFWZCxNlRF1zcYWON1+vuf4/AzD9B+tjZoEr/Ue9G/7jZvfmXuG1NCRrSJQnqxQgnnz3BgMr", - "wf2wFjxMawGE5WSraU8FDkAolrNUhfya+S0Dpuaz3PxofhyvC+5SOJhduOL334a0a+tfrxvGLfBBEKVd", - "U0hMwuWvYq+3JcofaII6DTi3BBBiimFq/lPgQH2P2P3lL+uKcPwGb+osRF0y82+Gtu775LNzcDHLRXg8", - "FDI3mOZWAkV4i9anLEB7rW4WmIBASG6Vi5YBTnBA1aKDcOTqQ9uCb5kNqZsduWNB8JU+aXtD9iYLDbcF", - "57R21XGqFQqpvDI9WO2ph17PiZDpOJscAsZk9DwAvi0RHeAoMLWVyWRCAkXnxBQ9ljXaVzaVu0w0ng/i", - "2Wj30oLuoakcfpyA3cvRwmodJU+52oQ0Z1mrZglpsl4L3jAFT5GVPs8j13AEJ9FNTHaewa9orVu8fXUz", - "77Vf9UcNxy57SfknYV995iq/lzyfZwXnlKZpbHIMf2gZZQozL5FqycFrfWqLxh5dd+lhtS61RTb4fae2", - "OPM6+TywBHu45LZVl9Pi20OE/v16F993TouHjVtalJBLoKvnRA0iwb8JDLybEPCv7F1/ixDwb8rfE0J4", - "v57f/Tfl6Wk9FjNPzx9B3nfp4GkivSGgtc7B03A9a3leqShd2DbN1CTb4/ckwVtj5Q3kdwf2H0koG6gM", - "BWC5U7jCboD3S4vwJE7Uwlmj+AT8bvIsqZJ+AO89X+BcZnS+u3i1W9hjvxx6ODyttcb+SF55bwbfPMP/", - "8dHDz1hZpLnSwbKpT50uFsGMzkvxWqso2IIoEaSb8ATsrKEBmIWHO8sUFr3pB2S77w3Z2xlxfyHqsmWQ", - "EIVUkEBFC0SZ4sARzBg/SSS41gTgPRcLn/m2SLnPBY8P7GrWnIeWpqwxLHfzixfdECvcnTtus8KE9hlX", - "Vif4PY3TGBgeogy9eIra5L0SJnkDmmjNB9FJBlLyPiAklICTG8UJb/VrLJv0AxlNx01muSINx2ub5gQF", - "qVQ8dnt/fITaOFW8OyVM74UW9ScgySaCz2losn7nQJ3zyEB1qwagN7W7aqHC+oPnyoWZ3FeRYZocSNMP", - 
"NCmzBeP22Bq0xpRhmNzahBdlmjIeuHo8TMEPLqcdhzmtH0eY1fzaTtnRmKiVHAdExTmKtES/8eOYe8jH", - "XNGTwZ1ppdOuWV7mZs4NDX0O7iInc+b4cr9m64tv5z6+UGf9AZrO55lCWmc2/7ZQsH9/58N9m8svHrD/", - "1gvilO+CqRw60D36EOYlD3CEQjInEU8gZbNp2+q0UhG1Bq2ZUslgczPS7WZcqsF+f7/f+vTu0/8PAAD/", - "//O3dQn9MAEA", + "H4sIAAAAAAAC/+x97XIbOZLgqyB4s9HUNElRH5ZlbXTsyZLt1rZl6yxLezNNHwVWgSRaVUA1gKJMO/x3", + "HmAecZ7kAgmgvogii7IlW2NvbEzLLHwmMhOZifz42Ap4nHBGmJKtg48tGUxJjOHPQ6VwML3kURqTN+TP", + "lEilf04ET4hQlECjmKdMDROspvpfIZGBoIminLUOWmdYTdHNlAiCZjAKklOeRiEaEQT9SNjqtMh7HCcR", + "aR20NmOmNkOscKvTUvNE/ySVoGzS+tRpCYJDzqK5mWaM00i1DsY4kqRTmfZUD42wRLpLF/pk4404jwhm", + "rU8w4p8pFSRsHfxe3Ma7rDEf/UECpSc/nGEa4VFEjsmMBmQRDEEqBGFqGAo6I2IRFEfmezRHI56yEJl2", + "qM3SKEJ0jBhnZKMEDDajIdWQ0E301K0DJVLigUwIaxrS0HMCRyfIfEYnx6g9Je/Lk2w/Hu236odkOCaL", + "g/6axph1NXD1stz40LY49std38iUx3E6nAieJosjn7w+Pb1A8BGxNB4RURxxfzsbjzJFJkToAZOADnEY", + "CiKlf//uY3Ft/X6/f4C3D/r9Xt+3yhlhIRe1IDWf/SDd6odkyZCNQGrHXwDpq8uT45NDdMRFwgWGvgsz", + "VRC7CJ7ivopoUz4VH/4/TWkULmL9SP9MxJAyqTCrwcET+1GDi4+RmhJk+6HLU9Qec4FCMkonE8omG03w", + "XTOsiCgSDrFanA6WimwbyhlSNCZS4ThpdVpjLmLdqRViRbr6S6MJBcErptMtGk22SGqpOclhLOtGd00Q", + "ZSimUUQlCTgLZXEOytTebv1mCgRDhOAeDvVM/4xiIiWeENTWbFPzboakwiqViEo0xjQiYaMz8iGC2cwf", + "fIRoSJiiY1qmb4NOXTwKtrZ3vLwjxhMyDOnE3kTl4Y/hd41iehyFoLV/I5rQ5s32AVMKMl6c7zmwbphE", + "kDERROP4Z06XCD4jTFOLnu8vMG/rf23mV/SmvZ83AZhnefNPndafKUnJMOGSmhUucC77RaMRgBpBD/+a", + "4dOysy5glFRYLKcPaPEFKNGsrxFszk3TT52WwpOVXd7qNlXeCazRTlniArUs8tmMMI+QFHCm7IcydF7y", + "CYooI8i2sGeheaKe4JeIA0v8QnDIwL9I/Hrdt2Be5oea0fS3TouwNNbAjPikCM0pwUKNSAmYNVeYHShf", + "XS34z0rkU7mrsCTD5RzkjDJGQqRbWsI2LVEqQVJd2D5Q0TVVwxkR0ktzsKzfqEK2Re1QEQ+uxzQiwymW", + "U7NiHIZArzg6K+3EI62VxF+caCboBgQpQiLF0fmvh9uP9pCdwANDyVMRmBUs7qTQWw9v2iKFxQhHkRc3", + "6tFt/Tt6EUP8GHCeEUbd3ZNhoENMw+la9jT18J1Wksqp+Qt4t14V3H2aDWj0ivTf7zybPgImYbSEWp3J", + "LwO+Tsxho0nENUznKGX0z7QkYPfQidYVFNIXBQ1J2EEYPmiWjVPFuxPCiNB8Co0Fj0HaKgjBqE16k14H", + "DbRc2NVScBdvd/v9bn/QKoux0W53kqQaFFgpIvQC/9/vuPvhsPv3fvfJu/zPYa/77ue/+BCgqWTupEK7", + 
"z7aj/Q5yiy2K69WFrhLlb839i8v3cRxz1CeaT6x70kcni4KD2WvIg2siepRvRnQksJhvsgll7w8irIhU", + "5Z0vb/tFYQH7WAIENtFgWhMMFaUH0Lgd8RsiAs2BI6IRT3Y0E6ZKdhDWejMwL6Rvyf9EAWaaFoxwwQUi", + "LEQ3VE0RhnZlaMXzLk5ol5qltjqtGL9/SdhETVsHezsLeK6RvG3/6L77q/tp47+8qC7SiHiQ/A1PFWUT", + "BJ/NrT6lEuVroIrEK0/EQTeNQMyLKTsx3baylWAh8PzzT9htZNlJG2Wu9qiD2CP5v54RIWjobtWj02PU", + "jug1seiORMrQIO33dwJoAH8S+0vA4xiz0Py20UOvY6r0bZbml7SxBvWKx/17iwRTDnJGFHG9oQzUNUJM", + "DsNAENBPcLT0Gl4GYi+wjrJxFy/tX7lU3RgzPCGgTdqGaCT4NdELRQmPaECJRNdkroWUOZroQbszKqkm", + "H8JmaIaN0aA3YG+nXBLTxH3SikhA6IygmAfXKIlwQKYcFPEZjlIiO+hmqiUGzYwFwZH9GQkSY8oGbKoX", + "KQOekFDrEKYZbA1dETa7QjFOgEqxIECiKMaKCIoj+oGEiJsuMQmpvqAGjABeowRrkg0CLvTtq8+W4GBa", + "gMJPEl0ZeeMKhr+iTGPllaGr3oAVT/5j6/XF26evL14dD1+fPXt1eDL87dnf9M+mU+vg948tY9/MBI2n", + "BAsi0F8+wn4/Gek0JKJ10DpM1ZQL+sEYWz51WhoGUuMXTmiPJ4Rh2gt43Oq0/lr857tP75w8pacibKbJ", + "wLOwT15ZxlyFHo5y7Ix5ElkDEYh2GEy1wGFenF1s6ss1wVKqqeDpZFomDHuzr0USIZXXQ8qHo8S3Jiqv", + "0cnma6TlDhRRTaCZnLHV758+3ZSDlv7HI/ePjR46NlQLy9cshAsr/sipRh8thAPKHJ1dIBxFPLAmkLHW", + "lcZ0kgoS9iqWNxjdx58JU2KecOrTwSrMKW+6yKO63fzrGqxoc0TZptTH0A3Wgzvgza01gWdsRgVnsdbG", + "ZlhQfc3KMq28en38bPjs1WXrQPPxMA2sUfHs9Zu3rYPWTr/fb/kQVGPQCh744uziCE7KkI1KonQylPSD", + "RxI4zPaHYhJzYTRg2we1p2VBwdAtgsMZtHZePDXItfUC8ModSkgltHajmIHLGLP94qkPW6bzhIgZlT4z", + "2a/ZN3fyhWvdsPsybksiZkRkSAtY3CuoH0HE07BbmLLTGlNBAoE12rU6rT9JrOXw2QeNOvnaPf381qtG", + "8ucKwRJHCWVkiWT5jUh4N1xcRxyH3a0vLOAxovTYi1t8ZT6Uz9fiBMlQotVZsEaw8IaGajoM+Q3TS/bw", + "VfsFZY0z5vpe7wRH//rHPy9PczVp68UosZx2a/vRZ3LaCm/VQ3tNINlG0sS/jYvEv4nL03/9459uJ193", + "E0YQuZVQZ8//mRkBWLbG9bD0TGmsmWWw/M+UqCkRhdvbIYv+yejD0B053CtspWQeLb5pLjBqPiMiwvMC", + "47Vram31gftVViWoAlq1/TQbvUa68wo2rEdzl/yLqo6+3fczWs+iPGt6qnmFvRearCRbyNb2qf1ze3FJ", + "NSu6pskQpOYhnmQm22WvzefXNLGiOPQwxxhFhhGEKQjvI85Vb8D+Z0oYgrODAybvSQA8Tyqs0OHZiUQ3", + "NIrAwANMZfFq0YJ9zlZMc6n0/4qUddAoVVpa54ogqzfBJCmsBRqPCEoZds/ZFdnZbrCKVxYs10QwEg2N", + "bCwbQsZ0QrZTLXBgq2MsFRGG26dJGV7Hv52eo/bxnOGYBug3M+opD9OIoPM00fxgowy9zoAlgsy0CsEm", + 
"YGykdl4+RjxVXT7uKkGIW2IMg2UmMvvWOntxdmFf6+VGb8DeEA1YwkISwprdjSORmmKFQs5+0hRLwvKw", + "xfkrQPfT8jqqfKc1C5K0fCLb1dN4Be/peu8zKlSKI80qS9Kg93ndOG54pH7jF1LUPizbypATq/K7aFN7", + "hxkZvDgWZWK/2cIIOo3NFgVNfMGA4dTEj80Wu2L8E+YWstRsk2uKnzHXuRmkCiI7dsft7BZQOslgUoYV", + "/jLgOZQFzbrWLB4SqSgz6KTbIivQSdS+0sq4xWOtfl910NVfSz9o0nWagRYPbpCBBrADpn8qjl+1KazU", + "9pvrdJXDwfL253Eoa/2M0GwLKYGZ1FejFpES0kO/Ag9GisSJZkRsgqhE0vBOEiLGb/4TcSOTuK4Dppcm", + "jZeGBUdm85F0wiibbGgpXd8rOAyNYWicqlTodjMqc2iWUccZX6obeGtWRww7jVOpL9QgSkOCrpyB5qos", + "1i2abxY1OmvPWVBQDEhAMQFdTW3GqdLT6w3HWAVTDSeeKuO2ZbcuywsoG4lWPWfatWQPXbc4//OMXZSB", + "as0FFcavN2efWMCqVzAv1lnxrJzhtzBekzkcubMm4gV7YtGQ6Df3CSJ5NCP21iyaIkc4uDZXifGcsFZI", + "Y0+0JkRN/hUS9RrXVh2Fhldj8Jcl/UVUAguu3WyOMVZ4N+bbecaF9ObMfB2t10oCwAfN4QCBNHXVMaoO", + "AQMCYhpZIhRSQQK1MDxlkwEDD44r+0vPjnaliVyLGD4i9OkqXlGuoKyYPqWjRYWTdVIbDKO3xmOqFAk7", + "ZdngmpBErt6Ulo6t3dljHBfkRlDHyKy9J2woXRE25iIgsZXxP0/ve1YYzKuFrTfEokOFgW9hzRafEE6S", + "iJLQeO+Y8wArqbTnBCbSqsduWFG6zAN+ecorHEVXqG0bbSBB9F6kOyvGWY7sb4/OHApkj86Xpx2NkZoL", + "XE2VSob6f+RQU/FVdTDb11G4Hk7fSRLt90E92t3dsadqbWZmwZVhy+Yxr1NC/dGcM5zIKVe171rXlIWr", + "EMUN8ptuW2sUywQaaZvftV0sEaSbJhOBwTH1S1rFbv3aCNCs57wrfM59zoUZVINUKh4XXAxRu+IYQcsu", + "FGVgzXjUDbHCYEFsaOY0y110143nZiijQ9UZQIaTkcfbhn7Q3BJN6ASP5qpstt/q+zS1z336dWvxHUud", + "27vR/Eg4VHy54y8dI9e2iZ8f3ANDxYezMfWMnF1HudcIlSio+NhbfVQP0U0CarV4kE2CqfHLNEAAYe/y", + "tPhk1huwLlybB+g4myAbNhsSg0yIQ/Ng0eaisAgKzl5oNN9AGF2e9tDbbLU/SaQVjRlxcQBTLNGIEIZS", + "sPjCLdY1d2hxAamEy05Vu1uThQkZ2ICXQW6/9dCv84TE2Jp/NCnEWNEAHIxGtLIfuEbMQdmnWMyKxqdG", + "xqJl7tJvyIRKJSrO0qj95vnRzs7Ok6rZcPtRt7/V3Xr0dqt/0Nf///fmftVfPirCN9ZhmbdYl60i9zm6", + "ODnetjbK8jzqwy5+sv/+PVZP9uiNfPIhHonJHzv4XuIm/KzsOPc1Q+1UEtF1bFJjlc/DrODIVeNBdmvH", + "sDvy88rdVpe1NZB4q1veRUCIz9XYOrquH7JRZZgrnZULm1vUwOcJ6Is5lRQkL+sTGFCv9+MxlddPBcHX", + "Ib9hnns7xhMih+Y+87sRpNL4tpD31iohOFdjaZ4ry9bKrd3Hu/s7e7v7/b4nDmIR4XlAh4G+gRot4PXR", + "CYrwnAgEfVAb3plCNIr4qIzoj3b29h/3n2xtN12HeVlpBodMYXK9UNtC5GcXU+e+lBa1vf14b2dnp7+3", + 
"t73baFXWzttoUc4mXBJJHu883t3a395tBAWfIP7MxaVUfedDn8eA1nvMG19XJiSgYxogiGxBugNqx3CF", + "keyRqEyTIxwOrdHDf3coTCO51FHBTGZbGgNZnEaKJhEx3+BAGtmQYefHMJLPCYQyRsQwC9tZYyQbzbPy", + "Yd7tJWuCSlFZJdCdUglSSC48URKFB4ZCV/I5OM18Ye/q8MDuoSE2vNSqUzciMxIVkcBcXXqxMRcEZXhi", + "Dq20K8pmOKLhkLIk9aJELSifpwJkUTMowiOeKvO6BwdWnAR8hUH3GGt23Uw/fc7F9UqvS30TD0XKmB5m", + "pTXnEAzgY2tigVscI9vbOfYXhL7sFc68VdrvEr0xPYxlJ/85SRWiTHGtnbJwNO/ATNYCxJAgUnHgpNbQ", + "Z4dpKl365RYwcjqvCzNfzjvvyeWkOzav9F9WwxYTooZSYbVSYtGY8hban0Pzxk7cuuNKA0gDuDNycx9A", + "By/3rkbbrmQ4uRuIL/MBy2wNeSO4hQUNSQ8BdYEziouqq1DaueJJQsLM/tMbsHNDKtlP0rx86I4GDmpK", + "qEBc0AktT1w2jN2lM9k6qOiw6dboWOy4KKHCR/CaqCd6PFZEGAi6gOFi1I89hFanZWHf6rQsJyqDxv3o", + "gUju4biwxBdnF+u6hCWCj2nk2S64INivVjNzzlIvd/vn3a3/YxwfNb6BiEaZcVuIeUh6lZh8aN/s5nlx", + "dnFWt6YsIQIqrm5hT5mjiYdzZP4IDiL2Mci+JloNxqG/vliySXLZ+4lPlh0LHJNROh4TMYw9xrXn+jsy", + "DYxHEWXo9GlZntVyc1Ot+ax0OKA2j3Fg49mbQd9jkKtso1OA5jv/cb0h5hqui4LTRyVsGxsI10OvshQU", + "6MXZhUS5c5DHUlc+3lo39bPpXNIAR2ZEE9RKWdHABsjZWEI+yztaU6RHTo69sqEjBNSeTZIUyPD8Tffk", + "9eVmHJJZp7QmcOiZ8ojodW8UuMXMxcLlPvUlJjGrs3QYxJBNCagAq4yCGwOpQK8e6CiucDSUEfc5WbzV", + "HxF8RO3L5yZWSa+gg5LSUerfC1Ao4feel2I0R6qb9hwmrJpMSwTu1R3LmVuMeaWwvdKkPlL5leDIJKwp", + "43MeVu0Onl+XD5pfr6ReO4hv3hPnj90gZuro9NgIDAFnClNGBIqJwjY9TsE1BcShVqfV1XdUiEkMHm7j", + "/1zulVJjgi8GQdUacY8Wsl3ciQG3Jkr7jXEdCFGMGR0TqWyUdmlmOcXbj/YOTC6JkIx3H+31er11Q0Oe", + "5bEgjY5i03jOF6JEenL6eedwBxEgTfbysXV2+PbX1kFrM5ViM+IBjjbliLKDwr+zf+Yf4A/zzxFl3siR", + "RulH6Hgh7Uj5SVPfWeb3A70TZl25NC5xUOBXPjHV6DPgkQDhat4oXYUnWj8xGPe54bi3TtiRZ41ShUQd", + "RUfOBkk76IflllAnGEEbO2fKFI3yfCaLNtBbZaSRS4P2FwL2E8KyMP0oMn8FnM00Vfhi9ksM3H37rPcD", + "650yDKkHk//HanvGuQGCmVbTW2sTJ8lqtPULihn/a5qrxEYUe26ir871b/PGVp799eS///y/8uzxH1t/", + "vry8/NvsxX8fv6J/u4zOXn9W4NLyYPKvGhH+xYLA4WGpFAneFJVOsQo8AtWUS1UDYfsFKW78LHvoCBS/", + "gwHropdUEYGjAzRoVVx7By3UJu9xoEwvxBnSQ9kAgw3d+cyYf3Tnj063/FQdI7SRBMIeSBZAJNNRyGNM", + "2caADZgdC7mNSHjT13+FKMCJSgXRp6dl2GiORgIHeQRBPnkHfcRJ8mljwEDDJe+V0DtIsFBZ9gs3AyCF", + 
"XZXxGbDNSejisY2GPGDZvZSFYxsbTS8zgoBtvuop6QeKV33hohwBs9/3Ba6Dt5Y+yIhKRcChOsNsjUaZ", + "Gxna75dYxX5/v79SwM9waAn6ASUs5qZ0SNmAlgwCw9SGcYNnWQNbuuZNhkbQr2/fnmkw6P+eIzdQDovs", + "iI2SZ3z3pLERqkgWvPY2Wv6IEH26DTdkjGTQLWoQrPPMuHW+fXmOFBGxc7RvBxqcYxro/cHzP5Uy1ahI", + "MTo8On220WuQXBNgm61/yTm+zXZYDcqwRrM6W2CG8Rq+HXRyDG61lkJzAQ7cap5zgSLDYHK6PkAXkpR9", + "VOGozKu+OclonlvezA0waG24EZMqpzhAbzK5EWdLyRwkc2RwQ+Z0CcPahxfj87MwesWfFryZrF5kWRt4", + "+GCVOXfrG7eeFSwnfw/EgeatP3bBprkebReNoXoyP2rkZ3/n0srOujrqunkRyqGLhbDXLDVC85wGd5Eb", + "YFFfe0/VsPYRHunP9sndaSWXp2iKJftJwceKbrK187hRkko9a9Pn6+LDNR+bJWVU5eIgs2dXExF6TaPI", + "eDNIOmE4Qk9Q+/zkxW8nL19uoC56/fq0ehTLevjOp0GKBIfaL84uIEoFy6F7Aap3esS54zB5T6WSi2Gi", + "jR5Sl6dk+LWUNsEbd7vxBXMpuNfnhW3cR5aEr+nW9+1laFiaU+FzEyNYYfeO8iLUMldfToEynzU/f9kM", + "B3eynFLMjo8/FGUC53N965QCnRb1+JseSs0CSYhOzvLMgrlRyg1f2dOT7d7W3n5vq9/vbfWbmOhiHCyZ", + "+/TwqPnk/W1jiDjAo4MgPCDjzzARWsQ2whuObvBcooETrwctI88XBPkC2VoRvNHz62LmhtslaqgKFKtS", + "MayTeqFZToUl6YHPy4mBG8toj/7+WTmESdOb2bou2F7DdYzXBAU8jUItB4005Rm1ioRW+5NE5TmXgVgv", + "2DXjN6y8dWPD1PT7Z0rEHF2enpYs3oKMbUrZBhsHl4eac+DJWsewvUJUXrmaW6Y3uI+UBlWuWbitvngC", + "g6LJzblQGgxtYHrLpUfvszdl5mg0nizZU8VoEpLZME19QpH+5AInLi5OjkvIgfHe1n5//0l3f7S1190N", + "+1tdvLWz191+hPvjneDxTk1S9+ZuL7f3ZClTc32gEgAeDJAmDi080PSWuaKMUoUyNzVNyEdaukQFMdaE", + "5YBN4IRRBZkPKZvoYUBFt1KuiYs0yRkpowoC8SGLC2V6y2AL0YNY56MD9ALawiccQ7iQW4TWbcpmABzO", + "jRlUMwY3dQL/Wr7k82mqtNgFfeQ0VUj/C7atwWC1jeVDGB5zgF5x6COcjyjjVbXFNAffq8XmVRWnbb2C", + "nPcoTGYZ5gF6njHJjM1attqWxP5peLd1bAan7Y2S65w98ZbGlvzkCl5hnZaBaKvTcoAC77FFPzK7Lm+I", + "RBEVfe8DBEfAQnM/nVTRyOYWgJ1QqWhglD4Mh1tHyTYNFgmH5gave+0zzh/2ls86OUZxeYraEI34M7I6", + "of7XRvYyWKTK3e0nu0/2Hm8/2WsUc5AvcDWDPwLXpMXFreT2QZIOXb2Mmq0fnV3A3afvVZnGRsm3ey+4", + "eCaCB1rYpAzlBTjyyZ/0nhRDLUKejqKC0cjGZYE/f5NqKTXPW3/SaEbHY/bnh+B6+w9B4633e3J75NXN", + "son8guxJ0dC5oPWRUdekLvR7wwNCCVkbMPKGSNgBOicKAf50EQ7gks48iizKubASC3EvYu3u7OzsP360", + "3Qiv7OoKhDME9XNxlad2BQUSg5ao/eb8HG0WEM6M6dwsIS0DswKcn86QzWLcL3lgatVnx4clNfJSjjV2", + 
"7FlcC/JLKwTZTVmgg2NUJiAtULkX2js7/ce7j/YfNSNjq3ANxfvlHMalwjDgsdlDiiffBuP428MzpEcX", + "YxyUFYyt7Z3dR3uP99dalVprVZD5xmSsWGNh+4/3Hu3ubG81i3zyGcBtTF+JYMu8y0N0HqTwnIYHFIus", + "t1N3W/gET4Ngb0gQYRofBs55pXL7mMwUQ2Ga5YfQ5GKwOv7CxdWgbyMVrVLsxogGXKCUZfmQequtmbcz", + "TtazaXMfrGbjizJ0hJkGl3XRN/kLbwG7RJAZ5an8AgNxRQKNTOOIc7FW3zpvoDdEppEyFkQq0eXpT8BE", + "NHIhqUhS9nS36LckkOGWm1uLgEs44cfqOmA1Oo0mR79sw50aMu0s82ItkX9tvFCoWVXKVr88H+EoSCHl", + "F87OU+8KPP95quCdfG58NKKIc4aCKWYTAhnQTYJBNkEYTXkU9lr+l44oHI69LxD8BkXcZDq4JiSx2bDM", + "InQ3LbPQGUHtFzwvw2ZQqZKU9lFsuIrNd1TGxkdxTUlL6fP7y+KLNDyx4oUgfNOlpM1HfCJBC1TgfdKr", + "5n5JsDBOJZiZ7G6z2CiP5cCpbX3be5ZY4d6+K9RcnXxsNVorYyieQRIHgkuJSEQnkEns8rS8zGXugzFl", + "NNZ8dvVrcnmxDVBXJpxJX1oTuNNk4ySQvgvR45f1OVci4DC4X3ot/+Yd3nrSoxizFPJjFRCZvE+oMOjR", + "7G17yqUaZsEgay5WqqGWkWUqSB4x5u7LKbjfzw2Lgzbee9GxttuAyzo93Kr3Alb5h6pbYD1P9ULUD61O", + "hoM+NF4Mh1kagZOH9FTjN9YJ2MqT7lAJo9JCrBBqM65KbKmQOGajyTuTX0fV89SVQX252z9vGku1PHTq", + "DKvpCRtzT0bFNQz+1iHduR4kRMRUQtG3kDBKQqc8ZpZ/a9sCF/dIEhSmxELOCKQCW4BjQ96QOZE5oxhl", + "kwqvr07YxAxv1rA8xRLMaxs2eTGUfsfotyIFWJk3folw7iLdyGGByqHfUrw4sCCTNMICVeMFlyxZzuOI", + "susmo8t5POIRDZDuUH3OGfMo4jdD/Un+AnvZaLQ73WGYe/hVnmfM4qx/pzmQyrz5Fn7Ru9yoeJeD6WXT", + "9N+EOtdNHmC9bj/PaURsSN0Fo+8LiF7OQbK73a8LPKgZtBRysBiOuS7ntijro3gXKXmYlQrwuJcZB57K", + "q0TZEFnar2+34CG2LMxi0RSD2u5N1+V4KcO1kGulkSWkmZNa1XvBrWZTkqA8++7+o8d7DZPdfJatc0kl", + "4M+wbM7iJRbNmpM6bWI223+0/+TJzu6jJ9trGaico0vN+dQ5uxTPp1IRpGI0e9SH/1trUcbVxb+kGneX", + "8oJK1T1uvaBPS0g3D3KuefZYVoU/P0n3zlK2gDazMS6Rlg5LIlehgFWbjMcElMqhgVs3X0zFOb7RGgKc", + "4ICqucdggm9MovSsSSVYt4k1rbxYD0jt2DbfguZcMh3l/phtNzn6qzGtV3Bhv3HOLJmO6sz4r6uzGiN+", + "bgMqPhE1eKHJ0/Evmguy/dxgWfLq0H8HkOk4L1BWdf8xLZpXUna4nhVTzh0bfQHn/sLJxeOvHGfB7FsS", + "kqsQX3aF1pPgWjq050b21WZc7VRb4Q/2Arxdr+GomM1uabrAUuq7/NZdf95mpdUW+5kbbP35Ch6c63Ss", + "JvYCfLRrsCDPx+6UUKIGmxQXq/Mw30F6HuNTcKsEPdYd4V5y9Nif7yQvz8JxnBfc0JsHXbhe5bovrrB3", + "yZFsr9vf6fb33m7tHDzaO9jauosI0cyJpO4t/fGHrZvH0TYe70b788d/bk0fT7bjHa/b7R3k/65Usaqk", + 
"A7d7SIiopmSrpjKUJKKMdGXmf7LaE3BJ7Lcx6SZ4DkLeEo1sHTXAVZZfQrXn5U0WiRerHDjVikL3ESBg", + "V79Ul6ku/+R4+bJv5dBRXYgfwapLAXxqthjIWLD1RVO9w7sYUI8XkDWb8qFMySGyhMTvlnCw3yzh1nEq", + "G5dnV5inB3PE5By8SliTf14AlI/FLs9gVrmEjJNaMWFcFhP0ZdOXvbVHWhdBVn0cKqbVO+z+3aTRQ8Pe", + "weYvP//v7ru//sWfSrWkOkoiuiEZg8R8TeZdU9tFI1evnH7EVEyWCtsEpIrgGLhdcE0Md43x++J6H/Uz", + "W/b8FY4XtgCqRkxZ9u+VG/r5L/WCegHRLpKwSWH0u0jkojhKYXbUjomYuGxpzjwMRa+0mHNN5hIVosWt", + "Z4sjuZ9k1qVYXsXW3O5BUaARhaQbcsCwIAgHAUkUCXs2apbCWgQHfK4WGbJR6+45V2M6hlQd1r2mWQHv", + "FiM3XTND2NWos/toz9ZVK0Jya+GEfGdmPLTr6hloKHtuxpdUgvuBe2UrNEZtEidq7nKyuHeQjfU8xg+z", + "AWvK0n/RaNf+ky+Rm+NiaTKO77CaRtGh3y1opSv/wvnXRsD7DanH1cA6Q5M2Q3g5EKyS91iqbr2dNeYp", + "U0N4Lli0iepv5inCZp+YpNU0XJsxU5s2143P/SGEwj1LH59yKnPe7V3otPpNZamnTmFnhZXUn42J5lhM", + "MbEEQGcaNDdTIkjhIKBDnrBjTZDZh4EGTjUmI0VCRLeaQt5kORQUXhosgAxgNQiyx6PFF6rlAWmn+H02", + "A2gNWC5oOLCPPDQbKvVu9NAbl0qcjt0QsIxq9eanq7GoSRGzxcMoYtXivk17L+FZXrWE+9XRVgU58zlK", + "qPnOd5tJEqSCqvm5ZkM2bBZKIB6mBg2BP8Em4Od8ckhp8ukTPKCMPXbUF1qDpgE6PDuxBRAZyF/o8hRF", + "dEyCeRARm5FiIQoERI/XRyddk0onK4yrp6cKAOJK0RyenUBlCyHNvP3edq8PKJYQhhPaOmjt9LagzocG", + "A2xxEzKgwZ/2idR4R1HOTkJ7Oz81TXQvgWOioEzx756nRkWEyagmQRbCk4IwmmAqrDSaRPAAarRoqvtC", + "EJ5j8AfmlugYgOOm0WxSza05mCSv7bG+0+hg3JFgi9v9vkl3z5S9DnBe8WDzD+s2ls/bSMoA8Hgi0hak", + "TSfpWJB/6rR2+1trrWdlkQLftBcM20qaBJb5aE0g3GrSE2beqJCJdrBp+4t0BihUpLDf3+nzkmkcYzF3", + "4MphlXBZJ6IRiTCkSTfp/P7gox6y6jrk0JBTnkYhFOlOTCEozUYxUlj0Jh8QFsGUzsiA2dvDFJzAAnIM", + "xUjfGkaYLpOGmdqcfuYa9ZSH8wp0s+E29XAgbZUBXI3Vl2QIMYvDulydma0roYxBzn9JbGKDLGndAkc3", + "RVpkwL3VaQjDTOU1P0x1lmsCMRlj+t47YKPgIs3w4FgIFAPLclZtb/hf1SEFg98h5Tj7hix4y5ec1hFs", + "VdtMEnAPPViMcBR5vc8nER/hyBaxuSYewekFtLBAKWarcFcu4yExmQeSuZpyZv5ORylTqfl7JPiNJEJf", + "zDYDkYW1q+FpUBeqidEYsgCZ/IZ6zk2zxM2P12T+qTdgh2Hsclfa2vY4ktxW98mqvLqXAYO7/hwZNbau", + "I1vtz1TWKBYjMMvkqUpS1UNmI0TZtEnQHGpVyCkJB0xx9FGY0mTzT5sf8xk/gURNcKjxpNDEbGnzIw0/", + "1a1aDrHe/RCaenQSAgAYtPTtMmjpvycCa4k6lVNQsCUo1ZPikbYzj2AtrWxUIRxghhKepLaAsuYnULSo", + 
"NAakoMNRhBSQkuurZSA4yZr9WAcJXz516x1hnrMrZASZ1QvE1N/d99OTJIEgPrX7v89fv0JwVekzMM1y", + "YwbAyNTyRmEK8iXM3huwZziY2iK7EDE7aNFw0Mpk3nAD1ppK+3zT7YLg9Yte2i9mmg4Nf+n19FBGpjtA", + "v380oxxoWkrioeLXhA1anzqo8GFC1TQdZd/e+QFa98h8XmIEqG14/4ZLIArO7vk1aO4NzELELa+N5gij", + "nAMVtfsRZVgszX7qAb2FoFYw8UQWgfFxAGa5Qetg4Axzg1Zn0CJsBr9Z692g9ckPAZuttz480ySAtc1y", + "JNrr9zdWe39Z+HpE6FJDTX6fFqSv7S8meFiha1HwMJtzseX6BE0qXyNu3YPk8xRnBc1/iHgrRDyrTxeE", + "N+hfvAcM+kbE2PsrEphWwCMngS3VTgxaQHIF0Dicr6ZROKiT4HLkLaofVSVzUa3YraOyAJYYOfzbvQf8", + "g3nzclgw75P7mhdHpnCrKw7zsNARDsshYsevEb8g6lvAuP59sVJXte8r4u9DwZ8XxMp9OdAq3GyTzNwr", + "iN8jXQmCY2lHMY21rnoOa+qeE6bQM/i1Z//rNB7Ir3IV8cnVATIgjPgERZTZN6LCG4a+FC0soZMJSsz6", + "2QBeFw7YNvfnv/7xT1gUZZN//eOfWpo2fwG5b5oYDUgfcjUlWKgRwerqAP1GSNLFEZ0RtxkI8CczIuZo", + "pw9iZiLgk6fggBywAXtDVCpY4S3NROZJO6Ctf6f3Q1lKJJIAQqjoPLZhA8bs6VHhHS0bUN4rRXcWdC67", + "g8IG9K3ocAD8QKnJoWL1r5bfemb2XLKfVS24Czb91fxFkffKYG/XLHBNBgMg9tEdfLCbRu3z82cbPQQ6", + "hsEKCA0BiTkfxgrPvR88aTVPMhylzFAAyoY3FWpN1dp/j22bZgZgO+L3ZAGuK55VbwI2Jg8iSOjg9UNX", + "aGIO9sPNmYZ99tljV2u73kB7+/0Wp3AuKo0U4S93zg73FmFui87nIPsaKjBq2xrAWd73UmX7r4X093Jr", + "5KXe86sDcZNt/t7UsiPOxhENFOq6tUBquZhkqloZQR4KO3hjV42w21c1CLt4v22WYopqb7osvCi/8u7+", + "9qhMus41kgeK57j24yZZhTrHVAZc9y1gSzfAic16b8SXjE6LWLTKIHUMv2dXzlJxybLnk2NHkPdnmrJT", + "p6x6N9wDUzyuMMSvyAgrmbwLqRUeEjZfZKdo97XMcvVtoWb//qSg+7Zi+dD8IZmxwgrYNBecZvVd69DL", + "VoC9w4O2M3g2fk6Eo2qXCQd2nW3LdEXBlATXZkPwIL1c9z0xTZqpvma870nzNaV315BYLMh/iCgNlN0c", + "VssU3BOb3vzu9FuYYS319su981oE8wAZnE1GzmJtModjOWfBxnf11Hsvt5kB9oO8zM7SKHIvHjMiVF6A", + "uHgHbH4Et6TVsr2jtqXXwcWbl13CAg5+aJkPlV+IcnVBv6yEbw7MbOUHmjTRCQFUDjHqBejPOH/jLoiy", + "IlP/sf3clpn6j+3nptDUf+wcmlJTG3eGLP37Ys33LXE/YOTTAjctAw1Yk6neuUpCzVo1FFJd++9KTrWV", + "oNeRVDO4/hBWmwirRXAtlVezotx3KLHa+sVf50kmQzYftOGT80/8ziTV+7XyWYx0WWSoLD972DShXOQ1", + "gylDqSQP0IGSZhhXvDYamqtzglx6fTjUPTnu2HLQpohzFiByT8Zrt457F27tvPdvuT6MR3SS8lQWY0+g", + "+jeRNlgpImUG/NDE7vx6rhW8v2Es7d/n1XHvcvUPvL8jib96oIZ5mxeoVTK/a9VU5rftoc62KeFmYtfe", + 
"uNJwNvXNRo1Tocuf3hSNS3U+F50dfevy6SLoQisqubqAQIM4GLD/0vrH74rg+N0vLkgm7fe39+B3wmbv", + "fnFxMuzUoQphSlAiERYEHb46hme/CUSvQwK7PCSvug6Tlg5Qz6VV+bdTkPKXz+YaksPCHxpSIw2pAK7l", + "GlJWevAuVaRyZqV715EcvvkAblNr/NCS7kNLkul4TANKmMqzNi84idmk7w8wtozZ96GCc0fpom2sJeX1", + "QJcLoHmqwnt37Mkmv3/lyGVFfJg+8txExYROHckvw3p95FvDh/79Muf710MeMooZgX8RdImWKX3FACD/", + "YJwqcErMM4SA12dW8dqN2EN5Dn5balGaHIYgAENSZjXVArAv32E5haEvZ2HCI31DyM6AEYZHkf5sYvk3", + "r8ncZCikPK8tl+3UZiX0xV6VEzx+VTL68jKWP3tlIxnrnsnYZLr8ijLWV2Md9yJpndjoB6t0Z4QBCuWI", + "ZJTMs+A+KJC+8aA8UA2zyvZWyGfkEbU2xzYtvl8Fes7FdVOm4MkO/QB4Q3GH36D2pZcH+ZO+vhIG6omh", + "H4009843FlJ+f02ndVrlJEGUhpp1OBbiLt+x4PHQ/mjyTmqqsDXOQKkL7Khfm8no2e9BxX7FFaJxEhEt", + "95AQdQ026dPM6lKb5M1UFhLkr8cENdkUQwhM+i5prKodZMsIwHOEO7A2vEwuHpeXa0Z8sjptQDa5i5H3", + "5A0YMJNcmrhM1FcoY7JIcSRJRAKFbqY0mEIOAf0bjG9SDOAkucqSBm0coBdAqcXcSTB5WxKhRceAM8kj", + "YlIDzOL46mAxx+Xl6Sl0MukDTDbLqwPk8lpmF4TUrYo5AfQuIiwVemUzHbQ1JgkeReZEr7ScXdjfhs0W", + "kCd1GjBf5gBGbuyAdIyuCkkErmqyCDiG+pJP5NcSZTv1qfjMXqB0sAacwU3Cwlad6ZpG/vwBW31vveCG", + "uQzMMu44lcHCYl7ySZYGsITKOEmaoq9dJmDxLI6X4DBqF0psSBXyVP0sVUiEgM4Wu+uQG7VtgVqk8LVG", + "VGYLZLoiJYB+3gcak5fLCyrNVAtVDsy/ZnFsS6nHmHkqBX9+TojqgIsPCfpkCokfftgS1knpUGb2hZwO", + "lZvDVlOqF7ltkajv3qJlARV+D3pp+QUgXwVlTlSBs+V5lZQHFRtu6odVZTFT0sVHI1kBsnoqKT+bnefF", + "Y/4NVVSz12rVuHtWUjMQ+zSzUtGlr66dZjWgfmiomYbKBQpTM12lCtt3q3ZmDAWlrKR5WvH0trpnlkYz", + "AzNUx2VLnzxznrf50f15cgtx4RvhhJ3aWmR1CdvyTX8LLLemUue3+GTg5CR7rRYEhK/Igl3N0K/2tqDV", + "vYzLfRNs2BBcxo2LPEcJzCR15YB/MOOSGdBYSm/LjJ3wuWALLLBnyrpJhOv4spVTaxmwrU343etrua7y", + "nWtsARfCOMeCu+1DCs4u+AoUVM92glNJOhnBdJxnzuXp6UYd0Qi1lGSEesC+BpVi0XHor9YvaOjKcByd", + "HtuiHVQikbIeeh1TqI1xTUgCSXcpT6UpI9krVlSsq82ZlUwkTIl5wilTK1eRN72bxXy6VRmCe+ZTNj3F", + "d29WsuXfHxqTAt6hb2+7geVKlTKFRL3PdO7ZijJTO0QLH3jEUz36QsVHNKYRkXOpSGze7MZpBEQECYxs", + "fmvbz3jndhBVEml66IA3Y0JETKWknMkBG5GxlkoSIvTcUAaZRqTw/OB72TpXOOOaZ4b1fRtPW1AEEl5z", + "sKqDWrn+I04SV//R93ySlay89ZKew1sVkvN4xCMaoIiya4naEb02MjiaSRTpPzaWPnYNod+Xzt59e8rS", + 
"kD5hY+5NcGpwNkPm78Ohq8zW3GP+g2NrL0iRWBz/gYP2szW5kq8JgiMoc5wFEqBU0Yh+MKxOD0KlooGp", + "Cpe7sUJBK+vJOmCnRAndBguCAh5FJFDO1rCZCB5sDtJ+fydIKER87RBYHDC8+s8xzHh0dgHtTNGtzoDp", + "f8DAbw/PENUwHWOrMhcWyoi64eIanWy+XvH8fw5g+jfWx8wGl/qTeg/8x8vu+l7itTQka0iUJ8sUIJ58", + "9wYDK8H9sBY8TGsBhOlku2lPBA5AKJbTVIX8hvktA6YGtNz8aP44WRXspXAwvXTF8L8NadfWw141jdvg", + "gyBKu6eQmATMX8Veb0uWP9CEdRpwbgsgxBTD1vy3wKH6HrH7yz/WFeH4Db7UWYi65ObfDG3d981n1+Bi", + "mIvweChkbjDN7QSK8hatT1nA9krdLDABgpDsKhctA5zggKp5B+HI1Yu2BeAyG1I3u3JHguBrfdP2BuxN", + "FipuC9Bp7arjVCsUUnltRrDaUw+9nhEh01G2OASMyeh5AHxbMjrAUWBqLZPxmASKzogpgixrtK9sKXeZ", + "eDyfxHPQ7qMF3UNTOfw4AaeXo4WsYNymOepNQYII07ioelSBAwwSDP9gHBjpQTlDlI0jawQNBJcS2aG6", + "JKITOoqsSU/20NspQRLHZMCSCDNGoL40PNbqpXcTQaRM4fEfBoDKDgajOigPlEkEV9aYEHEupNH/NYZf", + "niKpSLIEzd6YkU1N7jtKzGEGtzN9pWulsgYzix/lDaj1gRhMMQDXeJRGSt6bO8hJ5gBiFvSj9nIzwn8r", + "6GRChKYKbJisMaAbsnbgNERfco+tzUp1nrVqlpUqG7XgAldwD1sa6DB0DYcgfq5jp/dMfk1rY2Hsp/Vc", + "Vn/TnRrOXXaN9C/CfvrMXX4vyX7PCx5pTXNZ5Rj+0NJKFVZeItWSV+fq/DaN3Tjv0q1yVX6bbPL7zm9z", + "7vXse2BZNnHJV7Musc23hwj9+w0puO/ENg8bt7T+IBdAV8+JGqR/+CYw8G7yPnzlkJpb5H34ppy8IW7/", + "6wXbfFPu3dZNOXPv/pHZ4S69uk16B4hir/PqNlzPPjctVZQubZtmapId8XuS4O0LxRryuwP7D62/gcpQ", + "AJbfZGcCyqRFeBInau5M0HwMznZ5qmRJP4DLri9aNntpursg1Vs8wnw59HB4WvsE8yOD7b298uRlPk6O", + "H37a2iLNlS6WTX3rdLEIpnRG6o3uZQq2IEoE6SY8gceV0ADMwsPdZQqL3uQDssP3BuztlLh/IepS5JAQ", + "hVSQQEVzRJniwBHMHD9JJLjWBOA7F3OfMb1Iuc8Fjw/tblbch5amrDEs9+2N590QK9ydOW6zxIT2Ge/U", + "p/g9jdMYGB6iDL14itrkvRImYwsaa80H0XEGUvI+ICSUgJMbxQVv9Wssm/QDGU5GTVa5JPfOa5vbCAWp", + "VDx2Z39yjNo4Vbw7IUyfhRb1xyDJJoLPaGhS/+dAnfHIQHWrBqDr2l21UGGDQHLlwizuq8gwTS6kyQea", + "lNmC8XVuHbRGlGFY3MosN2WaMm73ej5Mwfk1px2HOa0fV5jV/NpO2dGYqJUcB0TFOYq0RL/x45p7yNdc", + "0X3J3Wml265ZcvZmHk0NHY3uIjF75u12v2bry2/HCYfKB+l/Y03ns0whrTObf1so2L+/++G+zeWXD9hp", + "8wVxynfBVA4D6BF9CPOSBzhCIZmRiCeQt920bXVaqYhaB62pUsnB5mak2025VAf7/f1+69O7T/8/AAD/", + "/0qTWDyDPgEA", } // GetSwagger returns the content of the embedded swagger specification file diff --git 
a/lib/otel/README.md b/lib/otel/README.md index 525da933..afff2054 100644 --- a/lib/otel/README.md +++ b/lib/otel/README.md @@ -84,6 +84,22 @@ This keeps pull and push views aligned because both are sourced from the same OT | `hypeman_vmm_api_duration_seconds` | histogram | operation, status | CH API latency | | `hypeman_vmm_api_errors_total` | counter | operation | CH API errors | +### Guest Memory +| Metric | Type | Labels | Description | +|--------|------|--------|-------------| +| `hypeman_guestmemory_reconcile_total` | counter | trigger, status | Active ballooning reconcile cycles | +| `hypeman_guestmemory_reconcile_duration_seconds` | histogram | trigger, status | Reconcile latency | +| `hypeman_guestmemory_reclaim_actions_total` | counter | trigger, status, hypervisor | Per-VM reclaim action outcomes | +| `hypeman_guestmemory_pressure_transitions_total` | counter | from, to | Host pressure state transitions | +| `hypeman_guestmemory_sampler_errors_total` | counter | sampler | Host pressure sampling errors | +| `hypeman_guestmemory_reclaim_bytes` | histogram | trigger, kind | Reclaim byte targets and outcomes | +| `hypeman_guestmemory_host_available_bytes` | gauge | | Last observed host available memory | +| `hypeman_guestmemory_target_reclaim_bytes` | gauge | source | Current reclaim target (auto, manual, effective) | +| `hypeman_guestmemory_applied_reclaim_bytes` | gauge | | Current applied reclaim across eligible VMs | +| `hypeman_guestmemory_manual_hold_active` | gauge | | Whether a manual reclaim hold is active | +| `hypeman_guestmemory_eligible_vms_total` | gauge | | Eligible VM count seen by the controller | +| `hypeman_guestmemory_pressure_state` | gauge | | Current host pressure state (0 healthy, 1 pressure) | + ### Exec | Metric | Type | Labels | Description | |--------|------|--------|-------------| diff --git a/lib/providers/providers.go b/lib/providers/providers.go index 815f9c3b..501f738d 100644 --- a/lib/providers/providers.go +++ 
b/lib/providers/providers.go @@ -135,6 +135,51 @@ func ProvideInstanceManager(p *paths.Paths, cfg *config.Config, imageManager ima return instances.NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, defaultHypervisor, meter, tracer, memoryPolicy), nil } +// ProvideGuestMemoryController provides the active ballooning controller. +func ProvideGuestMemoryController(instanceManager instances.Manager, cfg *config.Config, log *slog.Logger) (guestmemory.Controller, error) { + pollInterval, err := parseRequiredDuration(cfg.Hypervisor.Memory.ActiveBallooning.PollInterval) + if err != nil { + return nil, fmt.Errorf("parse active ballooning poll interval: %w", err) + } + perVMCooldown, err := parseRequiredDuration(cfg.Hypervisor.Memory.ActiveBallooning.PerVmCooldown) + if err != nil { + return nil, fmt.Errorf("parse active ballooning per-vm cooldown: %w", err) + } + protectedFloorMinBytes, err := parseByteSize(cfg.Hypervisor.Memory.ActiveBallooning.ProtectedFloorMinBytes) + if err != nil { + return nil, fmt.Errorf("parse active ballooning protected floor: %w", err) + } + minAdjustmentBytes, err := parseByteSize(cfg.Hypervisor.Memory.ActiveBallooning.MinAdjustmentBytes) + if err != nil { + return nil, fmt.Errorf("parse active ballooning min adjustment: %w", err) + } + perVMMaxStepBytes, err := parseByteSize(cfg.Hypervisor.Memory.ActiveBallooning.PerVmMaxStepBytes) + if err != nil { + return nil, fmt.Errorf("parse active ballooning per-vm max step: %w", err) + } + + policy := guestmemory.Policy{ + Enabled: cfg.Hypervisor.Memory.Enabled, + KernelPageInitMode: guestmemory.KernelPageInitMode(cfg.Hypervisor.Memory.KernelPageInitMode), + ReclaimEnabled: cfg.Hypervisor.Memory.ReclaimEnabled, + VZBalloonRequired: cfg.Hypervisor.Memory.VZBalloonRequired, + } + + controllerCfg := guestmemory.ActiveBallooningConfig{ + Enabled: cfg.Hypervisor.Memory.ActiveBallooning.Enabled, + PollInterval: pollInterval, + 
PressureHighWatermarkAvailablePercent: cfg.Hypervisor.Memory.ActiveBallooning.PressureHighWatermarkAvailablePercent, + PressureLowWatermarkAvailablePercent: cfg.Hypervisor.Memory.ActiveBallooning.PressureLowWatermarkAvailablePercent, + ProtectedFloorPercent: cfg.Hypervisor.Memory.ActiveBallooning.ProtectedFloorPercent, + ProtectedFloorMinBytes: protectedFloorMinBytes, + MinAdjustmentBytes: minAdjustmentBytes, + PerVMMaxStepBytes: perVMMaxStepBytes, + PerVMCooldown: perVMCooldown, + } + + return guestmemory.NewController(policy, controllerCfg, &guestMemoryInstanceSource{manager: instanceManager}, log.With("component", "guestmemory")), nil +} + // ProvideVolumeManager provides the volume manager func ProvideVolumeManager(p *paths.Paths, cfg *config.Config) (volumes.Manager, error) { // Parse max total volume storage (empty or "0" means unlimited) @@ -151,6 +196,14 @@ func ProvideVolumeManager(p *paths.Paths, cfg *config.Config) (volumes.Manager, return volumes.NewManager(p, maxTotalVolumeStorage, meter), nil } +func parseRequiredDuration(value string) (time.Duration, error) { + value = strings.TrimSpace(value) + if value == "" { + return 0, fmt.Errorf("must not be empty") + } + return time.ParseDuration(value) +} + // ProvideRegistry provides the OCI registry for image push func ProvideRegistry(p *paths.Paths, imageManager images.Manager) (*registry.Registry, error) { return registry.New(p, imageManager) @@ -192,6 +245,40 @@ func ProvideVMMetricsManager(instanceManager instances.Manager, cfg *config.Conf return mgr, nil } +type guestMemoryInstanceSource struct { + manager instances.Manager +} + +func (s *guestMemoryInstanceSource) ListBalloonVMs(ctx context.Context) ([]guestmemory.BalloonVM, error) { + insts, err := s.manager.ListInstances(ctx, nil) + if err != nil { + return nil, err + } + + vms := make([]guestmemory.BalloonVM, 0, len(insts)) + for _, inst := range insts { + if inst.State != instances.StateRunning && inst.State != instances.StateInitializing { + 
continue + } + vms = append(vms, guestmemory.BalloonVM{ + ID: inst.Id, + Name: inst.Name, + HypervisorType: inst.HypervisorType, + SocketPath: inst.SocketPath, + AssignedMemoryBytes: inst.Size + inst.HotplugSize, + }) + } + return vms, nil +} + +func parseByteSize(value string) (int64, error) { + var size datasize.ByteSize + if err := size.UnmarshalText([]byte(value)); err != nil { + return 0, err + } + return int64(size), nil +} + // ProvideIngressManager provides the ingress manager func ProvideIngressManager(p *paths.Paths, cfg *config.Config, instanceManager instances.Manager) (ingress.Manager, error) { // Parse DNS provider - fail if invalid diff --git a/lib/scopes/scopes.go b/lib/scopes/scopes.go index f1ae6087..be2a7fed 100644 --- a/lib/scopes/scopes.go +++ b/lib/scopes/scopes.go @@ -53,8 +53,9 @@ const ( IngressWrite Scope = "ingress:write" IngressDelete Scope = "ingress:delete" - // Resource/health scopes (read-only) - ResourceRead Scope = "resource:read" + // Resource/health scopes + ResourceRead Scope = "resource:read" + ResourceWrite Scope = "resource:write" // Wildcard scope — grants all permissions All Scope = "*" @@ -69,7 +70,7 @@ var allScopes = []Scope{ BuildRead, BuildWrite, BuildDelete, DeviceRead, DeviceWrite, DeviceDelete, IngressRead, IngressWrite, IngressDelete, - ResourceRead, + ResourceRead, ResourceWrite, } // AllScopes returns the complete list of valid scopes (excluding wildcard). 
@@ -209,8 +210,9 @@ var RouteScopes = map[string]Scope{ "GET /devices/{id}": DeviceRead, // Health & Resources - "GET /health": ResourceRead, - "GET /resources": ResourceRead, + "GET /health": ResourceRead, + "GET /resources": ResourceRead, + "POST /resources/memory/reclaim": ResourceWrite, // Images "GET /images": ImageRead, diff --git a/lib/system/initrd.go b/lib/system/initrd.go index 3ef4c103..27c45412 100644 --- a/lib/system/initrd.go +++ b/lib/system/initrd.go @@ -75,8 +75,9 @@ func (m *manager) buildInitrd(ctx context.Context, arch string) (string, error) return "", fmt.Errorf("download kernel headers: %w", err) } - // Generate timestamp for this build - timestamp := strconv.FormatInt(time.Now().Unix(), 10) + // Use a unique build identifier so concurrent rebuilds against a shared + // prewarmed cache do not collide on the output directory. + timestamp := strconv.FormatInt(time.Now().UnixNano(), 10) // Package as cpio.gz outputPath := m.paths.SystemInitrdTimestamp(timestamp, arch) @@ -95,12 +96,9 @@ func (m *manager) buildInitrd(ctx context.Context, arch string) (string, error) return "", fmt.Errorf("write hash file: %w", err) } - // Update 'latest' symlink + // Update 'latest' symlink atomically so parallel rebuilds can safely race. 
latestLink := m.paths.SystemInitrdLatest(arch) - // Remove old symlink if it exists - os.Remove(latestLink) - // Create new symlink (relative path) - if err := os.Symlink(timestamp, latestLink); err != nil { + if err := replaceSymlinkAtomic(latestLink, timestamp); err != nil { return "", fmt.Errorf("create latest symlink: %w", err) } diff --git a/lib/system/symlink.go b/lib/system/symlink.go new file mode 100644 index 00000000..83488a9d --- /dev/null +++ b/lib/system/symlink.go @@ -0,0 +1,27 @@ +package system + +import ( + "fmt" + "os" + "path/filepath" + "strconv" + "time" +) + +func replaceSymlinkAtomic(linkPath, target string) error { + tmpLink := filepath.Join( + filepath.Dir(linkPath), + ".tmp-"+filepath.Base(linkPath)+"-"+strconv.FormatInt(time.Now().UnixNano(), 10), + ) + + if err := os.Symlink(target, tmpLink); err != nil { + return err + } + + if err := os.Rename(tmpLink, linkPath); err != nil { + _ = os.Remove(tmpLink) + return fmt.Errorf("rename temp symlink: %w", err) + } + + return nil +} diff --git a/lib/system/symlink_test.go b/lib/system/symlink_test.go new file mode 100644 index 00000000..befddfae --- /dev/null +++ b/lib/system/symlink_test.go @@ -0,0 +1,28 @@ +package system + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestReplaceSymlinkAtomic(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + linkPath := filepath.Join(tmpDir, "latest") + + require.NoError(t, replaceSymlinkAtomic(linkPath, "first")) + + target, err := os.Readlink(linkPath) + require.NoError(t, err) + require.Equal(t, "first", target) + + require.NoError(t, replaceSymlinkAtomic(linkPath, "second")) + + target, err = os.Readlink(linkPath) + require.NoError(t, err) + require.Equal(t, "second", target) +} diff --git a/openapi.yaml b/openapi.yaml index a54bf367..4b0b59d4 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -1410,6 +1410,111 @@ components: items: $ref: "#/components/schemas/ResourceAllocation" + 
MemoryReclaimRequest: + type: object + required: [reclaim_bytes] + properties: + reclaim_bytes: + type: integer + format: int64 + minimum: 0 + description: Total bytes of guest memory to reclaim across eligible VMs. + example: 536870912 + hold_for: + type: string + description: How long to keep the reclaim hold active (Go duration string). Defaults to 5m when omitted. + example: 5m + dry_run: + type: boolean + description: Calculate a reclaim plan without applying balloon changes or creating a hold. + default: false + reason: + type: string + maxLength: 256 + description: Optional operator-provided reason attached to logs and traces. + example: prepare for another vm start + + MemoryReclaimAction: + type: object + required: + - instance_id + - instance_name + - hypervisor + - assigned_memory_bytes + - protected_floor_bytes + - previous_target_guest_memory_bytes + - planned_target_guest_memory_bytes + - target_guest_memory_bytes + - applied_reclaim_bytes + - status + properties: + instance_id: + type: string + instance_name: + type: string + hypervisor: + type: string + enum: [cloud-hypervisor, firecracker, qemu, vz] + assigned_memory_bytes: + type: integer + format: int64 + protected_floor_bytes: + type: integer + format: int64 + previous_target_guest_memory_bytes: + type: integer + format: int64 + planned_target_guest_memory_bytes: + type: integer + format: int64 + target_guest_memory_bytes: + type: integer + format: int64 + applied_reclaim_bytes: + type: integer + format: int64 + status: + type: string + description: Result of this VM's reclaim step. + example: applied + error: + type: string + description: Error message when status is error or unsupported. 
+ + MemoryReclaimResponse: + type: object + required: + - requested_reclaim_bytes + - planned_reclaim_bytes + - applied_reclaim_bytes + - host_available_bytes + - host_pressure_state + - actions + properties: + requested_reclaim_bytes: + type: integer + format: int64 + planned_reclaim_bytes: + type: integer + format: int64 + applied_reclaim_bytes: + type: integer + format: int64 + hold_until: + type: string + format: date-time + description: When the current manual reclaim hold expires. + host_available_bytes: + type: integer + format: int64 + host_pressure_state: + type: string + enum: [healthy, pressure] + actions: + type: array + items: + $ref: "#/components/schemas/MemoryReclaimAction" + paths: /health: get: @@ -1446,6 +1551,48 @@ paths: application/json: schema: $ref: "#/components/schemas/Error" + + /resources/memory/reclaim: + post: + summary: Trigger proactive guest memory reclaim + description: | + Requests runtime balloon inflation across reclaim-eligible guests. The same + planner used by host-pressure reclaim is applied, including protected floors + and per-VM step limits. 
+ operationId: reclaimMemory + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/MemoryReclaimRequest" + responses: + 200: + description: Reclaim plan and applied results + content: + application/json: + schema: + $ref: "#/components/schemas/MemoryReclaimResponse" + 400: + description: Invalid reclaim request + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" /images: get: diff --git a/stainless.yaml b/stainless.yaml index afeb7216..d1e746f4 100644 --- a/stainless.yaml +++ b/stainless.yaml @@ -152,8 +152,12 @@ resources: gpu_resource_status: "#/components/schemas/GPUResourceStatus" gpu_profile: "#/components/schemas/GPUProfile" passthrough_device: "#/components/schemas/PassthroughDevice" + memory_reclaim_request: "#/components/schemas/MemoryReclaimRequest" + memory_reclaim_action: "#/components/schemas/MemoryReclaimAction" + memory_reclaim_response: "#/components/schemas/MemoryReclaimResponse" methods: get: get /resources + reclaim_memory: post /resources/memory/reclaim builds: models: