Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cli/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ require (
github.com/Masterminds/semver/v3 v3.4.0
github.com/PaesslerAG/jsonpath v0.1.1
github.com/alecthomas/assert/v2 v2.11.0
github.com/beclab/Olares/framework/app-service v0.0.0-20251225061130-909b7656fd70
github.com/beclab/Olares/framework/app-service v0.0.0-20260311124303-23a6533bc2ad
github.com/beclab/api v0.0.2
github.com/cavaliergopher/grab/v3 v3.0.1
github.com/containerd/containerd v1.7.29
Expand Down Expand Up @@ -200,6 +200,7 @@ require (
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
go.opentelemetry.io/otel v1.39.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0 // indirect
go.opentelemetry.io/otel/metric v1.39.0 // indirect
go.opentelemetry.io/otel/trace v1.39.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
Expand Down
8 changes: 4 additions & 4 deletions cli/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPd
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so=
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
github.com/beclab/Olares/framework/app-service v0.0.0-20251225061130-909b7656fd70 h1:U3z6m0hokD1gzl788BrUdxCbDyAjdOBBXA8ilYgn6VQ=
github.com/beclab/Olares/framework/app-service v0.0.0-20251225061130-909b7656fd70/go.mod h1:D9wl7y3obLqXMqfubMROMgdxWAwInnKNrFC//d0nyIA=
github.com/beclab/Olares/framework/app-service v0.0.0-20260311124303-23a6533bc2ad h1:nmQCNbJNtgTqcusySeeyd9LQOK2jyk78QAjzmvyyPYg=
github.com/beclab/Olares/framework/app-service v0.0.0-20260311124303-23a6533bc2ad/go.mod h1:D9wl7y3obLqXMqfubMROMgdxWAwInnKNrFC//d0nyIA=
github.com/beclab/api v0.0.2 h1:aD5RcMie2uqa/FZI7aQBa1F4yVEib8/x3IIZSLiHkBM=
github.com/beclab/api v0.0.2/go.mod h1:ESZLe8cf4934QFkU6cqbskKfiTyNk67i1qbv/ctS6js=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
Expand Down Expand Up @@ -506,8 +506,8 @@ go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0 h1:j7Z
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0/go.mod h1:WXbYJTUaZXAbYd8lbgGuvih0yuCfOFC5RJoYnoLcGz8=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0 h1:t/Qur3vKSkUCcDVaSumWF2PKHt85pc7fRvFuoVT8qFU=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0/go.mod h1:Rl61tySSdcOJWoEgYZVtmnKdA0GeKrSqkHC1t+91CH8=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0 h1:OeNbIYk/2C15ckl7glBlOBp5+WlYsOElzTNmiPW/x60=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0/go.mod h1:7Bept48yIeqxP2OZ9/AqIpYS94h2or0aB4FypJTc8ZM=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0 h1:1fTNlAIJZGWLP5FVu0fikVry1IsiUnXjf7QFvoNN3Xw=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0/go.mod h1:zjPK58DtkqQFn+YUMbx0M2XV3QgKU0gS9LeGohREyK4=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 h1:tgJ0uaNS4c98WRNUEx5U3aDlrDOI5Rs+1Vifcw4DJ8U=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0/go.mod h1:U7HYyW0zt/a9x5J1Kjs+r1f/d4ZHnYFclhYY2+YbeoE=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.35.0 h1:xJ2qHD0C1BeYVTLLR9sX12+Qb95kfeD/byKj6Ky1pXg=
Expand Down
47 changes: 47 additions & 0 deletions cli/pkg/upgrade/1_12_6_20260408.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package upgrade

import (
"time"

"github.com/Masterminds/semver/v3"
"github.com/beclab/Olares/cli/pkg/core/task"
)

// upgrader_1_12_6_20260408 is the upgrader for version 1.12.6-20260408.
// It embeds breakingUpgraderBase and is handed to registerMainUpgrader from
// this file's init function.
type upgrader_1_12_6_20260408 struct {
	breakingUpgraderBase
}

// Version reports the release this upgrader targets: 1.12.6-20260408.
func (u upgrader_1_12_6_20260408) Version() *semver.Version {
	const target = "1.12.6-20260408"
	// MustParse panics on malformed input; the constant above is a fixed,
	// well-formed semver string (core version plus a pre-release tag).
	return semver.MustParse(target)
}

// PrepareForUpgrade returns the preparation tasks produced by
// upgraderBase.PrepareForUpgrade, unchanged. The call is qualified with
// upgraderBase explicitly instead of being resolved through the embedded
// breakingUpgraderBase.
func (u upgrader_1_12_6_20260408) PrepareForUpgrade() []task.Interface {
	prepTasks := u.upgraderBase.PrepareForUpgrade()
	return prepTasks
}

// UpgradeSystemComponents returns the system-component tasks of upgraderBase
// followed by two tasks specific to this release:
//
//  1. WaitForAppServiceReady checks the os-framework/app-service StatefulSet
//     (5s delay inside each check, up to 30 retries spaced 10s apart).
//  2. BackfillAppGPUConfig runs the backfillAppGPUConfig action (up to 3
//     retries spaced 5s apart).
//
// The base tasks are copied into a fresh slice, so the slice returned by
// upgraderBase is never appended to in place.
func (u upgrader_1_12_6_20260408) UpgradeSystemComponents() []task.Interface {
	baseTasks := u.upgraderBase.UpgradeSystemComponents()

	waitAppService := &task.LocalTask{
		Name: "WaitForAppServiceReady",
		Action: &waitForStatefulSetReady{
			Namespace: "os-framework",
			Name:      "app-service",
			InitDelay: 5 * time.Second,
		},
		Retry: 30,
		Delay: 10 * time.Second,
	}

	backfillGPU := &task.LocalTask{
		Name:   "BackfillAppGPUConfig",
		Action: new(backfillAppGPUConfig),
		Retry:  3,
		Delay:  5 * time.Second,
	}

	all := make([]task.Interface, 0, len(baseTasks)+2)
	all = append(all, baseTasks...)
	return append(all, waitAppService, backfillGPU)
}

func init() {
registerMainUpgrader(upgrader_1_12_6_20260408{})
}
177 changes: 177 additions & 0 deletions cli/pkg/upgrade/task_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package upgrade

import (
"context"
"encoding/json"
"fmt"
"os"
"os/exec"
Expand Down Expand Up @@ -30,8 +31,17 @@ import (
"github.com/beclab/Olares/cli/pkg/phase"
"github.com/beclab/Olares/cli/pkg/terminus"
"github.com/beclab/Olares/cli/pkg/utils"
appv1alpha1 "github.com/beclab/Olares/framework/app-service/api/app.bytetrade.io/v1alpha1"
"github.com/beclab/Olares/framework/app-service/pkg/appcfg"

"github.com/pkg/errors"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
kruntime "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
)

const cacheRebootNeeded = "reboot.needed"
Expand Down Expand Up @@ -461,3 +471,170 @@ func upgradeKubernetesPrometheusRule() []task.Interface {
},
}
}

// waitForStatefulSetReady is an action that checks whether a StatefulSet has
// finished rolling out. One Execute call performs one check and returns an
// error when the StatefulSet is not ready yet.
type waitForStatefulSetReady struct {
	common.KubeAction

	// Namespace and Name identify the StatefulSet to inspect.
	Namespace, Name string
	// InitDelay, when positive, is slept at the start of every Execute call
	// before the StatefulSet is queried.
	InitDelay time.Duration
}

// Execute performs a single readiness check of the StatefulSet identified by
// w.Namespace and w.Name.
//
// It first sleeps for w.InitDelay (when positive), then builds a
// controller-runtime client with the apps/v1 scheme and fetches the
// StatefulSet under a 30-second timeout. It returns nil only when all of the
// following hold, checked in this order:
//   - status.observedGeneration has caught up with metadata.generation,
//   - status.updatedReplicas has reached the desired replica count,
//   - status.readyReplicas has reached the desired replica count,
//   - status.currentRevision equals status.updateRevision.
//
// Any other state, and any failure to build the client or fetch the object,
// is returned as an error. The method does not loop; repeated checking is
// left to whoever invokes it.
func (w *waitForStatefulSetReady) Execute(_ connector.Runtime) error {
	if delay := w.InitDelay; delay > 0 {
		logger.Infof("waiting %s before checking statefulset %s/%s", delay, w.Namespace, w.Name)
		time.Sleep(delay)
	}

	restCfg, err := ctrl.GetConfig()
	if err != nil {
		return errors.Wrap(errors.WithStack(err), "failed to get kubernetes config")
	}

	// Only apps/v1 is registered: StatefulSet is the sole type read here.
	sch := kruntime.NewScheme()
	if err = appsv1.AddToScheme(sch); err != nil {
		return errors.Wrap(errors.WithStack(err), "failed to add apps/v1 scheme")
	}

	kube, err := ctrlclient.New(restCfg, ctrlclient.Options{Scheme: sch})
	if err != nil {
		return errors.Wrap(errors.WithStack(err), "failed to create client")
	}

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	sts := &appsv1.StatefulSet{}
	target := ctrlclient.ObjectKey{Namespace: w.Namespace, Name: w.Name}
	if err = kube.Get(ctx, target, sts); err != nil {
		return errors.Wrapf(errors.WithStack(err), "failed to get statefulset %s/%s", w.Namespace, w.Name)
	}

	// A nil spec.replicas is treated as one desired replica.
	desired := int32(1)
	if sts.Spec.Replicas != nil {
		desired = *sts.Spec.Replicas
	}

	status := sts.Status
	switch {
	case status.ObservedGeneration < sts.Generation:
		return fmt.Errorf("statefulset %s/%s rollout not observed yet (generation %d, observed %d)",
			w.Namespace, w.Name, sts.Generation, status.ObservedGeneration)
	case status.UpdatedReplicas < desired:
		return fmt.Errorf("statefulset %s/%s not fully updated: %d/%d updated",
			w.Namespace, w.Name, status.UpdatedReplicas, desired)
	case status.ReadyReplicas < desired:
		return fmt.Errorf("statefulset %s/%s not ready: %d/%d ready",
			w.Namespace, w.Name, status.ReadyReplicas, desired)
	case status.CurrentRevision != status.UpdateRevision:
		return fmt.Errorf("statefulset %s/%s revision mismatch: current=%s update=%s",
			w.Namespace, w.Name, status.CurrentRevision, status.UpdateRevision)
	}

	logger.Infof("statefulset %s/%s is ready", w.Namespace, w.Name)
	return nil
}

// backfillAppGPUConfig is an action that fills in SelectedGpuType in the stored
// config of ApplicationManagers that declare a RequiredGPU but have no GPU type
// selected. It carries no configuration of its own.
type backfillAppGPUConfig struct {
	common.KubeAction
}

// Execute sets SelectedGpuType on every ApplicationManager whose stored config
// declares a RequiredGPU but has an empty SelectedGpuType.
//
// The GPU type written is the value of the "gpu.bytetrade.io/type" annotation
// on the first listed node that carries a non-empty one, or "nvidia" when no
// node has it. Each affected ApplicationManager has its spec.config replaced
// through a JSON merge patch containing the re-encoded config.
// ApplicationManagers with an empty config, without a RequiredGPU, or with
// SelectedGpuType already set are skipped, so objects patched by an earlier
// run are not patched again.
//
// All API calls share a single 2-minute timeout. The first error encountered
// (client setup, listing, config decode/encode, or patch) aborts the run and
// is returned; objects patched before that point stay patched.
func (a *backfillAppGPUConfig) Execute(_ connector.Runtime) error {
	restCfg, err := ctrl.GetConfig()
	if err != nil {
		return errors.Wrap(errors.WithStack(err), "failed to get kubernetes config")
	}

	sch := kruntime.NewScheme()
	if err = appv1alpha1.AddToScheme(sch); err != nil {
		return errors.Wrap(errors.WithStack(err), "failed to add app-service scheme")
	}
	if err = appsv1.AddToScheme(sch); err != nil {
		return errors.Wrap(errors.WithStack(err), "failed to add apps/v1 scheme")
	}
	if err = corev1.AddToScheme(sch); err != nil {
		return errors.Wrap(errors.WithStack(err), "failed to add corev1 scheme")
	}

	kube, err := ctrlclient.New(restCfg, ctrlclient.Options{Scheme: sch})
	if err != nil {
		return errors.Wrap(errors.WithStack(err), "failed to create controller-runtime client")
	}

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	managers := &appv1alpha1.ApplicationManagerList{}
	if err = kube.List(ctx, managers); err != nil {
		return errors.Wrap(errors.WithStack(err), "failed to list applicationmanagers")
	}

	nodes := &corev1.NodeList{}
	if err = kube.List(ctx, nodes); err != nil {
		return errors.Wrap(errors.WithStack(err), "failed to list nodes")
	}

	// Default to "nvidia"; the first node advertising a GPU type overrides it.
	gpuType := "nvidia"
	for n := range nodes.Items {
		if advertised := nodes.Items[n].Annotations["gpu.bytetrade.io/type"]; advertised != "" {
			gpuType = advertised
			break
		}
	}

	patched := 0
	for idx := range managers.Items {
		mgr := &managers.Items[idx]
		if mgr.Spec.Config == "" {
			continue
		}

		var cfg appcfg.ApplicationConfig
		if err := json.Unmarshal([]byte(mgr.Spec.Config), &cfg); err != nil {
			return errors.Wrapf(errors.WithStack(err), "failed to unmarshal config for applicationmanager %s", mgr.Name)
		}

		// Only apps that need a GPU and have no type chosen yet are touched.
		if cfg.RequiredGPU == "" || cfg.SelectedGpuType != "" {
			continue
		}
		cfg.SelectedGpuType = gpuType

		encodedCfg, err := json.Marshal(&cfg)
		if err != nil {
			return errors.Wrapf(errors.WithStack(err), "failed to marshal updated config for %s", mgr.Name)
		}

		// spec.config is itself a JSON string, so the re-encoded config is
		// embedded as a string value inside the merge patch document.
		mergeDoc := map[string]map[string]string{
			"spec": {"config": string(encodedCfg)},
		}
		mergeBytes, err := json.Marshal(mergeDoc)
		if err != nil {
			return errors.Wrapf(errors.WithStack(err), "failed to build patch for %s", mgr.Name)
		}

		if err := kube.Patch(ctx, mgr, ctrlclient.RawPatch(types.MergePatchType, mergeBytes)); err != nil {
			return errors.Wrapf(errors.WithStack(err), "failed to patch applicationmanager %s", mgr.Name)
		}

		logger.Infof("backfilled GPU config for applicationmanager %s", mgr.Name)
		patched++
	}

	logger.Infof("backfilled GPU config for %d applicationmanagers", patched)
	return nil
}
Loading