diff --git a/CHANGELOG.md b/CHANGELOG.md index fe99ed50..3285d0a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## unreleased -## v4.0.0-beta1 - 2026.02.27 +## v4.0.0 - 2026.03.30 ⚠️ See the [update instructions](https://github.com/cloudscale-ch/csi-cloudscale#from-csi-cloudscale-v3x-to-v4x). **Breaking change:** This release **requires** Kubernetes VolumeSnapshot CRDs and the external snapshot controller to be installed **before** installing or upgrading this driver. Clusters without these components must install them first. Otherwise, snapshot-related operations and the snapshot resource in the Helm chart will fail to work. @@ -8,10 +8,15 @@ * Add CSI snapshot support, including create, delete, list, and creating volumes from snapshots. * Add `VolumeSnapshotClass` and other snapshot-related resources to the Helm chart and examples. When using Helm, snapshot classes are now created from the `csi.snapshotClasses` configuration. * Cleaned up RBAC rules in Helm chart. +* Prevent silent volume migration in ControllerPublishVolume. * Update `github.com/cloudscale-ch/cloudscale-go-sdk` fron `v6.0.1` to `v7.0.0`. -* Bump `google.golang.org/grpc` from `v1.77.0` to `v1.79.1`. * Bump `github.com/sirupsen/logrus` from `v1.9.3` to `v1.9.4`. -* Bump `golang.org/x/sys` from `v0.39.0` to `v0.41.0`. +* Bump `google.golang.org/grpc` from `v1.77.0` to `v1.79.3`. +* Bump `golang.org/x/sys` from `v0.39.0` to `v0.42.0`. +* Bump `golang.org/x/oauth2` from `v0.35.0` to `v0.36.0`. + +## v3.6.1 - 2026.03.27 +* Backport for 3.6.x: Prevent silent volume migration in ControllerPublishVolume. ## v3.6.0 - 2026.01.15 diff --git a/README.md b/README.md index dd388c81..75036573 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ secret `my-pvc-luks-key`. ## Releases The cloudscale.ch CSI plugin follows [semantic versioning](https://semver.org/). -The current version is: **`v4.0.0-beta1`**. +The current version is: **`v4.0.0`**. * Bug fixes will be released as a `PATCH` update. 
* New features (such as CSI spec bumps) will be released as a `MINOR` update. @@ -92,14 +92,14 @@ We recommend using the latest cloudscale.ch CSI driver compatible with your Kube | 1.25 | v3.3.0 | v3.5.6 | | 1.26 | v3.3.0 | v3.5.6 | | 1.27 | v3.3.0 | v3.5.6 | -| 1.28 | v3.3.0 | v4.0.0-beta1 | -| 1.29 | v3.3.0 | v4.0.0-beta1 | -| 1.30 | v3.3.0 | v4.0.0-beta1 | -| 1.31 | v3.3.0 | v4.0.0-beta1 | -| 1.32 | v3.3.0 | v4.0.0-beta1 | -| 1.33 | v3.3.0 | v4.0.0-beta1 | -| 1.34 [1] | v3.3.0 | v4.0.0-beta1 | -| 1.35 | v3.4.1 | v4.0.0-beta1 | +| 1.28 | v3.3.0 | v4.0.0 | +| 1.29 | v3.3.0 | v4.0.0 | +| 1.30 | v3.3.0 | v4.0.0 | +| 1.31 | v3.3.0 | v4.0.0 | +| 1.32 | v3.3.0 | v4.0.0 | +| 1.33 | v3.3.0 | v4.0.0 | +| 1.34 [1] | v3.3.0 | v4.0.0 | +| 1.35 | v3.4.1 | v4.0.0 | [1] Prometheus `kubelet_volume_stats_*` metrics not available in 1.34.0 and 1.34.1 due to a [bug in Kubelet](https://github.com/kubernetes/kubernetes/issues/133847). Fixed in `1.34.2`. @@ -215,10 +215,10 @@ $ helm install -g -n kube-system --set controller.image.tag=dev --set node.image Before you continue, be sure to checkout to a [tagged release](https://github.com/cloudscale-ch/csi-cloudscale/releases). Always use the [latest stable version](https://github.com/cloudscale-ch/csi-cloudscale/releases/latest) -For example, to use the latest stable version (`v4.0.0-beta1`) you can execute the following command: +For example, to use the latest stable version (`v4.0.0`) you can execute the following command: ``` -$ kubectl apply -f https://raw.githubusercontent.com/cloudscale-ch/csi-cloudscale/master/deploy/kubernetes/releases/csi-cloudscale-v4.0.0-beta1.yaml +$ kubectl apply -f https://raw.githubusercontent.com/cloudscale-ch/csi-cloudscale/master/deploy/kubernetes/releases/csi-cloudscale-v4.0.0.yaml ``` The storage classes `cloudscale-volume-ssd` and `cloudscale-volume-bulk` will be created. 
The @@ -445,15 +445,15 @@ $ git push origin After it's merged to master, [create a new Github release](https://github.com/cloudscale-ch/csi-cloudscale/releases/new) from -master with the version `v4.0.0-beta1` and then publish a new docker build: +master with the version `v4.0.0` and then publish a new docker build: ``` $ git checkout master $ make publish ``` -This will create a binary with version `v4.0.0-beta1` and docker image pushed to -`cloudscalech/cloudscale-csi-plugin:v4.0.0-beta1` +This will create a binary with version `v4.0.0` and docker image pushed to +`cloudscalech/cloudscale-csi-plugin:v4.0.0` ### Release a pre-release version diff --git a/VERSION b/VERSION index 1ff6dedb..857572fc 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -v4.0.0-beta1 +v4.0.0 diff --git a/charts/csi-cloudscale/Chart.yaml b/charts/csi-cloudscale/Chart.yaml index f437149b..a6183815 100644 --- a/charts/csi-cloudscale/Chart.yaml +++ b/charts/csi-cloudscale/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: csi-cloudscale description: A Container Storage Interface Driver for cloudscale.ch volumes. 
type: application -version: 1.5.0-beta1 -appVersion: "4.0.0-beta1" +version: 1.5.0 +appVersion: "4.0.0" home: https://github.com/cloudscale-ch/csi-cloudscale sources: - https://github.com/cloudscale-ch/csi-cloudscale.git diff --git a/charts/csi-cloudscale/values.yaml b/charts/csi-cloudscale/values.yaml index f940c1eb..1d45c3bc 100644 --- a/charts/csi-cloudscale/values.yaml +++ b/charts/csi-cloudscale/values.yaml @@ -107,7 +107,7 @@ controller: image: registry: quay.io repository: cloudscalech/cloudscale-csi-plugin - tag: v4.0.0-beta1 + tag: v4.0.0 pullPolicy: IfNotPresent serviceAccountName: logLevel: info @@ -123,7 +123,7 @@ node: image: registry: quay.io repository: cloudscalech/cloudscale-csi-plugin - tag: v4.0.0-beta1 + tag: v4.0.0 pullPolicy: IfNotPresent nodeSelector: {} tolerations: [] diff --git a/deploy/kubernetes/releases/csi-cloudscale-v4.0.0.yaml b/deploy/kubernetes/releases/csi-cloudscale-v4.0.0.yaml new file mode 100644 index 00000000..d0d68fc4 --- /dev/null +++ b/deploy/kubernetes/releases/csi-cloudscale-v4.0.0.yaml @@ -0,0 +1,478 @@ +--- +# Source: csi-cloudscale/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: csi-cloudscale-controller-sa + namespace: kube-system +--- +# Source: csi-cloudscale/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: csi-cloudscale-node-sa + namespace: kube-system +--- +# Source: csi-cloudscale/templates/storageclass.yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: cloudscale-volume-ssd + namespace: kube-system + annotations: + storageclass.kubernetes.io/is-default-class: "true" +provisioner: csi.cloudscale.ch +allowVolumeExpansion: true +reclaimPolicy: Delete +volumeBindingMode: Immediate +parameters: + csi.cloudscale.ch/volume-type: ssd +--- +# Source: csi-cloudscale/templates/storageclass.yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: cloudscale-volume-ssd-luks + namespace: kube-system 
+provisioner: csi.cloudscale.ch +allowVolumeExpansion: true +reclaimPolicy: Delete +volumeBindingMode: Immediate +parameters: + csi.cloudscale.ch/volume-type: ssd + csi.cloudscale.ch/luks-encrypted: "true" + csi.cloudscale.ch/luks-cipher: "aes-xts-plain64" + csi.cloudscale.ch/luks-key-size: "512" + csi.storage.k8s.io/node-stage-secret-namespace: ${pvc.namespace} + csi.storage.k8s.io/node-stage-secret-name: ${pvc.name}-luks-key +--- +# Source: csi-cloudscale/templates/storageclass.yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: cloudscale-volume-bulk + namespace: kube-system +provisioner: csi.cloudscale.ch +allowVolumeExpansion: true +reclaimPolicy: Delete +volumeBindingMode: Immediate +parameters: + csi.cloudscale.ch/volume-type: bulk +--- +# Source: csi-cloudscale/templates/storageclass.yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: cloudscale-volume-bulk-luks + namespace: kube-system +provisioner: csi.cloudscale.ch +allowVolumeExpansion: true +reclaimPolicy: Delete +volumeBindingMode: Immediate +parameters: + csi.cloudscale.ch/volume-type: bulk + csi.cloudscale.ch/luks-encrypted: "true" + csi.cloudscale.ch/luks-cipher: "aes-xts-plain64" + csi.cloudscale.ch/luks-key-size: "512" + csi.storage.k8s.io/node-stage-secret-namespace: ${pvc.namespace} + csi.storage.k8s.io/node-stage-secret-name: ${pvc.name}-luks-key +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-provisioner-role +rules: + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "create", "patch", "delete"] + - apiGroups: [""] + resources: ["persistentvolumeclaims"] + verbs: ["get", "list", "watch", "update"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["events"] + verbs: ["list", "watch", "create", "update", "patch"] + - apiGroups: 
["snapshot.storage.k8s.io"] + resources: ["volumesnapshots"] + verbs: ["get", "list", "watch", "update"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotcontents"] + verbs: ["get", "list"] + - apiGroups: [ "coordination.k8s.io" ] + resources: [ "leases" ] + verbs: [ "get", "list", "watch", "create", "update", "patch", "delete" ] + - apiGroups: [ "storage.k8s.io" ] + resources: [ "csinodes" ] + verbs: [ "get", "list", "watch" ] + - apiGroups: [ "" ] + resources: [ "nodes" ] + verbs: [ "get", "list", "watch" ] + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattachments"] + verbs: ["get", "list", "watch"] +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-attacher-role +rules: + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: ["storage.k8s.io"] + resources: ["csinodes"] + verbs: ["get", "list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattachments"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattachments/status"] + verbs: ["patch"] +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-snapshotter-role +rules: + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshots"] + verbs: [ "get", "list", "watch", "update" ] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotcontents"] + verbs: [ "get", "list", "watch", "update", "patch" ] + - apiGroups: [ "snapshot.storage.k8s.io" ] + resources: [ "volumesnapshotcontents/status" ] + verbs: [ "update", "patch" ] + - apiGroups: [ "snapshot.storage.k8s.io" ] + resources: [ "volumesnapshotclasses" ] + verbs: [ "get", "list", "watch" ] + - apiGroups: [""] + resources: ["events"] + verbs: ["list", "watch", "create", "update", 
"patch"] +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-resizer-role +rules: + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: [""] + resources: ["persistentvolumeclaims"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["persistentvolumeclaims/status"] + verbs: ["update", "patch"] + - apiGroups: [""] + resources: ["events"] + verbs: ["list", "watch", "create", "update", "patch"] + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattributesclasses"] + verbs: ["get", "list", "watch"] +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-node-driver-registrar-role + namespace: kube-system +rules: + - apiGroups: [""] + resources: ["events"] + verbs: ["get", "list", "watch", "create", "update", "patch"] +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-provisioner-binding +subjects: + - kind: ServiceAccount + name: csi-cloudscale-controller-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-provisioner-role + apiGroup: rbac.authorization.k8s.io +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-snapshotter-binding +subjects: + - kind: ServiceAccount + name: csi-cloudscale-controller-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-snapshotter-role + apiGroup: rbac.authorization.k8s.io +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-resizer-binding 
+subjects: + - kind: ServiceAccount + name: csi-cloudscale-controller-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-resizer-role + apiGroup: rbac.authorization.k8s.io +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-attacher-binding +subjects: + - kind: ServiceAccount + name: csi-cloudscale-controller-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-attacher-role + apiGroup: rbac.authorization.k8s.io +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-node-driver-registrar-binding +subjects: + - kind: ServiceAccount + name: csi-cloudscale-node-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-node-driver-registrar-role + apiGroup: rbac.authorization.k8s.io +--- +# Source: csi-cloudscale/templates/daemonset.yaml +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: csi-cloudscale-node + namespace: kube-system +spec: + selector: + matchLabels: + app: csi-cloudscale-node + template: + metadata: + labels: + app: csi-cloudscale-node + role: csi-cloudscale + spec: + priorityClassName: system-node-critical + serviceAccountName: csi-cloudscale-node-sa + hostNetwork: true + containers: + - name: csi-node-driver-registrar + image: "registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.15.0" + imagePullPolicy: IfNotPresent + args: + - "--v=5" + - "--csi-address=$(ADDRESS)" + - "--kubelet-registration-path=$(DRIVER_REG_SOCK_PATH)" + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "rm -rf /registration/csi.cloudscale.ch /registration/csi.cloudscale.ch-reg.sock"] + env: + - name: ADDRESS + value: /csi/csi.sock + - name: DRIVER_REG_SOCK_PATH + value: /var/lib/kubelet/plugins/csi.cloudscale.ch/csi.sock + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: 
spec.nodeName + volumeMounts: + - name: plugin-dir + mountPath: /csi/ + - name: registration-dir + mountPath: /registration/ + - name: csi-cloudscale-plugin + image: "quay.io/cloudscalech/cloudscale-csi-plugin:v4.0.0" + imagePullPolicy: IfNotPresent + args : + - "--endpoint=$(CSI_ENDPOINT)" + - "--url=$(CLOUDSCALE_API_URL)" + - "--log-level=info" + env: + - name: CSI_ENDPOINT + value: unix:///csi/csi.sock + - name: CLOUDSCALE_API_URL + value: https://api.cloudscale.ch/ + - name: CLOUDSCALE_MAX_CSI_VOLUMES_PER_NODE + value: "125" + - name: CLOUDSCALE_ACCESS_TOKEN + valueFrom: + secretKeyRef: + name: cloudscale + key: access-token + securityContext: + privileged: true + capabilities: + add: ["SYS_ADMIN"] + allowPrivilegeEscalation: true + volumeMounts: + - name: plugin-dir + mountPath: /csi + - name: pods-mount-dir + mountPath: /var/lib/kubelet + # needed so that any mounts setup inside this container are + # propagated back to the host machine. + mountPropagation: "Bidirectional" + - name: device-dir + mountPath: /dev + - name: tmpfs + mountPath: /tmp + volumes: + - name: registration-dir + hostPath: + path: /var/lib/kubelet/plugins_registry/ + type: DirectoryOrCreate + - name: plugin-dir + hostPath: + path: /var/lib/kubelet/plugins/csi.cloudscale.ch + type: DirectoryOrCreate + - name: pods-mount-dir + hostPath: + path: /var/lib/kubelet + type: Directory + - name: device-dir + hostPath: + path: /dev + # to make sure temporary stored luks keys never touch a disk + - name: tmpfs + emptyDir: + medium: Memory +--- +# Source: csi-cloudscale/templates/statefulset.yaml +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: csi-cloudscale-controller + namespace: kube-system +spec: + serviceName: "csi-cloudscale" + selector: + matchLabels: + app: csi-cloudscale-controller + replicas: 1 + template: + metadata: + labels: + app: csi-cloudscale-controller + role: csi-cloudscale + spec: + hostNetwork: true + priorityClassName: system-cluster-critical + serviceAccount: 
csi-cloudscale-controller-sa + containers: + - name: csi-provisioner + image: "registry.k8s.io/sig-storage/csi-provisioner:v5.3.0" + imagePullPolicy: IfNotPresent + args: + - "--csi-address=$(ADDRESS)" + - "--default-fstype=ext4" + - "--v=5" + - "--feature-gates=Topology=false" + env: + - name: ADDRESS + value: /var/lib/csi/sockets/pluginproxy/csi.sock + volumeMounts: + - name: socket-dir + mountPath: /var/lib/csi/sockets/pluginproxy/ + - name: csi-attacher + image: "registry.k8s.io/sig-storage/csi-attacher:v4.10.0" + imagePullPolicy: IfNotPresent + args: + - "--csi-address=$(ADDRESS)" + - "--v=5" + env: + - name: ADDRESS + value: /var/lib/csi/sockets/pluginproxy/csi.sock + volumeMounts: + - name: socket-dir + mountPath: /var/lib/csi/sockets/pluginproxy/ + - name: csi-resizer + image: "registry.k8s.io/sig-storage/csi-resizer:v2.0.0" + args: + - "--csi-address=$(ADDRESS)" + - "--timeout=30s" + - "--v=5" + - "--handle-volume-inuse-error=false" + env: + - name: ADDRESS + value: /var/lib/csi/sockets/pluginproxy/csi.sock + imagePullPolicy: IfNotPresent + volumeMounts: + - name: socket-dir + mountPath: /var/lib/csi/sockets/pluginproxy/ + - name: csi-snapshotter + image: "registry.k8s.io/sig-storage/csi-snapshotter:v8.4.0" + args: + - "--csi-address=$(CSI_ENDPOINT)" + - "--v=5" + env: + - name: CSI_ENDPOINT + value: unix:///var/lib/csi/sockets/pluginproxy/csi.sock + volumeMounts: + - name: socket-dir + mountPath: /var/lib/csi/sockets/pluginproxy/ + - name: csi-cloudscale-plugin + image: "quay.io/cloudscalech/cloudscale-csi-plugin:v4.0.0" + args : + - "--endpoint=$(CSI_ENDPOINT)" + - "--url=$(CLOUDSCALE_API_URL)" + - "--log-level=info" + env: + - name: CSI_ENDPOINT + value: unix:///var/lib/csi/sockets/pluginproxy/csi.sock + - name: CLOUDSCALE_API_URL + value: https://api.cloudscale.ch/ + - name: CLOUDSCALE_ACCESS_TOKEN + valueFrom: + secretKeyRef: + name: cloudscale + key: access-token + imagePullPolicy: IfNotPresent + volumeMounts: + - name: socket-dir + mountPath: 
/var/lib/csi/sockets/pluginproxy/ + volumes: + - name: socket-dir + emptyDir: {} +--- +# Source: csi-cloudscale/templates/csi_driver.yaml +apiVersion: storage.k8s.io/v1 +kind: CSIDriver +metadata: + name: csi.cloudscale.ch +spec: + attachRequired: true + podInfoOnMount: true +--- +# Source: csi-cloudscale/templates/volumesnapshotclass.yaml +apiVersion: snapshot.storage.k8s.io/v1 +kind: VolumeSnapshotClass +metadata: + name: cloudscale-snapshots +driver: csi.cloudscale.ch +deletionPolicy: Delete diff --git a/driver/controller.go b/driver/controller.go index b80dcaa9..8e220b95 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -398,6 +398,11 @@ func (d *Driver) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) return nil, status.Error(codes.InvalidArgument, "DeleteVolume Volume ID must be provided") } + if acquired := d.volumeLocks.TryAcquire(req.VolumeId); !acquired { + return nil, status.Errorf(codes.Aborted, "an operation with the given Volume ID %s already exists", req.VolumeId) + } + defer d.volumeLocks.Release(req.VolumeId) + ll := d.log.WithFields(logrus.Fields{ "volume_id": req.VolumeId, "method": "delete_volume", @@ -461,6 +466,11 @@ func (d *Driver) ControllerPublishVolume(ctx context.Context, req *csi.Controlle return nil, status.Error(codes.AlreadyExists, "read only Volumes are not supported") } + if acquired := d.volumeLocks.TryAcquire(req.VolumeId); !acquired { + return nil, status.Errorf(codes.Aborted, "an operation with the given Volume ID %s already exists", req.VolumeId) + } + defer d.volumeLocks.Release(req.VolumeId) + ll := d.log.WithFields(logrus.Fields{ "volume_id": req.VolumeId, "node_id": req.NodeId, @@ -468,10 +478,46 @@ func (d *Driver) ControllerPublishVolume(ctx context.Context, req *csi.Controlle }) ll.Info("controller publish volume called") + // Check current attachment state before modifying it. 
This prevents + // silently moving a volume that is still attached to another node, + // which would cause a stale VolumeAttachment and Multi-Attach errors. + volume, err := d.cloudscaleClient.Volumes.Get(ctx, req.VolumeId) + if err != nil { + return nil, reraiseNotFound(err, ll, "fetch volume for publish") + } + + if volume.ServerUUIDs != nil && len(*volume.ServerUUIDs) > 0 { + alreadyAttachedToRequestedNode := false + for _, serverUUID := range *volume.ServerUUIDs { + if serverUUID == req.NodeId { + alreadyAttachedToRequestedNode = true + break + } + } + + if alreadyAttachedToRequestedNode { + ll.Info("volume is already attached to the requested node") + return &csi.ControllerPublishVolumeResponse{ + PublishContext: map[string]string{ + PublishInfoVolumeName: volume.Name, + LuksEncryptedAttribute: req.VolumeContext[LuksEncryptedAttribute], + LuksCipherAttribute: req.VolumeContext[LuksCipherAttribute], + LuksKeySizeAttribute: req.VolumeContext[LuksKeySizeAttribute], + }, + }, nil + } + + ll.WithField("current_server_uuids", *volume.ServerUUIDs). 
+ Warn("volume is already attached to a different node") + return nil, status.Errorf(codes.FailedPrecondition, + "volume %s is already attached to server(s) %v, must be detached first", + req.VolumeId, *volume.ServerUUIDs) + } + attachRequest := &cloudscale.VolumeUpdateRequest{ ServerUUIDs: &[]string{req.NodeId}, } - err := d.cloudscaleClient.Volumes.Update(ctx, req.VolumeId, attachRequest) + err = d.cloudscaleClient.Volumes.Update(ctx, req.VolumeId, attachRequest) if err != nil { if maxVolumesPerServerErrorMessageRe.MatchString(err.Error()) { return nil, status.Error(codes.ResourceExhausted, err.Error()) @@ -481,10 +527,6 @@ func (d *Driver) ControllerPublishVolume(ctx context.Context, req *csi.Controlle } ll.Info("volume is attached") - volume, err := d.cloudscaleClient.Volumes.Get(ctx, req.VolumeId) - if err != nil { - return nil, reraiseNotFound(err, ll, "fetch volume") - } return &csi.ControllerPublishVolumeResponse{ PublishContext: map[string]string{ PublishInfoVolumeName: volume.Name, @@ -501,6 +543,11 @@ func (d *Driver) ControllerUnpublishVolume(ctx context.Context, req *csi.Control return nil, status.Error(codes.InvalidArgument, "ControllerPublishVolume Volume ID must be provided") } + if acquired := d.volumeLocks.TryAcquire(req.VolumeId); !acquired { + return nil, status.Errorf(codes.Aborted, "an operation with the given Volume ID %s already exists", req.VolumeId) + } + defer d.volumeLocks.Release(req.VolumeId) + ll := d.log.WithFields(logrus.Fields{ "volume_id": req.VolumeId, "node_id": req.NodeId, @@ -710,6 +757,11 @@ func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequ return nil, status.Error(codes.InvalidArgument, "CreateSnapshotRequest Source Volume Id must be provided") } + if acquired := d.volumeLocks.TryAcquire(req.SourceVolumeId); !acquired { + return nil, status.Errorf(codes.Aborted, "an operation with the given Volume ID %s already exists", req.SourceVolumeId) + } + defer d.volumeLocks.Release(req.SourceVolumeId) + 
ll := d.log.WithFields(logrus.Fields{ "source_volume_id": req.SourceVolumeId, "name": req.Name, @@ -941,6 +993,12 @@ func (d *Driver) ControllerExpandVolume(ctx context.Context, req *csi.Controller if len(volID) == 0 { return nil, status.Error(codes.InvalidArgument, "ControllerExpandVolume volume ID missing in request") } + + if acquired := d.volumeLocks.TryAcquire(volID); !acquired { + return nil, status.Errorf(codes.Aborted, "an operation with the given Volume ID %s already exists", volID) + } + defer d.volumeLocks.Release(volID) + volume, err := d.cloudscaleClient.Volumes.Get(ctx, volID) if err != nil { return nil, status.Errorf(codes.Internal, "ControllerExpandVolume could not retrieve existing volume: %v", err) diff --git a/driver/driver_test.go b/driver/driver_test.go index 420640b4..e1af211c 100644 --- a/driver/driver_test.go +++ b/driver/driver_test.go @@ -1337,3 +1337,262 @@ func TestCreateVolumeFromSnapshot_Idempotent_NeedsExpansion(t *testing.T) { assert.NoError(t, err) assert.Equal(t, expandedSizeGiB, vol.SizeGB) } + +// TestControllerPublishVolume_RejectsWhenAttachedToDifferentNode tests that +// ControllerPublishVolume returns FailedPrecondition when the volume is +// already attached to a different node, preventing silent volume migration. 
+func TestControllerPublishVolume_RejectsWhenAttachedToDifferentNode(t *testing.T) {
+	serverA := "server-a-uuid"
+	serverB := "server-b-uuid"
+	driver := newPublishTestDriver(serverA, serverB)
+
+	ctx := context.Background()
+	volumeID := createVolumeForTest(t, driver, "test-vol-multiattach")
+
+	// Attach volume to server A.
+	_, err := driver.ControllerPublishVolume(ctx, &csi.ControllerPublishVolumeRequest{
+		VolumeId:         volumeID,
+		NodeId:           serverA,
+		VolumeCapability: singleNodeWriterMountCapability(),
+	})
+	if err != nil {
+		t.Fatalf("Failed to publish volume to server A: %v", err)
+	}
+
+	// Try to attach the same volume to server B — should be rejected.
+	_, err = driver.ControllerPublishVolume(ctx, &csi.ControllerPublishVolumeRequest{
+		VolumeId:         volumeID,
+		NodeId:           serverB,
+		VolumeCapability: singleNodeWriterMountCapability(),
+	})
+	if err == nil {
+		t.Fatal("Expected FailedPrecondition error when publishing to different node, got nil")
+	}
+
+	st, ok := status.FromError(err)
+	if !ok {
+		t.Fatalf("Expected gRPC status error, got: %v", err)
+	}
+	if st.Code() != codes.FailedPrecondition {
+		t.Errorf("Expected codes.FailedPrecondition, got %v: %v", st.Code(), err)
+	}
+
+	// Verify the volume is still attached to server A (not silently moved).
+	requireAttachedOnlyTo(t, driver, volumeID, serverA)
+}
+
+// TestControllerPublishVolume_IdempotentSameNode tests that calling
+// ControllerPublishVolume for a volume already attached to the same node
+// returns success without error and leaves the attachment untouched.
+func TestControllerPublishVolume_IdempotentSameNode(t *testing.T) {
+	serverA := "server-a-uuid"
+	driver := newPublishTestDriver(serverA)
+
+	ctx := context.Background()
+	volumeID := createVolumeForTest(t, driver, "test-vol-idempotent")
+
+	publishReq := &csi.ControllerPublishVolumeRequest{
+		VolumeId:         volumeID,
+		NodeId:           serverA,
+		VolumeCapability: singleNodeWriterMountCapability(),
+		VolumeContext: map[string]string{
+			LuksEncryptedAttribute: "false",
+		},
+	}
+
+	// First publish performs the actual attach.
+	resp1, err := driver.ControllerPublishVolume(ctx, publishReq)
+	if err != nil {
+		t.Fatalf("First publish failed: %v", err)
+	}
+
+	// Second publish to same node — should succeed (idempotent).
+	resp2, err := driver.ControllerPublishVolume(ctx, publishReq)
+	if err != nil {
+		t.Fatalf("Second publish (idempotent) failed: %v", err)
+	}
+
+	// Both responses must report the same volume name in the publish context.
+	if resp1.PublishContext[PublishInfoVolumeName] != resp2.PublishContext[PublishInfoVolumeName] {
+		t.Errorf("Publish context mismatch: %v vs %v", resp1.PublishContext, resp2.PublishContext)
+	}
+
+	// The repeated call must not have changed where the volume is attached.
+	requireAttachedOnlyTo(t, driver, volumeID, serverA)
+}
+
+// TestControllerPublishVolume_SucceedsWhenNotAttached tests that
+// ControllerPublishVolume works normally when the volume is not attached.
+func TestControllerPublishVolume_SucceedsWhenNotAttached(t *testing.T) {
+	serverA := "server-a-uuid"
+	driver := newPublishTestDriver(serverA)
+
+	ctx := context.Background()
+	volumeID := createVolumeForTest(t, driver, "test-vol-attach")
+
+	resp, err := driver.ControllerPublishVolume(ctx, &csi.ControllerPublishVolumeRequest{
+		VolumeId:         volumeID,
+		NodeId:           serverA,
+		VolumeCapability: singleNodeWriterMountCapability(),
+	})
+	if err != nil {
+		t.Fatalf("Publish failed: %v", err)
+	}
+
+	if resp.PublishContext[PublishInfoVolumeName] == "" {
+		t.Error("Expected non-empty volume name in publish context")
+	}
+
+	// Verify volume is attached to the server.
+	requireAttachedOnlyTo(t, driver, volumeID, serverA)
+}
+
+// newPublishTestDriver builds a Driver backed by a fake cloudscale client
+// that knows the given servers. self is used as the driver's own server ID;
+// others are additional servers a volume can be published to.
+func newPublishTestDriver(self string, others ...string) *Driver {
+	servers := make(map[string]*cloudscale.Server, 1+len(others))
+	servers[self] = &cloudscale.Server{UUID: self}
+	for _, uuid := range others {
+		servers[uuid] = &cloudscale.Server{UUID: uuid}
+	}
+
+	return &Driver{
+		endpoint:         "unix:///tmp/csi-test.sock",
+		serverId:         self,
+		zone:             DefaultZone.Slug,
+		cloudscaleClient: NewFakeClient(servers),
+		mounter:          &fakeMounter{mounted: map[string]string{}},
+		log:              logrus.New().WithField("test_enabled", true),
+		volumeLocks:      NewVolumeLocks(),
+	}
+}
+
+// singleNodeWriterMountCapability returns the mount/SINGLE_NODE_WRITER
+// capability used by all ControllerPublishVolume tests. A fresh value is
+// returned on every call so requests never share a capability instance.
+func singleNodeWriterMountCapability() *csi.VolumeCapability {
+	return &csi.VolumeCapability{
+		AccessMode: &csi.VolumeCapability_AccessMode{
+			Mode: csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
+		},
+		AccessType: &csi.VolumeCapability_Mount{
+			Mount: &csi.VolumeCapability_MountVolume{},
+		},
+	}
+}
+
+// requireAttachedOnlyTo fails the test unless the volume is attached to
+// exactly one server and that server is serverUUID. A nil ServerUUIDs
+// pointer is reported as a failure instead of being dereferenced.
+func requireAttachedOnlyTo(t *testing.T, driver *Driver, volumeID, serverUUID string) {
+	t.Helper()
+
+	vol, err := driver.cloudscaleClient.Volumes.Get(context.Background(), volumeID)
+	if err != nil {
+		t.Fatalf("Failed to get volume: %v", err)
+	}
+	if vol.ServerUUIDs == nil {
+		t.Fatalf("Expected volume attached to %s, got ServerUUIDs=nil", serverUUID)
+	}
+	if attached := *vol.ServerUUIDs; len(attached) != 1 || attached[0] != serverUUID {
+		t.Errorf("Expected volume attached only to %s, got ServerUUIDs=%v", serverUUID, attached)
+	}
+}
+
+// TestControllerOperations_VolumeLocks tests that concurrent controller
+// operations on the same volume are rejected with codes.Aborted while the
+// volume's lock is held by another operation.
+func TestControllerOperations_VolumeLocks(t *testing.T) {
+	driver := createDriverForTest(t)
+	ctx := context.Background()
+	volumeID := createVolumeForTest(t, driver, "test-vol-locks")
+
+	// Hold the volume lock for the whole test to simulate an in-flight operation.
+	if !driver.volumeLocks.TryAcquire(volumeID) {
+		t.Fatal("Failed to pre-acquire volume lock")
+	}
+	// Deferred so the lock is released on every exit path of the test.
+	defer driver.volumeLocks.Release(volumeID)
+
+	// Every controller operation that locks on the volume ID. Each entry
+	// returns only the error; the response is irrelevant for this test.
+	operations := []struct {
+		name string
+		call func() error
+	}{
+		{
+			name: "ControllerPublishVolume",
+			call: func() error {
+				_, err := driver.ControllerPublishVolume(ctx, &csi.ControllerPublishVolumeRequest{
+					VolumeId: volumeID,
+					NodeId:   "some-node",
+					VolumeCapability: &csi.VolumeCapability{
+						AccessMode: &csi.VolumeCapability_AccessMode{
+							Mode: csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
+						},
+						AccessType: &csi.VolumeCapability_Mount{
+							Mount: &csi.VolumeCapability_MountVolume{},
+						},
+					},
+				})
+				return err
+			},
+		},
+		{
+			name: "ControllerUnpublishVolume",
+			call: func() error {
+				_, err := driver.ControllerUnpublishVolume(ctx, &csi.ControllerUnpublishVolumeRequest{
+					VolumeId: volumeID,
+					NodeId:   "some-node",
+				})
+				return err
+			},
+		},
+		{
+			name: "DeleteVolume",
+			call: func() error {
+				_, err := driver.DeleteVolume(ctx, &csi.DeleteVolumeRequest{
+					VolumeId: volumeID,
+				})
+				return err
+			},
+		},
+		{
+			name: "ControllerExpandVolume",
+			call: func() error {
+				_, err := driver.ControllerExpandVolume(ctx, &csi.ControllerExpandVolumeRequest{
+					VolumeId:      volumeID,
+					CapacityRange: &csi.CapacityRange{RequiredBytes: 10 * GB},
+				})
+				return err
+			},
+		},
+		{
+			// CreateSnapshot locks on the source volume ID.
+			name: "CreateSnapshot",
+			call: func() error {
+				_, err := driver.CreateSnapshot(ctx, &csi.CreateSnapshotRequest{
+					Name:           "snap-locked",
+					SourceVolumeId: volumeID,
+				})
+				return err
+			},
+		},
+	}
+
+	for _, op := range operations {
+		assertAbortedError(t, op.call(), op.name)
+	}
+
+	// A rejected operation never owned the lock, so it must not have
+	// released the one held by this test.
+	if driver.volumeLocks.TryAcquire(volumeID) {
+		t.Error("volume lock was released by an operation that was rejected")
+	}
+}
+
+// assertAbortedError reports a test error unless err is a gRPC status error
+// with code Aborted. opName identifies the operation in failure messages.
+func assertAbortedError(t *testing.T, err error, opName string) {
+	t.Helper()
+	if err == nil {
+		t.Errorf("%s: expected Aborted error when volume is locked, got nil", opName)
+		return
+	}
+	st, ok := status.FromError(err)
+	if !ok {
+		t.Errorf("%s: expected gRPC status error, got: %v", opName, err)
+		return
+	}
+	if st.Code() != codes.Aborted {
+		t.Errorf("%s: expected codes.Aborted, got %v: %v", opName, st.Code(), err)
+	}
+}