diff --git a/Makefile b/Makefile index 6f97969859..0173ec5178 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ else VERSION ?= ${VERSION} endif -RELEASE = v1.34.1 +RELEASE = v1.34.2 GOOS ?= linux ARCH ?= amd64 diff --git a/manifests/cloud-controller-manager/oci-cloud-controller-manager.yaml b/manifests/cloud-controller-manager/oci-cloud-controller-manager.yaml index 7010a24e9b..56bce46f94 100644 --- a/manifests/cloud-controller-manager/oci-cloud-controller-manager.yaml +++ b/manifests/cloud-controller-manager/oci-cloud-controller-manager.yaml @@ -42,7 +42,7 @@ spec: path: /etc/kubernetes containers: - name: oci-cloud-controller-manager - image: ghcr.io/oracle/cloud-provider-oci:v1.34.1 + image: ghcr.io/oracle/cloud-provider-oci:v1.34.2 command: ["/usr/local/bin/oci-cloud-controller-manager"] args: - --cloud-config=/etc/oci/cloud-provider.yaml diff --git a/manifests/container-storage-interface/csi/templates/oci-csi-controller-driver.yaml b/manifests/container-storage-interface/csi/templates/oci-csi-controller-driver.yaml index d70e4d5bb0..18e3981894 100644 --- a/manifests/container-storage-interface/csi/templates/oci-csi-controller-driver.yaml +++ b/manifests/container-storage-interface/csi/templates/oci-csi-controller-driver.yaml @@ -114,7 +114,7 @@ spec: - --lustre-csi-endpoint=unix://var/run/shared-tmpfs/csi-lustre.sock command: - /usr/local/bin/oci-csi-controller-driver - image: ghcr.io/oracle/cloud-provider-oci:v1.34.1 + image: ghcr.io/oracle/cloud-provider-oci:v1.34.2 imagePullPolicy: IfNotPresent env: - name: BLOCK_VOLUME_DRIVER_NAME diff --git a/manifests/container-storage-interface/csi/templates/oci-csi-node-driver.yaml b/manifests/container-storage-interface/csi/templates/oci-csi-node-driver.yaml index 301e32b70c..2f82b3f6c7 100644 --- a/manifests/container-storage-interface/csi/templates/oci-csi-node-driver.yaml +++ b/manifests/container-storage-interface/csi/templates/oci-csi-node-driver.yaml @@ -135,7 +135,7 @@ spec: value: "{{ if .Values.customHandle }}{{ .Values.customHandle }}.{{ end }}fss.csi.oraclecloud.com" - name: LUSTRE_VOLUME_DRIVER_NAME value: "{{ if .Values.customHandle }}{{ .Values.customHandle }}.{{ end }}lustre.csi.oraclecloud.com" - image: ghcr.io/oracle/cloud-provider-oci:v1.34.1 + image: ghcr.io/oracle/cloud-provider-oci:v1.34.2 securityContext: privileged: true volumeMounts: diff --git a/manifests/container-storage-interface/oci-csi-controller-driver.yaml b/manifests/container-storage-interface/oci-csi-controller-driver.yaml index 061dfc1c51..656ba79b5a 100644 --- a/manifests/container-storage-interface/oci-csi-controller-driver.yaml +++ b/manifests/container-storage-interface/oci-csi-controller-driver.yaml @@ -117,7 +117,7 @@ spec: env: - name: LUSTRE_CSI_CONTROLLER_DRIVER_ENABLED value: "true" - image: ghcr.io/oracle/cloud-provider-oci:v1.34.1 + image: ghcr.io/oracle/cloud-provider-oci:v1.34.2 imagePullPolicy: IfNotPresent volumeMounts: - name: config diff --git a/manifests/container-storage-interface/oci-csi-node-driver.yaml b/manifests/container-storage-interface/oci-csi-node-driver.yaml index 55818fa051..8182e62737 100644 --- a/manifests/container-storage-interface/oci-csi-node-driver.yaml +++ b/manifests/container-storage-interface/oci-csi-node-driver.yaml @@ -129,7 +129,7 @@ spec: value: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/host/usr/bin:/host/sbin - name: LUSTRE_DRIVER_ENABLED value: "true" - image: ghcr.io/oracle/cloud-provider-oci:v1.34.1 + image: ghcr.io/oracle/cloud-provider-oci:v1.34.2 securityContext: privileged: true volumeMounts: diff --git a/manifests/flexvolume-driver/oci-flexvolume-driver.yaml b/manifests/flexvolume-driver/oci-flexvolume-driver.yaml index cf2c67fd1b..fdcabffe51 100644 --- a/manifests/flexvolume-driver/oci-flexvolume-driver.yaml +++ b/manifests/flexvolume-driver/oci-flexvolume-driver.yaml @@ -40,7 +40,7 @@ spec: secretName: oci-flexvolume-driver containers: - name: oci-flexvolume-driver - image: ghcr.io/oracle/cloud-provider-oci:v1.34.1 + image: ghcr.io/oracle/cloud-provider-oci:v1.34.2 command: ["/usr/local/bin/install.py", "-c", "/tmp/config.yaml"] securityContext: privileged: true @@ -76,7 +76,7 @@ spec: type: DirectoryOrCreate containers: - name: oci-flexvolume-driver - image: ghcr.io/oracle/cloud-provider-oci:v1.34.1 + image: ghcr.io/oracle/cloud-provider-oci:v1.34.2 command: ["/usr/local/bin/install.py"] securityContext: privileged: true diff --git a/manifests/volume-provisioner/oci-volume-provisioner-fss.yaml b/manifests/volume-provisioner/oci-volume-provisioner-fss.yaml index ca96b6e7f2..01de13988a 100644 --- a/manifests/volume-provisioner/oci-volume-provisioner-fss.yaml +++ b/manifests/volume-provisioner/oci-volume-provisioner-fss.yaml @@ -35,7 +35,7 @@ spec: secretName: oci-volume-provisioner containers: - name: oci-volume-provisioner - image: ghcr.io/oracle/cloud-provider-oci:v1.34.1 + image: ghcr.io/oracle/cloud-provider-oci:v1.34.2 command: ["/usr/local/bin/oci-volume-provisioner"] env: - name: NODE_NAME diff --git a/manifests/volume-provisioner/oci-volume-provisioner.yaml b/manifests/volume-provisioner/oci-volume-provisioner.yaml index 798c811b2b..13530da82b 100644 --- a/manifests/volume-provisioner/oci-volume-provisioner.yaml +++ b/manifests/volume-provisioner/oci-volume-provisioner.yaml @@ -35,7 +35,7 @@ spec: secretName: oci-volume-provisioner containers: - name: oci-volume-provisioner - image: ghcr.io/oracle/cloud-provider-oci:v1.34.1 + image: ghcr.io/oracle/cloud-provider-oci:v1.34.2 command: ["/usr/local/bin/oci-volume-provisioner"] env: - name: NODE_NAME diff --git a/pkg/csi-util/lustre_lnet_helper.go b/pkg/csi-util/lustre_lnet_helper.go index 311348ee29..f0c5167baa 100644 --- a/pkg/csi-util/lustre_lnet_helper.go +++ b/pkg/csi-util/lustre_lnet_helper.go @@ -80,7 +80,7 @@ func ValidateLustreVolumeId(lusterVolumeId string) (bool, string) { type LnetConfigurator interface { GetNetInterfacesInSubnet(subnetCIDR string) ([]NetInterface, error) IsLustreClientPackagesInstalled(logger *zap.SugaredLogger) bool - GetLnetInfoByLnetLabel(lnetLabel string) (NetInfo, error) + GetLnetInfoByLnetLabel(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) ConfigureLnet(logger *zap.SugaredLogger, ifaces []NetInterface, lnetLabel string, netInfo NetInfo) error VerifyLnetConfiguration(logger *zap.SugaredLogger, ifaces []NetInterface, lnetLabel string, netInfo NetInfo, err error) error ExecuteCommandOnWorkerNode(args ...string) (string, error) @@ -136,7 +136,7 @@ func (ls *LnetService) SetupLnet(logger *zap.SugaredLogger, lustreSubnetCIDR str //get existing lnet configuration var netInfo NetInfo - netInfo, err = ls.Configurator.GetLnetInfoByLnetLabel(lnetLabel) + netInfo, err = ls.Configurator.GetLnetInfoByLnetLabel(logger, lnetLabel) if err != nil { return err } @@ -192,20 +192,36 @@ func (olc *OCILnetConfigurator) GetNetInterfacesInSubnet(subnetCIDR string) ([]N return matchingInterfaces, nil } -func (olc *OCILnetConfigurator) GetLnetInfoByLnetLabel(lnetLabel string) (NetInfo, error) { - var netInfo NetInfo +func (olc *OCILnetConfigurator) GetLnetInfoByLnetLabel(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) { existingConfiguredLnetInfo, err := olc.ExecuteCommandOnWorkerNode(fmt.Sprintf(SHOW_CONFIGURED_LNET, lnetLabel)) - if err != nil { - return netInfo, fmt.Errorf("Failed to get existing configured lnet information with error : %v", err) + return parseLnetInfo(logger, existingConfiguredLnetInfo, err) +} + +func parseLnetInfo(logger *zap.SugaredLogger, existingConfiguredLnetInfo string, commandErr error) (NetInfo, error) { + var netInfo NetInfo + if commandErr != nil { + if isLnetNetworkDownResponse(existingConfiguredLnetInfo, commandErr) { + logger.With(zap.Error(commandErr)).Info("Expected failure observed while getting existing configured lnet information because lnet network is down. Starting lnet configuration.") + return netInfo, nil + } + return netInfo, fmt.Errorf("Failed to get existing configured lnet information with error : %v", commandErr) } - err = yaml.Unmarshal([]byte(existingConfiguredLnetInfo), &netInfo) + err := yaml.Unmarshal([]byte(existingConfiguredLnetInfo), &netInfo) if err != nil { return netInfo, fmt.Errorf("Failed to parse lnet information with error : %v", err) } return netInfo, nil } +func isLnetNetworkDownResponse(output string, err error) bool { + if err == nil { + return false + } + lnetctlResponse := fmt.Sprintf("%s\n%v", output, err) + return strings.Contains(lnetctlResponse, "Network is down") +} + func (olc *OCILnetConfigurator) ConfigureLnet(logger *zap.SugaredLogger, interfacesInLustreSubnet []NetInterface, lnetLabel string, netInfo NetInfo) error { logger.Infof("Existing lnet information : %v", netInfo) @@ -256,7 +272,7 @@ func (olc *OCILnetConfigurator) ConfigureLnet(logger *zap.SugaredLogger, interfa func (olc *OCILnetConfigurator) VerifyLnetConfiguration(logger *zap.SugaredLogger, interfacesInLustreSubnet []NetInterface, lnetLabel string, netInfo NetInfo, err error) error { logger.Infof("Verifying lnet configuration.") //Get already configured lnet interfaces - netInfo, err = olc.GetLnetInfoByLnetLabel(lnetLabel) + netInfo, err = olc.GetLnetInfoByLnetLabel(logger, lnetLabel) if err != nil { return err } @@ -306,7 +322,7 @@ It returns true when active lnet interface is identified else returns false sing func (ls *LnetService) IsLnetActive(logger *zap.SugaredLogger, lnetLabel string) bool { logger.Debugf("Trying to check status of lnet") //Get already configured lnet interfaces - netInfo, err := ls.Configurator.GetLnetInfoByLnetLabel(lnetLabel) + netInfo, err := ls.Configurator.GetLnetInfoByLnetLabel(logger, lnetLabel) if err != nil { logger.With(zap.Error(err)).Errorf("Failed to get lnet info for lnet : %v", lnetLabel) return false @@ -332,7 +348,6 @@ func (ls *LnetService) IsLnetActive(logger *zap.SugaredLogger, lnetLabel string) } func (olc *OCILnetConfigurator) ExecuteCommandOnWorkerNode(args ...string) (string, error) { - command := exec.Command("chroot-bash", args...) output, err := command.CombinedOutput() diff --git a/pkg/csi-util/lustre_lnet_helper_test.go b/pkg/csi-util/lustre_lnet_helper_test.go index 94b256c3d2..c0877d218d 100644 --- a/pkg/csi-util/lustre_lnet_helper_test.go +++ b/pkg/csi-util/lustre_lnet_helper_test.go @@ -8,6 +8,7 @@ import ( "github.com/pkg/errors" "github.com/stretchr/testify/assert" "go.uber.org/zap" + "go.uber.org/zap/zaptest/observer" ) func TestValidateLustreVolumeId(t *testing.T) { @@ -50,7 +51,7 @@ func TestValidateLustreVolumeId(t *testing.T) { type FakeConfigurator struct { GetNetInterfacesInSubnetFunc func(subnetCIDR string) ([]NetInterface, error) IsLustreClientPackagesInstalledFunc func(logger *zap.SugaredLogger) bool - GetLnetInfoByLnetLabelFunc func(lnetLabel string) (NetInfo, error) + GetLnetInfoByLnetLabelFunc func(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) ConfigureLnetFunc func(logger *zap.SugaredLogger, ifaces []NetInterface, lnetLabel string, netInfo NetInfo) error VerifyLnetConfigurationFunc func(logger *zap.SugaredLogger, ifaces []NetInterface, lnetLabel string, netInfo NetInfo, err error) error ExecuteCommandOnWorkerNodeFunc func(args ...string) (string, error) @@ -64,8 +65,8 @@ func (f *FakeConfigurator) IsLustreClientPackagesInstalled(logger *zap.SugaredLo return f.IsLustreClientPackagesInstalledFunc(logger) } -func (f *FakeConfigurator) GetLnetInfoByLnetLabel(lnetLabel string) (NetInfo, error) { - return f.GetLnetInfoByLnetLabelFunc(lnetLabel) +func (f *FakeConfigurator) GetLnetInfoByLnetLabel(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) { + return f.GetLnetInfoByLnetLabelFunc(logger, lnetLabel) } func (f *FakeConfigurator) ConfigureLnet(logger *zap.SugaredLogger, ifaces []NetInterface, lnetLabel string, netInfo NetInfo) error { @@ -130,7 +131,7 @@ func TestLnetService_SetupLnet_TableDriven(t *testing.T) { // These functions are not used in this case. IsLustreClientPackagesInstalledFunc: func(logger *zap.SugaredLogger) bool { return true }, ExecuteCommandOnWorkerNodeFunc: func(args ...string) (string, error) { return "ok", nil }, - GetLnetInfoByLnetLabelFunc: func(lnetLabel string) (NetInfo, error) { return NetInfo{}, nil }, + GetLnetInfoByLnetLabelFunc: func(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) { return NetInfo{}, nil }, ConfigureLnetFunc: func(logger *zap.SugaredLogger, ifaces []NetInterface, lnetLabel string, netInfo NetInfo) error { return nil }, @@ -158,7 +159,7 @@ func TestLnetService_SetupLnet_TableDriven(t *testing.T) { } return "ok", nil }, - GetLnetInfoByLnetLabelFunc: func(lnetLabel string) (NetInfo, error) { + GetLnetInfoByLnetLabelFunc: func(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) { return dummyNetInfo, nil }, ConfigureLnetFunc: func(logger *zap.SugaredLogger, ifaces []NetInterface, lnetLabel string, netInfo NetInfo) error { @@ -185,7 +186,7 @@ func TestLnetService_SetupLnet_TableDriven(t *testing.T) { } return "ok", nil }, - GetLnetInfoByLnetLabelFunc: func(lnetLabel string) (NetInfo, error) { return NetInfo{}, nil }, + GetLnetInfoByLnetLabelFunc: func(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) { return NetInfo{}, nil }, ConfigureLnetFunc: func(logger *zap.SugaredLogger, ifaces []NetInterface, lnetLabel string, netInfo NetInfo) error { return nil }, @@ -210,7 +211,7 @@ func TestLnetService_SetupLnet_TableDriven(t *testing.T) { } return "ok", nil }, - GetLnetInfoByLnetLabelFunc: func(lnetLabel string) (NetInfo, error) { return NetInfo{}, nil }, + GetLnetInfoByLnetLabelFunc: func(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) { return NetInfo{}, nil }, ConfigureLnetFunc: func(logger *zap.SugaredLogger, ifaces []NetInterface, lnetLabel string, netInfo NetInfo) error { return nil }, @@ -233,7 +234,7 @@ func TestLnetService_SetupLnet_TableDriven(t *testing.T) { // Both kernel module load and kernel service config succeed. return "ok", nil }, - GetLnetInfoByLnetLabelFunc: func(lnetLabel string) (NetInfo, error) { + GetLnetInfoByLnetLabelFunc: func(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) { return NetInfo{}, errors.New("get lnet info error") }, }, @@ -251,7 +252,7 @@ func TestLnetService_SetupLnet_TableDriven(t *testing.T) { ExecuteCommandOnWorkerNodeFunc: func(args ...string) (string, error) { return "ok", nil }, - GetLnetInfoByLnetLabelFunc: func(lnetLabel string) (NetInfo, error) { + GetLnetInfoByLnetLabelFunc: func(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) { return dummyNetInfo, nil }, ConfigureLnetFunc: func(logger *zap.SugaredLogger, ifaces []NetInterface, lnetLabel string, netInfo NetInfo) error { @@ -272,7 +273,7 @@ func TestLnetService_SetupLnet_TableDriven(t *testing.T) { ExecuteCommandOnWorkerNodeFunc: func(args ...string) (string, error) { return "ok", nil }, - GetLnetInfoByLnetLabelFunc: func(lnetLabel string) (NetInfo, error) { + GetLnetInfoByLnetLabelFunc: func(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) { return dummyNetInfo, nil }, ConfigureLnetFunc: func(logger *zap.SugaredLogger, ifaces []NetInterface, lnetLabel string, netInfo NetInfo) error { @@ -300,18 +301,135 @@ func TestLnetService_SetupLnet_TableDriven(t *testing.T) { } } +func TestParseLnetInfo(t *testing.T) { + tests := []struct { + name string + commandOutput string + commandErr error + expectedErrSubstr string + expectedNetType string + expectedLogSubstr string + }{ + { + name: "Lustre 2.15.90 network down response returns empty net info", + commandOutput: `--- +show: +- net: + errno: -3 + descr: ! "Network is down" +... +`, + commandErr: errors.New("exit status 234"), + expectedLogSubstr: "Expected failure observed", + }, + { + name: "network down response without errno returns empty net info", + commandOutput: `--- +show: +- net: + descr: ! "Network is down" +... +`, + commandErr: errors.New("exit status 1"), + expectedLogSubstr: "Expected failure observed", + }, + { + name: "valid lnet output is parsed", + commandOutput: `net: +- net type: tcp + local NI(s): + - nid: 10.244.0.10@tcp + status: up + interfaces: + 0: eth0 +`, + expectedNetType: "tcp", + }, + { + name: "unrelated command error is returned", + commandOutput: "opening /dev/lnet failed: No such device", + commandErr: errors.New("exit status 1"), + expectedErrSubstr: "Failed to get existing configured lnet information", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + core, recorded := observer.New(zap.InfoLevel) + logger := zap.New(core).Sugar() + + netInfo, err := parseLnetInfo(logger, tc.commandOutput, tc.commandErr) + + if tc.expectedErrSubstr != "" { + assert.Error(t, err) + assert.Contains(t, err.Error(), tc.expectedErrSubstr) + return + } + + assert.NoError(t, err) + if tc.expectedLogSubstr != "" { + assert.Equal(t, 1, recorded.FilterMessageSnippet(tc.expectedLogSubstr).Len()) + } + if tc.expectedNetType == "" { + assert.Empty(t, netInfo.Net) + return + } + if assert.Len(t, netInfo.Net, 1) { + assert.Equal(t, tc.expectedNetType, netInfo.Net[0].NetType) + } + }) + } +} + +func TestLnetService_SetupLnet_ConfiguresWhenExistingLnetInfoIsEmpty(t *testing.T) { + logger := zap.NewExample().Sugar() + configureCalled := false + ifaces := []NetInterface{{InterfaceName: "eth0", InterfaceIPv4: "10.244.0.10"}} + fakeCfg := &FakeConfigurator{ + GetNetInterfacesInSubnetFunc: func(subnetCIDR string) ([]NetInterface, error) { + return ifaces, nil + }, + IsLustreClientPackagesInstalledFunc: func(logger *zap.SugaredLogger) bool { + return true + }, + ExecuteCommandOnWorkerNodeFunc: func(args ...string) (string, error) { + return "ok", nil + }, + GetLnetInfoByLnetLabelFunc: func(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) { + return NetInfo{}, nil + }, + ConfigureLnetFunc: func(logger *zap.SugaredLogger, gotIfaces []NetInterface, lnetLabel string, netInfo NetInfo) error { + configureCalled = true + assert.Equal(t, ifaces, gotIfaces) + assert.Equal(t, "tcp", lnetLabel) + assert.Empty(t, netInfo.Net) + return nil + }, + VerifyLnetConfigurationFunc: func(logger *zap.SugaredLogger, ifaces []NetInterface, lnetLabel string, netInfo NetInfo, err error) error { + return nil + }, + } + + svc := LnetService{Configurator: fakeCfg} + + err := svc.SetupLnet(logger, "10.244.0.0/24", "tcp") + + assert.NoError(t, err) + assert.True(t, configureCalled) +} + func TestLnetService_IsLnetActive_TableDriven(t *testing.T) { logger := zap.NewExample().Sugar() tests := []struct { name string lnetLabel string - fakeGetInfo func(lnetLabel string) (NetInfo, error) + fakeGetInfo func(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) expectedActive bool }{ { name: "Active Lnet", lnetLabel: "tcp1", - fakeGetInfo: func(lnetLabel string) (NetInfo, error) { + fakeGetInfo: func(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) { return NetInfo{ Net: []struct { NetType string "yaml:\"net type\"" @@ -339,7 +457,7 @@ func TestLnetService_IsLnetActive_TableDriven(t *testing.T) { { name: "Inactive Lnet", lnetLabel: "tcp1", - fakeGetInfo: func(lnetLabel string) (NetInfo, error) { + fakeGetInfo: func(logger *zap.SugaredLogger, lnetLabel string) (NetInfo, error) { return NetInfo{ Net: []struct { NetType string "yaml:\"net type\""