Skip to content

Commit f49162d

Browse files
committed
fix(BRE2-940): additional Nebius capacity error mapping
1 parent 9660118 commit f49162d

3 files changed

Lines changed: 84 additions & 3 deletions

File tree

v1/providers/nebius/errors.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66

77
v1 "github.com/brevdev/cloud/v1"
88
"github.com/nebius/gosdk/operations"
9+
"github.com/nebius/gosdk/serviceerror"
910
"google.golang.org/grpc/codes"
1011
"google.golang.org/grpc/status"
1112
)
@@ -37,6 +38,17 @@ func handleErrToCloudErr(e error) error {
3738
if e == nil {
3839
return nil
3940
}
41+
var serviceErr *serviceerror.Error
42+
if errors.As(e, &serviceErr) {
43+
for _, detail := range serviceErr.Details {
44+
switch detail.(type) {
45+
case *serviceerror.NotEnoughResources:
46+
return v1.ErrInsufficientResources
47+
case *serviceerror.QuotaFailure:
48+
return v1.ErrOutOfQuota
49+
}
50+
}
51+
}
4052
// Check for Nebius operations.Error for ResourceExhausted (returned by operation.Wait on async failures)
4153
var opErr *operations.Error
4254
if errors.As(e, &opErr) {

v1/providers/nebius/errors_test.go

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package v1
2+
3+
import (
4+
"errors"
5+
"testing"
6+
7+
cloudv1 "github.com/brevdev/cloud/v1"
8+
common "github.com/nebius/gosdk/proto/nebius/common/v1"
9+
"github.com/nebius/gosdk/serviceerror"
10+
"github.com/stretchr/testify/require"
11+
"google.golang.org/grpc/codes"
12+
"google.golang.org/grpc/status"
13+
)
14+
15+
func TestHandleErrToCloudErrMapsNotEnoughResourcesToInsufficientResources(t *testing.T) {
16+
t.Parallel()
17+
18+
err := &serviceerror.Error{
19+
Wrapped: status.Error(codes.ResourceExhausted, "operation failed"),
20+
Details: []serviceerror.Detail{
21+
serviceerror.NewDetail(&common.ServiceError{
22+
Service: "compute",
23+
Code: "NotEnoughResources",
24+
Details: &common.ServiceError_NotEnoughResources{
25+
NotEnoughResources: &common.NotEnoughResources{
26+
Violations: []*common.NotEnoughResources_Violation{
27+
{
28+
ResourceType: "virtualMachine",
29+
Requested: "1gpu-16vcpu-64gb",
30+
Message: "VM schedule timeout, most likely due to insufficient hardware resources",
31+
},
32+
},
33+
},
34+
},
35+
}),
36+
},
37+
}
38+
39+
require.True(t, errors.Is(handleErrToCloudErr(err), cloudv1.ErrInsufficientResources))
40+
}
41+
42+
func TestHandleErrToCloudErrMapsQuotaFailureToOutOfQuota(t *testing.T) {
43+
t.Parallel()
44+
45+
err := &serviceerror.Error{
46+
Wrapped: status.Error(codes.ResourceExhausted, "operation failed"),
47+
Details: []serviceerror.Detail{
48+
serviceerror.NewDetail(&common.ServiceError{
49+
Service: "compute",
50+
Code: "QuotaFailure",
51+
Details: &common.ServiceError_QuotaFailure{
52+
QuotaFailure: &common.QuotaFailure{
53+
Violations: []*common.QuotaFailure_Violation{
54+
{
55+
Quota: "compute.instance.gpu.h100",
56+
Limit: "0",
57+
Requested: "1",
58+
Message: "quota exceeded",
59+
},
60+
},
61+
},
62+
},
63+
}),
64+
},
65+
}
66+
67+
require.True(t, errors.Is(handleErrToCloudErr(err), cloudv1.ErrOutOfQuota))
68+
}

v1/providers/nebius/instance.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -916,17 +916,18 @@ func (c *NebiusClient) StartInstance(ctx context.Context, instanceID v1.CloudPro
916916
Id: string(instanceID),
917917
})
918918
if err != nil {
919-
return fmt.Errorf("failed to initiate instance start: %w", err)
919+
return fmt.Errorf("failed to initiate instance start: %w", handleErrToCloudErr(err))
920920
}
921921

922922
// Wait for the start operation to complete
923923
finalOp, err := operation.Wait(ctx)
924924
if err != nil {
925-
return fmt.Errorf("failed to wait for instance start: %w", err)
925+
return fmt.Errorf("failed to wait for instance start: %w", handleErrToCloudErr(err))
926926
}
927927

928928
if !finalOp.Successful() {
929-
return fmt.Errorf("instance start failed: %v", finalOp.Status())
929+
statusErr := fmt.Errorf("instance start failed: %v", finalOp.Status())
930+
return handleErrToCloudErr(statusErr)
930931
}
931932

932933
c.logger.Debug(ctx, "start operation completed, waiting for instance to reach RUNNING state",

0 commit comments

Comments
 (0)