diff --git a/chasm/lib/workflow/gen/workflowpb/v1/update_state.go-helpers.pb.go b/chasm/lib/workflow/gen/workflowpb/v1/update_state.go-helpers.pb.go new file mode 100644 index 00000000000..7dd8ceec129 --- /dev/null +++ b/chasm/lib/workflow/gen/workflowpb/v1/update_state.go-helpers.pb.go @@ -0,0 +1,43 @@ +// Code generated by protoc-gen-go-helpers. DO NOT EDIT. +package workflowpb + +import ( + "google.golang.org/protobuf/proto" +) + +// Marshal an object of type UpdateState to the protobuf v3 wire format +func (val *UpdateState) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type UpdateState from the protobuf v3 wire format +func (val *UpdateState) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *UpdateState) Size() int { + return proto.Size(val) +} + +// Equal returns whether two UpdateState values are equivalent by recursively +// comparing the message's fields. +// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *UpdateState) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *UpdateState + switch t := that.(type) { + case *UpdateState: + that1 = t + case UpdateState: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} diff --git a/chasm/lib/workflow/gen/workflowpb/v1/update_state.pb.go b/chasm/lib/workflow/gen/workflowpb/v1/update_state.pb.go new file mode 100644 index 00000000000..422b8bf5a69 --- /dev/null +++ b/chasm/lib/workflow/gen/workflowpb/v1/update_state.pb.go @@ -0,0 +1,137 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. 
+// plugins: +// protoc-gen-go +// protoc +// source: temporal/server/chasm/lib/workflow/proto/v1/update_state.proto + +package workflowpb + +import ( + reflect "reflect" + sync "sync" + unsafe "unsafe" + + v1 "go.temporal.io/api/failure/v1" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type UpdateState struct { + state protoimpl.MessageState `protogen:"open.v1"` + UpdateId string `protobuf:"bytes,1,opt,name=update_id,json=updateId,proto3" json:"update_id,omitempty"` + // Populated when the update was rejected by a validator. + // Used to resolve the update outcome for callbacks on rejected updates. + RejectionFailure *v1.Failure `protobuf:"bytes,2,opt,name=rejection_failure,json=rejectionFailure,proto3" json:"rejection_failure,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *UpdateState) Reset() { + *x = UpdateState{} + mi := &file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *UpdateState) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*UpdateState) ProtoMessage() {} + +func (x *UpdateState) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use UpdateState.ProtoReflect.Descriptor instead. 
+func (*UpdateState) Descriptor() ([]byte, []int) { + return file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDescGZIP(), []int{0} +} + +func (x *UpdateState) GetUpdateId() string { + if x != nil { + return x.UpdateId + } + return "" +} + +func (x *UpdateState) GetRejectionFailure() *v1.Failure { + if x != nil { + return x.RejectionFailure + } + return nil +} + +var File_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto protoreflect.FileDescriptor + +const file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDesc = "" + + "\n" + + ">temporal/server/chasm/lib/workflow/proto/v1/update_state.proto\x12+temporal.server.chasm.lib.workflow.proto.v1\x1a%temporal/api/failure/v1/message.proto\"y\n" + + "\vUpdateState\x12\x1b\n" + + "\tupdate_id\x18\x01 \x01(\tR\bupdateId\x12M\n" + + "\x11rejection_failure\x18\x02 \x01(\v2 .temporal.api.failure.v1.FailureR\x10rejectionFailureBDZBgo.temporal.io/server/chasm/lib/workflow/gen/workflowpb;workflowpbb\x06proto3" + +var ( + file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDescOnce sync.Once + file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDescData []byte +) + +func file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDescGZIP() []byte { + file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDescOnce.Do(func() { + file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDesc), len(file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDesc))) + }) + return file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDescData +} + +var file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_goTypes = 
[]any{ + (*UpdateState)(nil), // 0: temporal.server.chasm.lib.workflow.proto.v1.UpdateState + (*v1.Failure)(nil), // 1: temporal.api.failure.v1.Failure +} +var file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_depIdxs = []int32{ + 1, // 0: temporal.server.chasm.lib.workflow.proto.v1.UpdateState.rejection_failure:type_name -> temporal.api.failure.v1.Failure + 1, // [1:1] is the sub-list for method output_type + 1, // [1:1] is the sub-list for method input_type + 1, // [1:1] is the sub-list for extension type_name + 1, // [1:1] is the sub-list for extension extendee + 0, // [0:1] is the sub-list for field type_name +} + +func init() { file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_init() } +func file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_init() { + if File_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDesc), len(file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDesc)), + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_goTypes, + DependencyIndexes: file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_depIdxs, + MessageInfos: file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_msgTypes, + }.Build() + File_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto = out.File + file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_goTypes = nil + file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_depIdxs = nil +} diff --git a/chasm/lib/workflow/library.go b/chasm/lib/workflow/library.go index 759baf124bb..88be1d5b864 100644 --- 
a/chasm/lib/workflow/library.go +++ b/chasm/lib/workflow/library.go @@ -46,6 +46,7 @@ func (l *library) Components() []*chasm.RegistrableComponent { chasm.NewRegistrableComponent[*Workflow](chasm.WorkflowComponentName, chasm.WithContextValues(map[any]any{ ctxKeyWorkflowContext: &workflowContext{registry: l.registry}, })), + chasm.NewRegistrableComponent[*WorkflowUpdate]("update"), } } diff --git a/chasm/lib/workflow/proto/v1/update_state.proto b/chasm/lib/workflow/proto/v1/update_state.proto new file mode 100644 index 00000000000..8d1e67bea3d --- /dev/null +++ b/chasm/lib/workflow/proto/v1/update_state.proto @@ -0,0 +1,14 @@ +syntax = "proto3"; + +package temporal.server.chasm.lib.workflow.proto.v1; + +import "temporal/api/failure/v1/message.proto"; + +option go_package = "go.temporal.io/server/chasm/lib/workflow/gen/workflowpb;workflowpb"; + +message UpdateState { + string update_id = 1; + // Populated when the update was rejected by a validator. + // Used to resolve the update outcome for callbacks on rejected updates. + temporal.api.failure.v1.Failure rejection_failure = 2; +} diff --git a/chasm/lib/workflow/workflow.go b/chasm/lib/workflow/workflow.go index 8e3148cc03a..2b5409a4147 100644 --- a/chasm/lib/workflow/workflow.go +++ b/chasm/lib/workflow/workflow.go @@ -4,6 +4,7 @@ import ( "fmt" commonpb "go.temporal.io/api/common/v1" + failurepb "go.temporal.io/api/failure/v1" historypb "go.temporal.io/api/history/v1" "go.temporal.io/api/serviceerror" "go.temporal.io/server/chasm" @@ -35,6 +36,9 @@ type Workflow struct { // IncomingSignals map is used to track incoming signals, keyed by request ID, // to allow DescribeWorkflow to resolve RequestIDRef signal backlinks. IncomingSignals chasm.Map[string, *chasmworkflowpb.IncomingSignalData] + + // Updates indexed by update ID, used to store the update components. 
+ Updates chasm.Map[string, *WorkflowUpdate] } func NewWorkflow( @@ -68,35 +72,92 @@ func (w *Workflow) Terminate( return chasm.TerminateComponentResponse{}, serviceerror.NewInternal("workflow root Terminate should not be called") } -// AddCompletionCallbacks creates completion callbacks using the CHASM implementation. -// maxCallbacksPerWorkflow is the configured maximum number of callbacks allowed per workflow. -func (w *Workflow) AddCompletionCallbacks( - ctx chasm.MutableContext, - eventTime *timestamppb.Timestamp, - requestID string, - completionCallbacks []*commonpb.Callback, - maxCallbacksPerWorkflow int, -) error { - // Check CHASM max callbacks limit - currentCallbackCount := len(w.Callbacks) - if len(completionCallbacks)+currentCallbackCount > maxCallbacksPerWorkflow { +// ProcessCloseCallbacks triggers "WorkflowClosed" callbacks using the CHASM implementation. +// It schedules all workflow-level and update-level callbacks that are in STANDBY state. +func (w *Workflow) ProcessCloseCallbacks(ctx chasm.MutableContext) error { + if err := callback.ScheduleStandbyCallbacks(ctx, w.Callbacks); err != nil { + return err + } + return w.ProcessAllUpdateCloseCallbacks(ctx) +} + +// ProcessAllUpdateCloseCallbacks triggers callbacks for all updates without touching +// workflow-level callbacks. This is used when the workflow is continuing to a new run +// (ContinueAsNew, retry, cron): workflow-level callbacks are inherited by the new run, +// but update callbacks must fire now because the update was aborted on the old run. +func (w *Workflow) ProcessAllUpdateCloseCallbacks(ctx chasm.MutableContext) error { + for _, updateField := range w.Updates { + if err := callback.ScheduleStandbyCallbacks(ctx, updateField.Get(ctx).Callbacks); err != nil { + return err + } + } + return nil +} + +// ProcessUpdateCallbacks triggers callbacks for the update with the given updateID; it returns a NotFound error if no such update exists. 
+func (w *Workflow) ProcessUpdateCallbacks(ctx chasm.MutableContext, updateID string) error { + update, exists := w.Updates[updateID] + if !exists { + return serviceerror.NewNotFoundf("update with ID %s not found", updateID) + } + return callback.ScheduleStandbyCallbacks(ctx, update.Get(ctx).Callbacks) +} + +// RejectUpdate stores the rejection failure on the WorkflowUpdate component and +// fires any pending callbacks. This is used when a reapplied update (after reset) +// is rejected by the worker's validator - the callbacks need to deliver the +// rejection failure to the caller. +func (w *Workflow) RejectUpdate(ctx chasm.MutableContext, updateID string, rejectionFailure *failurepb.Failure) error { + updateField, exists := w.Updates[updateID] + if !exists { + return nil // no callbacks registered for this update + } + + upd := updateField.Get(ctx) + upd.RejectionFailure = rejectionFailure + + return callback.ScheduleStandbyCallbacks(ctx, upd.Callbacks) +} + +// totalCallbackCount returns the total number of callbacks across workflow-level +// and all update-level callback maps. +func (w *Workflow) totalCallbackCount(ctx chasm.Context) int { + count := len(w.Callbacks) + for _, updateField := range w.Updates { + count += len(updateField.Get(ctx).Callbacks) + } + return count +} + +// checkWorkflowCallbackLimit returns an error if adding newCount callbacks would +// exceed the per-workflow maximum. 
+func (w *Workflow) checkWorkflowCallbackLimit(ctx chasm.Context, newCount, maxCallbacksPerWorkflow int) error { + current := w.totalCallbackCount(ctx) + if newCount+current > maxCallbacksPerWorkflow { return serviceerror.NewFailedPreconditionf( "cannot attach more than %d callbacks to a workflow (%d callbacks already attached)", maxCallbacksPerWorkflow, - currentCallbackCount, + current, ) } + return nil +} - // Initialize map if needed - if w.Callbacks == nil { - w.Callbacks = make(chasm.Map[string, *callback.Callback], len(completionCallbacks)) - } - - // Add each callback - for idx, cb := range completionCallbacks { - chasmCB := &callbackspb.Callback{ - Links: cb.GetLinks(), - } +// addCallbacksToMap converts common callbacks to CHASM callback components and +// inserts them into the target map, keyed by "<requestID>-<idx>". +// +// All callbacks are validated up front, so target is not mutated unless every +// callback can be converted successfully (atomic from the caller's POV). +func addCallbacksToMap( + ctx chasm.MutableContext, + target chasm.Map[string, *callback.Callback], + requestID string, + eventTime *timestamppb.Timestamp, + completionCallbacks []*commonpb.Callback, +) error { + chasmCBs := make([]*callbackspb.Callback, len(completionCallbacks)) + for i, cb := range completionCallbacks { + chasmCB := &callbackspb.Callback{Links: cb.GetLinks()} switch variant := cb.Variant.(type) { case *commonpb.Callback_Nexus_: chasmCB.Variant = &callbackspb.Callback_Nexus_{ @@ -108,19 +169,84 @@ func (w *Workflow) AddCompletionCallbacks( default: return serviceerror.NewInvalidArgumentf("unsupported callback variant: %T", variant) } + chasmCBs[i] = chasmCB + } + for idx, chasmCB := range chasmCBs { // requestID (unique per API call) + idx (position within the request) ensures unique, idempotent callback IDs. // Unlike HSM callbacks, CHASM replicates entire trees rather than replaying events, so deterministic // cross-cluster IDs based on event version are not needed. 
id := fmt.Sprintf("%s-%d", requestID, idx) - - // Create and add callback + if _, exists := target[id]; exists { + // Already registered, skip to avoid overwriting. + continue + } callbackObj := callback.NewCallback(requestID, eventTime, &callbackspb.CallbackState{}, chasmCB) - w.Callbacks[id] = chasm.NewComponentField(ctx, callbackObj) + target[id] = chasm.NewComponentField(ctx, callbackObj) } return nil } +// AddCompletionCallbacks creates completion callbacks using the CHASM implementation. +// maxCallbacksPerWorkflow is the configured maximum number of callbacks allowed per workflow. +func (w *Workflow) AddCompletionCallbacks( + ctx chasm.MutableContext, + eventTime *timestamppb.Timestamp, + requestID string, + completionCallbacks []*commonpb.Callback, + maxCallbacksPerWorkflow int, +) error { + if err := w.checkWorkflowCallbackLimit(ctx, len(completionCallbacks), maxCallbacksPerWorkflow); err != nil { + return err + } + + if w.Callbacks == nil { + w.Callbacks = make(chasm.Map[string, *callback.Callback], len(completionCallbacks)) + } + + return addCallbacksToMap(ctx, w.Callbacks, requestID, eventTime, completionCallbacks) +} + +// AddUpdateCompletionCallbacks creates completion callbacks using the CHASM implementation. +// maxCallbacksPerWorkflow is the configured maximum number of callbacks allowed per workflow. +// maxCallbacksPerUpdateID is the configured maximum number of callbacks allowed per update ID. 
+func (w *Workflow) AddUpdateCompletionCallbacks(
+	ctx chasm.MutableContext,
+	eventTime *timestamppb.Timestamp,
+	updateID string,
+	requestID string,
+	completionCallbacks []*commonpb.Callback,
+	maxCallbacksPerWorkflow int,
+	maxCallbacksPerUpdateID int,
+) error {
+	if err := w.checkWorkflowCallbackLimit(ctx, len(completionCallbacks), maxCallbacksPerWorkflow); err != nil {
+		return err
+	}
+
+	// Resolve the target update without touching w.Updates yet, so that a request
+	// rejected by the per-update limit below does not leave an empty update
+	// component registered. Indexing a nil map is safe and reports exists == false.
+	updateField, exists := w.Updates[updateID]
+	var update *WorkflowUpdate
+	if exists {
+		update = updateField.Get(ctx)
+	} else {
+		update = NewWorkflowUpdate(ctx, updateID, w.MSPointer)
+	}
+
+	currentCallbackCount := len(update.Callbacks)
+	if len(completionCallbacks)+currentCallbackCount > maxCallbacksPerUpdateID {
+		return serviceerror.NewFailedPreconditionf(
+			"cannot attach more than %d callbacks to update %q (%d callbacks already attached)",
+			maxCallbacksPerUpdateID,
+			updateID,
+			currentCallbackCount,
+		)
+	}
+
+	// addCallbacksToMap writes into the map it is given, and writing to a nil map
+	// panics. NewWorkflowUpdate leaves Callbacks unset, so initialize it here for
+	// new updates and for any existing update whose map is nil, mirroring the
+	// w.Callbacks guard in AddCompletionCallbacks.
+	if update.Callbacks == nil {
+		update.Callbacks = make(chasm.Map[string, *callback.Callback], len(completionCallbacks))
+	}
+
+	// Register a newly created update only after both limits have passed.
+	// NOTE(review): a conversion error from addCallbacksToMap below still leaves a
+	// newly registered, empty update in w.Updates - confirm whether the surrounding
+	// transaction discards it.
+	if !exists {
+		if w.Updates == nil {
+			w.Updates = make(chasm.Map[string, *WorkflowUpdate], 1)
+		}
+		w.Updates[updateID] = chasm.NewComponentField(ctx, update)
+	}
+
+	return addCallbacksToMap(ctx, update.Callbacks, requestID, eventTime, completionCallbacks)
+}
+
 // addAndApplyHistoryEvent adds a history event to the workflow and applies the corresponding event definition,
 // looked up by Go type. This is the preferred way to add and apply events as it provides go-to-definition navigation.
func addAndApplyHistoryEvent[D EventDefinition]( diff --git a/chasm/lib/workflow/workflow_update.go b/chasm/lib/workflow/workflow_update.go new file mode 100644 index 00000000000..ff99af32c2c --- /dev/null +++ b/chasm/lib/workflow/workflow_update.go @@ -0,0 +1,67 @@ +package workflow + +import ( + "github.com/nexus-rpc/sdk-go/nexus" + "go.temporal.io/server/chasm" + "go.temporal.io/server/chasm/lib/callback" + "go.temporal.io/server/chasm/lib/workflow/gen/workflowpb/v1" + commonnexus "go.temporal.io/server/common/nexus" + "go.temporal.io/server/common/nexus/nexusrpc" +) + +type WorkflowUpdate struct { + chasm.UnimplementedComponent + + *workflowpb.UpdateState + + // MSPointer is a special in-memory field for accessing the underlying mutable state. + chasm.MSPointer + + // Callbacks map is used to store the callbacks for the update. + Callbacks chasm.Map[string, *callback.Callback] +} + +func NewWorkflowUpdate( + _ chasm.MutableContext, updateID string, msPointer chasm.MSPointer, +) *WorkflowUpdate { + return &WorkflowUpdate{ + UpdateState: &workflowpb.UpdateState{ + UpdateId: updateID, + }, + MSPointer: msPointer, + } +} + +func (u *WorkflowUpdate) LifecycleState( + _ chasm.Context, +) chasm.LifecycleState { + return chasm.LifecycleStateRunning +} + +func (u *WorkflowUpdate) GetNexusCompletion( + ctx chasm.Context, + requestID string, +) (nexusrpc.CompleteOperationOptions, error) { + // If the update was rejected, return the rejection failure directly instead + // of looking up a completion event that doesn't exist. 
+ if rf := u.GetRejectionFailure(); rf != nil { + f, err := commonnexus.TemporalFailureToNexusFailure(rf) + if err != nil { + return nexusrpc.CompleteOperationOptions{}, err + } + opErr := &nexus.OperationError{ + Message: "update rejected", + State: nexus.OperationStateFailed, + Cause: &nexus.FailureError{Failure: f}, + } + if err := nexusrpc.MarkAsWrapperError(nexusrpc.DefaultFailureConverter(), opErr); err != nil { + return nexusrpc.CompleteOperationOptions{}, err + } + return nexusrpc.CompleteOperationOptions{ + Error: opErr, + }, nil + } + + // Retrieve the completion data from the underlying mutable state via MSPointer + return u.GetNexusUpdateCompletion(ctx, u.UpdateId, requestID) +} diff --git a/chasm/ms_pointer.go b/chasm/ms_pointer.go index 301da259c9e..cb2c3cf35ad 100644 --- a/chasm/ms_pointer.go +++ b/chasm/ms_pointer.go @@ -56,3 +56,8 @@ func (m MSPointer) GetNexusCompletion(ctx Context, requestID string) (nexusrpc.C func (m MSPointer) GetWorkflowTypeName() string { return m.backend.GetExecutionInfo().GetWorkflowTypeName() } + +// GetNexusUpdateCompletion retrieves the Nexus operation completion data for the given update ID and request ID from the underlying mutable state. 
+func (m MSPointer) GetNexusUpdateCompletion(ctx Context, updateID string, requestID string) (nexusrpc.CompleteOperationOptions, error) { + return m.backend.GetNexusUpdateCompletion(ctx.goContext(), updateID, requestID) +} diff --git a/chasm/node_backend_mock.go b/chasm/node_backend_mock.go index 6ddc1a815c1..0fcc7faba92 100644 --- a/chasm/node_backend_mock.go +++ b/chasm/node_backend_mock.go @@ -32,6 +32,7 @@ type MockNodeBackend struct { HandleUpdateWorkflowStateStatus func(state enumsspb.WorkflowExecutionState, status enumspb.WorkflowExecutionStatus) (bool, error) HandleIsWorkflow func() bool HandleGetNexusCompletion func(ctx context.Context, requestID string) (nexusrpc.CompleteOperationOptions, error) + HandleGetNexusUpdateCompletion func(ctx context.Context, updateID string, requestID string) (nexusrpc.CompleteOperationOptions, error) HandleAddHistoryEvent func(t enumspb.EventType, setAttributes func(*historypb.HistoryEvent)) *historypb.HistoryEvent HandleLoadHistoryEvent func(ctx context.Context, token []byte) (*historypb.HistoryEvent, error) HandleGenerateEventLoadToken func(event *historypb.HistoryEvent) ([]byte, error) @@ -231,6 +232,17 @@ func (m *MockNodeBackend) EndpointRegistry() EndpointRegistry { return nil } +func (m *MockNodeBackend) GetNexusUpdateCompletion( + ctx context.Context, + updateID string, + requestID string, +) (nexusrpc.CompleteOperationOptions, error) { + if m.HandleGetNexusUpdateCompletion != nil { + return m.HandleGetNexusUpdateCompletion(ctx, updateID, requestID) + } + return nexusrpc.CompleteOperationOptions{}, nil +} + func (m *MockNodeBackend) NumTasksAdded() int { m.mu.Lock() defer m.mu.Unlock() diff --git a/chasm/tree.go b/chasm/tree.go index 3b9e4cb4fe9..540af938509 100644 --- a/chasm/tree.go +++ b/chasm/tree.go @@ -220,6 +220,11 @@ type ( ctx context.Context, requestID string, ) (nexusrpc.CompleteOperationOptions, error) + GetNexusUpdateCompletion( + ctx context.Context, + updateID string, + requestID string, + ) 
(nexusrpc.CompleteOperationOptions, error) EndpointRegistry() EndpointRegistry } diff --git a/common/dynamicconfig/constants.go b/common/dynamicconfig/constants.go index 267f5e1fd5a..04557e0d1ae 100644 --- a/common/dynamicconfig/constants.go +++ b/common/dynamicconfig/constants.go @@ -1019,6 +1019,11 @@ so forwarding by endpoint ID will not work out of the box.`, 32, `MaxCallbacksPerWorkflow is the maximum number of callbacks that can be attached to a workflow.`, ) + MaxCallbacksPerUpdateID = NewNamespaceIntSetting( + "system.maxCallbacksPerUpdateID", + 32, + `MaxCallbacksPerUpdateID is the maximum number of callbacks that can be attached to a single update ID.`, + ) FrontendLinkMaxSize = NewNamespaceIntSetting( "frontend.linkMaxSize", 4000, // Links may include a workflow ID and namespace name, both of which are limited to a length of 1000. @@ -2975,6 +2980,13 @@ map to enable DescribeWorkflow to resolve RequestIDRef signal backlinks. Require Only enable once all servers in the fleet have been upgraded to a version that understands the IncomingSignals CHASM field.`, ) + EnableWorkflowUpdateCallbacks = NewNamespaceBoolSetting( + "history.enableUpdateCallbacks", + false, + `Controls whether completion callbacks are created for workflow updates using +the CHASM implementation. 
When disabled, new update callbacks will not be registered, +but existing callbacks will still be processed and fired.`, + ) VersionMembershipCacheTTL = NewGlobalDurationSetting( "history.versionMembershipCacheTTL", diff --git a/go.mod b/go.mod index 7e264e7ba5d..2192ce7eaa9 100644 --- a/go.mod +++ b/go.mod @@ -63,7 +63,7 @@ require ( go.opentelemetry.io/otel/sdk v1.43.0 go.opentelemetry.io/otel/sdk/metric v1.43.0 go.opentelemetry.io/otel/trace v1.43.0 - go.temporal.io/api v1.62.12 + go.temporal.io/api v1.62.13-0.20260519214255-11907b499103 go.temporal.io/auto-scaled-workers v0.0.0-20260407181057-edd947d743d2 go.temporal.io/sdk v1.41.1 go.uber.org/fx v1.24.0 @@ -99,6 +99,7 @@ require ( github.com/go-openapi/swag/typeutils v0.26.0 // indirect github.com/go-openapi/swag/yamlutils v0.26.0 // indirect github.com/hashicorp/go-version v1.9.0 // indirect + github.com/nexus-rpc/nexus-proto-annotations v0.1.0 // indirect go.opentelemetry.io/collector/featuregate v1.56.0 // indirect ) diff --git a/go.sum b/go.sum index d93212758de..b5753456f6e 100644 --- a/go.sum +++ b/go.sum @@ -319,6 +319,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= +github.com/nexus-rpc/nexus-proto-annotations v0.1.0 h1:2fELd+9sqUtNu6Fg//pw8YFsxOvp8vZ8hfP0nHhNI80= +github.com/nexus-rpc/nexus-proto-annotations v0.1.0/go.mod h1:n3UjF1bPCW8llR8tHvbxJ+27yPWrhpo8w/Yg1IOuY0Y= github.com/nexus-rpc/sdk-go v0.6.0 h1:QRgnP2zTbxEbiyWG/aXH8uSC5LV/Mg1fqb19jb4DBlo= github.com/nexus-rpc/sdk-go v0.6.0/go.mod h1:FHdPfVQwRuJFZFTF0Y2GOAxCrbIBNrcPna9slkGKPYk= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= @@ -469,8 +471,8 
@@ go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.3.0 h1:R go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.3.0/go.mod h1:I89cynRj8y+383o7tEQVg2SVA6SRgDVIouWPUVXjx0U= go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.3.0 h1:CQvJSldHRUN6Z8jsUeYv8J0lXRvygALXIzsmAeCcZE0= go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.3.0/go.mod h1:xSQ+mEfJe/GjK1LXEyVOoSI1N9JV9ZI923X5kup43W4= -go.temporal.io/api v1.62.12 h1:627rVnItegQmrszg1bH4vfyc/1uNo5qCereCNkvZefw= -go.temporal.io/api v1.62.12/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= +go.temporal.io/api v1.62.13-0.20260519214255-11907b499103 h1:mPaS2+VdLF+TEcQ7nbAqjFIJPPmLzS+Tr0qDmrzvlG0= +go.temporal.io/api v1.62.13-0.20260519214255-11907b499103/go.mod h1:0k75tRljEuELWGeXjEZZO7zYqBln4+1FrG6+IMOMy7Q= go.temporal.io/auto-scaled-workers v0.0.0-20260407181057-edd947d743d2 h1:1hKeH3GyR6YD6LKMHGCZ76t6h1Sgha0hXVQBxWi3dlQ= go.temporal.io/auto-scaled-workers v0.0.0-20260407181057-edd947d743d2/go.mod h1:T8dnzVPeO+gaUTj9eDgm/lT2lZH4+JXNvrGaQGyVi50= go.temporal.io/sdk v1.41.1 h1:yOpvsHyDD1lNuwlGBv/SUodCPhjv9nDeC9lLHW/fJUA= diff --git a/service/frontend/namespace_handler.go b/service/frontend/namespace_handler.go index 087bc5f7de9..25a7c27cc24 100644 --- a/service/frontend/namespace_handler.go +++ b/service/frontend/namespace_handler.go @@ -906,6 +906,7 @@ func (d *namespaceHandler) createResponse( StandaloneNexusOperation: d.config.EnableChasm(info.Name) && d.config.StandaloneNexusOperationsEnabled(info.Name), WorkerPollCompleteOnShutdown: d.config.EnableCancelWorkerPollsOnShutdown(info.Name), WorkerCommands: d.config.WorkerCommandsEnabled(info.Name), + WorkflowUpdateCallbacks: d.config.EnableWorkflowUpdateCallbacks(info.Name), PollerAutoscaling: true, }, Limits: &namespacepb.NamespaceInfo_Limits{ diff --git a/service/frontend/service.go b/service/frontend/service.go index 0a743002912..dacde0b073d 100644 --- a/service/frontend/service.go +++ 
b/service/frontend/service.go @@ -191,6 +191,7 @@ type Config struct { EnableUpdateWorkflowExecution dynamicconfig.BoolPropertyFnWithNamespaceFilter EnableUpdateWorkflowExecutionAsyncAccepted dynamicconfig.BoolPropertyFnWithNamespaceFilter + EnableWorkflowUpdateCallbacks dynamicconfig.BoolPropertyFnWithNamespaceFilter NumConsecutiveWorkflowTaskProblemsToTriggerSearchAttribute dynamicconfig.IntPropertyFnWithNamespaceFilter EnableWorkerVersioningData dynamicconfig.BoolPropertyFnWithNamespaceFilter @@ -367,6 +368,7 @@ func NewConfig( EnableUpdateWorkflowExecution: dynamicconfig.FrontendEnableUpdateWorkflowExecution.Get(dc), EnableUpdateWorkflowExecutionAsyncAccepted: dynamicconfig.FrontendEnableUpdateWorkflowExecutionAsyncAccepted.Get(dc), + EnableWorkflowUpdateCallbacks: dynamicconfig.EnableWorkflowUpdateCallbacks.Get(dc), NumConsecutiveWorkflowTaskProblemsToTriggerSearchAttribute: dynamicconfig.NumConsecutiveWorkflowTaskProblemsToTriggerSearchAttribute.Get(dc), EnableWorkerVersioningData: dynamicconfig.FrontendEnableWorkerVersioningDataAPIs.Get(dc), diff --git a/service/history/api/describeworkflow/api.go b/service/history/api/describeworkflow/api.go index ca52e2ab638..938e007ed82 100644 --- a/service/history/api/describeworkflow/api.go +++ b/service/history/api/describeworkflow/api.go @@ -510,7 +510,11 @@ func buildCallbackInfosFromChasm( for _, field := range wf.Callbacks { callback := field.Get(chasmCtx) - callbackInfo, err := buildCallbackInfoFromChasm(ctx, namespaceID, callback, outboundQueueCBPool) + trigger := &workflowpb.CallbackInfo_Trigger{ + Variant: &workflowpb.CallbackInfo_Trigger_WorkflowClosed{}, + } + + callbackInfo, err := buildCallbackInfoFromChasm(ctx, namespaceID, callback, trigger, outboundQueueCBPool) if err != nil { logger.Error( "failed to build callback info from CHASM callback", @@ -526,6 +530,38 @@ func buildCallbackInfosFromChasm( } result = append(result, callbackInfo) } + // Collect update callbacks + for updateID, ufield := range 
wf.Updates { + updates := ufield.Get(chasmCtx) + + for _, ucfield := range updates.Callbacks { + callback := ucfield.Get(chasmCtx) + + trigger := &workflowpb.CallbackInfo_Trigger{ + Variant: &workflowpb.CallbackInfo_Trigger_UpdateWorkflowExecutionCompleted{ + UpdateWorkflowExecutionCompleted: &workflowpb.CallbackInfo_UpdateWorkflowExecutionCompleted{ + UpdateId: updateID, + }, + }, + } + + callbackInfo, err := buildCallbackInfoFromChasm(ctx, namespaceID, callback, trigger, outboundQueueCBPool) + if err != nil { + logger.Error( + "failed to build callback info from CHASM update callback", + tag.WorkflowNamespaceID(namespaceID.String()), + tag.WorkflowID(executionInfo.WorkflowId), + tag.WorkflowRunID(executionState.RunId), + tag.Error(err), + ) + return nil, serviceerror.NewInternal("failed to construct describe response") + } + if callbackInfo == nil { + continue + } + result = append(result, callbackInfo) + } + } return result, nil } @@ -535,6 +571,7 @@ func buildCallbackInfoFromChasm( ctx context.Context, namespaceID namespace.ID, callback *chasmcallback.Callback, + trigger *workflowpb.CallbackInfo_Trigger, outboundQueueCBPool *circuitbreakerpool.OutboundQueueCircuitBreakerPool, ) (*workflowpb.CallbackInfo, error) { // Create a circuit breaker state checker function @@ -547,7 +584,7 @@ func buildCallbackInfoFromChasm( return cb.State() != gobreaker.StateClosed } - return buildChasmCallbackInfo(ctx, namespaceID.String(), callback, circuitBreakerState) + return buildChasmCallbackInfo(ctx, namespaceID.String(), callback, trigger, circuitBreakerState) } // buildChasmCallbackInfo converts a single CHASM callback to API CallbackInfo format. 
@@ -556,6 +593,7 @@ func buildChasmCallbackInfo( ctx context.Context, namespaceID string, cb *chasmcallback.Callback, + trigger *workflowpb.CallbackInfo_Trigger, circuitBreakerState func(destination string) bool, ) (*workflowpb.CallbackInfo, error) { nexusVariant := cb.GetCallback().GetNexus() @@ -595,10 +633,6 @@ func buildChasmCallbackInfo( } } - trigger := &workflowpb.CallbackInfo_Trigger{ - Variant: &workflowpb.CallbackInfo_Trigger_WorkflowClosed{}, - } - return &workflowpb.CallbackInfo{ Callback: cbSpec, Trigger: trigger, diff --git a/service/history/api/pollupdate/api_test.go b/service/history/api/pollupdate/api_test.go index 29c78f6cdf9..55c37e01b64 100644 --- a/service/history/api/pollupdate/api_test.go +++ b/service/history/api/pollupdate/api_test.go @@ -58,6 +58,10 @@ type ( func (mockUpdateEventStore) OnAfterCommit(f func(context.Context)) { f(context.TODO()) } func (mockUpdateEventStore) OnAfterRollback(f func(context.Context)) {} func (mockUpdateEventStore) CanAddEvent() bool { return true } +func (mockUpdateEventStore) RejectWorkflowExecutionUpdate(string, *failurepb.Failure) error { + return nil +} +func (mockUpdateEventStore) HasRequestID(string) bool { return false } func (m mockWFConsistencyChecker) GetWorkflowLease( ctx context.Context, diff --git a/service/history/api/startworkflow/api.go b/service/history/api/startworkflow/api.go index 5127311a50e..ee5e1b131f9 100644 --- a/service/history/api/startworkflow/api.go +++ b/service/history/api/startworkflow/api.go @@ -694,6 +694,7 @@ func (s *Starter) handleUseExistingWorkflowOnConflictOptions( "", // identity nil, // priority nil, // timeSkippingConfig + nil, // workflowUpdateOptions ) return api.UpdateWorkflowWithoutWorkflowTask, err }, diff --git a/service/history/api/updateworkflow/api.go b/service/history/api/updateworkflow/api.go index 9ed6d8993f2..4e283b297a2 100644 --- a/service/history/api/updateworkflow/api.go +++ b/service/history/api/updateworkflow/api.go @@ -165,6 +165,17 @@ func (u 
*Updater) ApplyRequest( return nil, err } + callbacksAttached, err := u.upd.AttachCallbacks(updateRequest, workflow.WithEffects(effect.Immediate(ctx), ms)) + if err != nil { + return nil, err + } + if callbacksAttached { + return &api.UpdateWorkflowAction{ + Noop: false, + CreateWorkflowTask: false, + }, nil + } + // If WT is scheduled, but not started, updates will be attached to it, when WT is started. // If WT has already started, new speculative WT will be created when started WT completes. // If update is duplicate, then WT for this update was already created. @@ -263,6 +274,42 @@ func (u *Updater) OnSuccess( return nil, err } resp := u.CreateResponse(u.wfKey, status.Outcome, status.Stage) + + // Attach a link to the response. For accepted/completed updates, use a WorkflowEvent link + // with a RequestIdReference pointing to the accepted event. For rejected updates (stage + // COMPLETED with a failure outcome and no acceptance), use a Workflow link since rejected + // updates don't write any event to history. + requestID := u.req.GetRequest().GetRequest().GetRequestId() + if status.Outcome.GetFailure() != nil && status.Stage == enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED { + // Rejected update: no event in history, link to the workflow itself. + resp.Response.Link = &commonpb.Link{ + Variant: &commonpb.Link_Workflow_{ + Workflow: &commonpb.Link_Workflow{ + Namespace: u.req.Request.Namespace, + WorkflowId: u.wfKey.WorkflowID, + RunId: u.wfKey.RunID, + Reason: "Update rejected", + }, + }, + } + } else if status.Stage == enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED || status.Stage == enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED { + // Accepted or completed update: link to the accepted event. 
+ resp.Response.Link = &commonpb.Link{ + Variant: &commonpb.Link_WorkflowEvent_{ + WorkflowEvent: &commonpb.Link_WorkflowEvent{ + Namespace: u.req.Request.Namespace, + WorkflowId: u.wfKey.WorkflowID, + RunId: u.wfKey.RunID, + Reference: &commonpb.Link_WorkflowEvent_RequestIdRef{ + RequestIdRef: &commonpb.Link_WorkflowEvent_RequestIdReference{ + RequestId: requestID, + EventType: enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ACCEPTED, + }, + }, + }, + }, + } + } return resp, nil } diff --git a/service/history/api/updateworkflowoptions/api.go b/service/history/api/updateworkflowoptions/api.go index a2defe8f766..43204a5f40d 100644 --- a/service/history/api/updateworkflowoptions/api.go +++ b/service/history/api/updateworkflowoptions/api.go @@ -178,7 +178,7 @@ func MergeAndApply( if mergedOpts.GetVersioningOverride() == nil { unsetOverride = true } - _, err = ms.AddWorkflowExecutionOptionsUpdatedEvent(mergedOpts.GetVersioningOverride(), unsetOverride, "", nil, nil, identity, mergedOpts.GetPriority(), mergedOpts.GetTimeSkippingConfig()) + _, err = ms.AddWorkflowExecutionOptionsUpdatedEvent(mergedOpts.GetVersioningOverride(), unsetOverride, "", nil, nil, identity, mergedOpts.GetPriority(), mergedOpts.GetTimeSkippingConfig(), nil) if err != nil { return nil, hasChanges, err } diff --git a/service/history/api/updateworkflowoptions/api_test.go b/service/history/api/updateworkflowoptions/api_test.go index 07d31ec7289..8143b52cdd2 100644 --- a/service/history/api/updateworkflowoptions/api_test.go +++ b/service/history/api/updateworkflowoptions/api_test.go @@ -304,7 +304,7 @@ func (s *updateWorkflowOptionsSuite) TestInvoke_Success() { ).Return(&matchingservice.CheckTaskQueueVersionMembershipResponse{ IsMember: true, }, nil) - s.currentMutableState.EXPECT().AddWorkflowExecutionOptionsUpdatedEvent(expectedOverrideOptions.VersioningOverride, false, "", nil, nil, "", expectedOverrideOptions.Priority, expectedOverrideOptions.TimeSkippingConfig).Return(&historypb.HistoryEvent{}, nil) 
+ s.currentMutableState.EXPECT().AddWorkflowExecutionOptionsUpdatedEvent(expectedOverrideOptions.VersioningOverride, false, "", nil, nil, "", expectedOverrideOptions.Priority, expectedOverrideOptions.TimeSkippingConfig, nil).Return(&historypb.HistoryEvent{}, nil) s.currentContext.EXPECT().UpdateWorkflowExecutionAsActive(gomock.Any(), s.shardContext).Return(nil) updateReq := &historyservice.UpdateWorkflowExecutionOptionsRequest{ @@ -498,7 +498,7 @@ func TestMergeAndApply_TimeSkippingConfig(t *testing.T) { Config: tc.initialConfig, }, }).AnyTimes() - ms.EXPECT().AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, "", nil, gomock.Any()).Return(&historypb.HistoryEvent{}, nil) + ms.EXPECT().AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, "", nil, gomock.Any(), gomock.Any()).Return(&historypb.HistoryEvent{}, nil) result, hasChanges, err := MergeAndApply(ms, tc.updateOptions, tc.updateMask, "") require.NoError(t, err) diff --git a/service/history/configs/config.go b/service/history/configs/config.go index 57cc6ae1e1d..e8e7c592eb4 100644 --- a/service/history/configs/config.go +++ b/service/history/configs/config.go @@ -70,9 +70,11 @@ type Config struct { EnableTransitionHistory dynamicconfig.BoolPropertyFnWithNamespaceFilter MaxCallbacksPerWorkflow dynamicconfig.IntPropertyFnWithNamespaceFilter MaxCallbacksPerExecution dynamicconfig.IntPropertyFnWithNamespaceFilter + MaxCallbacksPerUpdateID dynamicconfig.IntPropertyFnWithNamespaceFilter EnableChasm dynamicconfig.BoolPropertyFnWithNamespaceFilter EnableCHASMCallbacks dynamicconfig.BoolPropertyFnWithNamespaceFilter EnableCHASMSignalBacklinks dynamicconfig.BoolPropertyFnWithNamespaceFilter + EnableWorkflowUpdateCallbacks dynamicconfig.BoolPropertyFnWithNamespaceFilter ChasmMaxInMemoryPureTasks dynamicconfig.IntPropertyFn EnableCHASMSchedulerCreation dynamicconfig.BoolPropertyFnWithNamespaceFilter EnableCHASMSchedulerMigration dynamicconfig.BoolPropertyFnWithNamespaceFilter @@ -495,15 +497,17 @@ 
func NewConfig( EnableTransitionHistory: dynamicconfig.EnableTransitionHistory.Get(dc), MaxCallbacksPerWorkflow: dynamicconfig.MaxCallbacksPerWorkflow.Get(dc), MaxCallbacksPerExecution: callback.MaxPerExecution.Get(dc), + MaxCallbacksPerUpdateID: dynamicconfig.MaxCallbacksPerUpdateID.Get(dc), EnableChasm: dynamicconfig.EnableChasm.Get(dc), ChasmMaxInMemoryPureTasks: dynamicconfig.ChasmMaxInMemoryPureTasks.Get(dc), EnableCHASMSchedulerCreation: dynamicconfig.EnableCHASMSchedulerCreation.Get(dc), EnableCHASMSchedulerMigration: dynamicconfig.EnableCHASMSchedulerMigration.Get(dc), - EnableCHASMCallbacks: dynamicconfig.EnableCHASMCallbacks.Get(dc), - EnableCHASMSignalBacklinks: dynamicconfig.EnableCHASMSignalBacklinks.Get(dc), - ExternalPayloadsEnabled: dynamicconfig.ExternalPayloadsEnabled.Get(dc), + EnableCHASMCallbacks: dynamicconfig.EnableCHASMCallbacks.Get(dc), + EnableCHASMSignalBacklinks: dynamicconfig.EnableCHASMSignalBacklinks.Get(dc), + ExternalPayloadsEnabled: dynamicconfig.ExternalPayloadsEnabled.Get(dc), + EnableWorkflowUpdateCallbacks: dynamicconfig.EnableWorkflowUpdateCallbacks.Get(dc), EventsShardLevelCacheMaxSizeBytes: dynamicconfig.EventsCacheMaxSizeBytes.Get(dc), // 512KB EventsHostLevelCacheMaxSizeBytes: dynamicconfig.EventsHostLevelCacheMaxSizeBytes.Get(dc), // 256MB diff --git a/service/history/historybuilder/event_factory.go b/service/history/historybuilder/event_factory.go index 0d2cdb04695..04c51ecf37c 100644 --- a/service/history/historybuilder/event_factory.go +++ b/service/history/historybuilder/event_factory.go @@ -407,6 +407,7 @@ func (b *EventFactory) CreateWorkflowExecutionOptionsUpdatedEvent( identity string, priority *commonpb.Priority, timeSkippingConfig *workflowpb.TimeSkippingConfig, + workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, ) *historypb.HistoryEvent { event := b.createHistoryEvent(enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_OPTIONS_UPDATED, b.timeSource.Now()) 
event.Attributes = &historypb.HistoryEvent_WorkflowExecutionOptionsUpdatedEventAttributes{ @@ -418,6 +419,7 @@ func (b *EventFactory) CreateWorkflowExecutionOptionsUpdatedEvent( Identity: identity, Priority: priority, TimeSkippingConfig: timeSkippingConfig, + WorkflowUpdateOptions: workflowUpdateOptions, }, } event.Links = links diff --git a/service/history/historybuilder/history_builder.go b/service/history/historybuilder/history_builder.go index 35416a48396..bf69f2fc5ca 100644 --- a/service/history/historybuilder/history_builder.go +++ b/service/history/historybuilder/history_builder.go @@ -475,6 +475,7 @@ func (b *HistoryBuilder) AddWorkflowExecutionOptionsUpdatedEvent( identity string, priority *commonpb.Priority, timeSkippingConfig *workflowpb.TimeSkippingConfig, + workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, ) *historypb.HistoryEvent { event := b.EventFactory.CreateWorkflowExecutionOptionsUpdatedEvent( worker_versioning.ConvertOverrideToV32(versioningOverride), @@ -485,6 +486,7 @@ func (b *HistoryBuilder) AddWorkflowExecutionOptionsUpdatedEvent( identity, priority, timeSkippingConfig, + workflowUpdateOptions, ) event, _ = b.EventStore.add(event) return event diff --git a/service/history/historybuilder/history_builder_categorization_test.go b/service/history/historybuilder/history_builder_categorization_test.go index 14a9029a1c0..450212c90d5 100644 --- a/service/history/historybuilder/history_builder_categorization_test.go +++ b/service/history/historybuilder/history_builder_categorization_test.go @@ -208,7 +208,7 @@ func TestHistoryBuilder_FlushBufferToCurrentBatch(t *testing.T) { t.Errorf("expected 1 event in memBufferBatch got %d", len(hb.memBufferBatch)) } // add another event to memBufferBatch - hb.AddWorkflowExecutionOptionsUpdatedEvent(nil, false, "request-id-1", nil, nil, "", nil, nil) + hb.AddWorkflowExecutionOptionsUpdatedEvent(nil, false, "request-id-1", nil, nil, "", nil, nil, nil) 
if len(hb.memBufferBatch) != 2 { t.Errorf("expected 2 event in memBufferBatch got %d", len(hb.memBufferBatch)) } diff --git a/service/history/interfaces/mutable_state.go b/service/history/interfaces/mutable_state.go index a0b85253cf7..2698a95a663 100644 --- a/service/history/interfaces/mutable_state.go +++ b/service/history/interfaces/mutable_state.go @@ -127,10 +127,11 @@ type ( identity string, priority *commonpb.Priority, timeSkippingConfig *workflowpb.TimeSkippingConfig, + workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, ) (*historypb.HistoryEvent, error) - AddWorkflowExecutionUpdateAcceptedEvent(protocolInstanceID string, acceptedRequestMessageId string, acceptedRequestSequencingEventId int64, acceptedRequest *updatepb.Request) (*historypb.HistoryEvent, error) + AddWorkflowExecutionUpdateAcceptedEvent(updateID string, acceptedRequestMessageID string, acceptedRequestSequencingEventID int64, acceptedRequest *updatepb.Request) (*historypb.HistoryEvent, error) AddWorkflowExecutionUpdateCompletedEvent(acceptedEventID int64, updResp *updatepb.Response) (*historypb.HistoryEvent, error) - RejectWorkflowExecutionUpdate(protocolInstanceID string, updRejection *updatepb.Rejection) error + RejectWorkflowExecutionUpdate(updateID string, failure *failurepb.Failure) error AddWorkflowExecutionUpdateAdmittedEvent(request *updatepb.Request, origin enumspb.UpdateAdmittedEventOrigin) (*historypb.HistoryEvent, error) ApplyWorkflowExecutionUpdateAdmittedEvent(event *historypb.HistoryEvent, batchId int64) error VisitUpdates(visitor func(updID string, updInfo *persistencespb.UpdateInfo)) diff --git a/service/history/interfaces/mutable_state_mock.go b/service/history/interfaces/mutable_state_mock.go index 20378e67ac4..684b648df54 100644 --- a/service/history/interfaces/mutable_state_mock.go +++ b/service/history/interfaces/mutable_state_mock.go @@ -678,18 +678,18 @@ func (mr *MockMutableStateMockRecorder) 
AddWorkflowExecutionCanceledEvent(arg0, } // AddWorkflowExecutionOptionsUpdatedEvent mocks base method. -func (m *MockMutableState) AddWorkflowExecutionOptionsUpdatedEvent(versioningOverride *workflow.VersioningOverride, unsetVersioningOverride bool, attachRequestID string, attachCompletionCallbacks []*common.Callback, links []*common.Link, identity string, priority *common.Priority, timeSkippingConfig *workflow.TimeSkippingConfig) (*history.HistoryEvent, error) { +func (m *MockMutableState) AddWorkflowExecutionOptionsUpdatedEvent(versioningOverride *workflow.VersioningOverride, unsetVersioningOverride bool, attachRequestID string, attachCompletionCallbacks []*common.Callback, links []*common.Link, identity string, priority *common.Priority, timeSkippingConfig *workflow.TimeSkippingConfig, workflowUpdateOptions []*history.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate) (*history.HistoryEvent, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "AddWorkflowExecutionOptionsUpdatedEvent", versioningOverride, unsetVersioningOverride, attachRequestID, attachCompletionCallbacks, links, identity, priority, timeSkippingConfig) + ret := m.ctrl.Call(m, "AddWorkflowExecutionOptionsUpdatedEvent", versioningOverride, unsetVersioningOverride, attachRequestID, attachCompletionCallbacks, links, identity, priority, timeSkippingConfig, workflowUpdateOptions) ret0, _ := ret[0].(*history.HistoryEvent) ret1, _ := ret[1].(error) return ret0, ret1 } // AddWorkflowExecutionOptionsUpdatedEvent indicates an expected call of AddWorkflowExecutionOptionsUpdatedEvent. 
-func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionOptionsUpdatedEvent(versioningOverride, unsetVersioningOverride, attachRequestID, attachCompletionCallbacks, links, identity, priority, timeSkippingConfig any) *gomock.Call { +func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionOptionsUpdatedEvent(versioningOverride, unsetVersioningOverride, attachRequestID, attachCompletionCallbacks, links, identity, priority, timeSkippingConfig, workflowUpdateOptions any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddWorkflowExecutionOptionsUpdatedEvent", reflect.TypeOf((*MockMutableState)(nil).AddWorkflowExecutionOptionsUpdatedEvent), versioningOverride, unsetVersioningOverride, attachRequestID, attachCompletionCallbacks, links, identity, priority, timeSkippingConfig) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddWorkflowExecutionOptionsUpdatedEvent", reflect.TypeOf((*MockMutableState)(nil).AddWorkflowExecutionOptionsUpdatedEvent), versioningOverride, unsetVersioningOverride, attachRequestID, attachCompletionCallbacks, links, identity, priority, timeSkippingConfig, workflowUpdateOptions) } // AddWorkflowExecutionPausedEvent mocks base method. @@ -813,18 +813,18 @@ func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionUnpausedEvent(identi } // AddWorkflowExecutionUpdateAcceptedEvent mocks base method. 
-func (m *MockMutableState) AddWorkflowExecutionUpdateAcceptedEvent(protocolInstanceID, acceptedRequestMessageId string, acceptedRequestSequencingEventId int64, acceptedRequest *update.Request) (*history.HistoryEvent, error) { +func (m *MockMutableState) AddWorkflowExecutionUpdateAcceptedEvent(updateID, acceptedRequestMessageID string, acceptedRequestSequencingEventID int64, acceptedRequest *update.Request) (*history.HistoryEvent, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "AddWorkflowExecutionUpdateAcceptedEvent", protocolInstanceID, acceptedRequestMessageId, acceptedRequestSequencingEventId, acceptedRequest) + ret := m.ctrl.Call(m, "AddWorkflowExecutionUpdateAcceptedEvent", updateID, acceptedRequestMessageID, acceptedRequestSequencingEventID, acceptedRequest) ret0, _ := ret[0].(*history.HistoryEvent) ret1, _ := ret[1].(error) return ret0, ret1 } // AddWorkflowExecutionUpdateAcceptedEvent indicates an expected call of AddWorkflowExecutionUpdateAcceptedEvent. -func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionUpdateAcceptedEvent(protocolInstanceID, acceptedRequestMessageId, acceptedRequestSequencingEventId, acceptedRequest any) *gomock.Call { +func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionUpdateAcceptedEvent(updateID, acceptedRequestMessageID, acceptedRequestSequencingEventID, acceptedRequest any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddWorkflowExecutionUpdateAcceptedEvent", reflect.TypeOf((*MockMutableState)(nil).AddWorkflowExecutionUpdateAcceptedEvent), protocolInstanceID, acceptedRequestMessageId, acceptedRequestSequencingEventId, acceptedRequest) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddWorkflowExecutionUpdateAcceptedEvent", reflect.TypeOf((*MockMutableState)(nil).AddWorkflowExecutionUpdateAcceptedEvent), updateID, acceptedRequestMessageID, acceptedRequestSequencingEventID, acceptedRequest) } // AddWorkflowExecutionUpdateAdmittedEvent mocks base method. 
@@ -3431,17 +3431,17 @@ func (mr *MockMutableStateMockRecorder) RegenerateActivityRetryTask(ai, newSched } // RejectWorkflowExecutionUpdate mocks base method. -func (m *MockMutableState) RejectWorkflowExecutionUpdate(protocolInstanceID string, updRejection *update.Rejection) error { +func (m *MockMutableState) RejectWorkflowExecutionUpdate(updateID string, arg1 *failure.Failure) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "RejectWorkflowExecutionUpdate", protocolInstanceID, updRejection) + ret := m.ctrl.Call(m, "RejectWorkflowExecutionUpdate", updateID, arg1) ret0, _ := ret[0].(error) return ret0 } // RejectWorkflowExecutionUpdate indicates an expected call of RejectWorkflowExecutionUpdate. -func (mr *MockMutableStateMockRecorder) RejectWorkflowExecutionUpdate(protocolInstanceID, updRejection any) *gomock.Call { +func (mr *MockMutableStateMockRecorder) RejectWorkflowExecutionUpdate(updateID, arg1 any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RejectWorkflowExecutionUpdate", reflect.TypeOf((*MockMutableState)(nil).RejectWorkflowExecutionUpdate), protocolInstanceID, updRejection) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RejectWorkflowExecutionUpdate", reflect.TypeOf((*MockMutableState)(nil).RejectWorkflowExecutionUpdate), updateID, arg1) } // RemoveSpeculativeWorkflowTaskTimeoutTask mocks base method. 
diff --git a/service/history/ndc/events_reapplier_test.go b/service/history/ndc/events_reapplier_test.go index 2f5995dc160..8608083e52b 100644 --- a/service/history/ndc/events_reapplier_test.go +++ b/service/history/ndc/events_reapplier_test.go @@ -115,6 +115,7 @@ func (s *nDCEventReapplicationSuite) TestReapplyEvents_AppliedEvent_WorkflowExec attr.GetIdentity(), attr.GetPriority(), attr.GetTimeSkippingConfig(), + attr.GetWorkflowUpdateOptions(), ).Return(event, nil) msCurrent.EXPECT().HSM().Return(s.hsmNode).AnyTimes() msCurrent.EXPECT().IsWorkflowPendingOnWorkflowTaskBackoff().Return(true) @@ -163,6 +164,7 @@ func (s *nDCEventReapplicationSuite) TestReapplyEvents_AppliedEvent_WorkflowExec attr.GetIdentity(), attr.GetPriority(), timeSkippingConfig, + attr.GetWorkflowUpdateOptions(), ).Return(event, nil) msCurrent.EXPECT().HSM().Return(s.hsmNode).AnyTimes() msCurrent.EXPECT().IsWorkflowPendingOnWorkflowTaskBackoff().Return(true) diff --git a/service/history/ndc/workflow_resetter.go b/service/history/ndc/workflow_resetter.go index 26d7fb1c537..f7459c15555 100644 --- a/service/history/ndc/workflow_resetter.go +++ b/service/history/ndc/workflow_resetter.go @@ -972,6 +972,7 @@ func reapplyEvents( attr.GetIdentity(), attr.GetPriority(), attr.GetTimeSkippingConfig(), + attr.GetWorkflowUpdateOptions(), ); err != nil { return reappliedEvents, err } diff --git a/service/history/ndc/workflow_resetter_test.go b/service/history/ndc/workflow_resetter_test.go index 4da5bcfc064..54c8f1343c8 100644 --- a/service/history/ndc/workflow_resetter_test.go +++ b/service/history/ndc/workflow_resetter_test.go @@ -1207,6 +1207,7 @@ func (s *workflowResetterSuite) TestReapplyEvents() { attr.GetIdentity(), attr.GetPriority(), attr.GetTimeSkippingConfig(), + attr.GetWorkflowUpdateOptions(), ).Return(&historypb.HistoryEvent{}, nil) case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED: attr := event.GetWorkflowExecutionSignaledEventAttributes() @@ -1737,6 +1738,7 @@ func (s 
*workflowResetterSuite) TestReapplyEvents_WorkflowOptionsUpdated_WithTim attr.GetIdentity(), attr.GetPriority(), timeSkippingConfig, + attr.GetWorkflowUpdateOptions(), ).Return(&historypb.HistoryEvent{}, nil) appliedEvents, err := reapplyEvents(context.Background(), ms, nil, smReg, []*historypb.HistoryEvent{event}, nil, "", true) diff --git a/service/history/workflow/mutable_state_impl.go b/service/history/workflow/mutable_state_impl.go index e5d410475cf..2bfad286d93 100644 --- a/service/history/workflow/mutable_state_impl.go +++ b/service/history/workflow/mutable_state_impl.go @@ -35,7 +35,6 @@ import ( tokenspb "go.temporal.io/server/api/token/v1" workflowspb "go.temporal.io/server/api/workflow/v1" "go.temporal.io/server/chasm" - "go.temporal.io/server/chasm/lib/callback" chasmworkflow "go.temporal.io/server/chasm/lib/workflow" "go.temporal.io/server/common" "go.temporal.io/server/common/backoff" @@ -740,6 +739,112 @@ func (ms *MutableStateImpl) EndpointRegistry() chasm.EndpointRegistry { return ms.endpointRegistry } +func (ms *MutableStateImpl) GetNexusUpdateCompletion( + ctx context.Context, + updateID string, + requestID string, +) (_ nexusrpc.CompleteOperationOptions, err error) { + var closeTime time.Time + cevent, err := ms.getUpdateOutcomeEvent(ctx, updateID) + var outcome *updatepb.Outcome + if err != nil { + // If the workflow is complete but the update outcome is missing we need to respond to all callbacks + ce, errCE := ms.GetCompletionEvent(ctx) + if errors.Is(errCE, ErrMissingWorkflowCompletionEvent) { + return nexusrpc.CompleteOperationOptions{}, err + } else if errCE != nil { + return nexusrpc.CompleteOperationOptions{}, errCE + } + outcome = &updatepb.Outcome{ + Value: &updatepb.Outcome_Failure{ + Failure: common.CloneProto(update.AcceptedUpdateCompletedWorkflowFailure), + }, + } + closeTime = ce.GetEventTime().AsTime() + } else { + outcome = cevent.GetWorkflowExecutionUpdateCompletedEventAttributes().GetOutcome() + closeTime = 
cevent.GetEventTime().AsTime() + } + + // Create a RequestIdReference link for the update callback. This is preferred over an + // EventReference link because the requestID is always available, whereas the accepted + // event ID may not be resolvable (e.g., when the workflow completed before the update). + // Note: rejected updates are removed from mutable state, so this code path is only + // reachable for accepted/completed updates. + link := &commonpb.Link_WorkflowEvent{ + Namespace: ms.namespaceEntry.Name().String(), + WorkflowId: ms.executionInfo.WorkflowId, + RunId: ms.executionState.RunId, + } + requestIDInfo, exists := ms.executionState.RequestIds[requestID] + if exists { + link.Reference = &commonpb.Link_WorkflowEvent_RequestIdRef{ + RequestIdRef: &commonpb.Link_WorkflowEvent_RequestIdReference{ + RequestId: requestID, + EventType: requestIDInfo.GetEventType(), + }, + } + } + startLink := commonnexus.ConvertLinkWorkflowEventToNexusLink(link) + + startTime := ms.executionState.GetStartTime().AsTime() + links := []nexus.Link{startLink} + + if outcome.GetSuccess() != nil { + return nexusCompleteOperationSuccess(outcome.GetSuccess(), startTime, closeTime, links), nil + } else if outcome.GetFailure() != nil { + return nexusCompleteOperationFailure(outcome.GetFailure(), nexus.OperationStateFailed, "operation failed", startTime, closeTime, links) + } + return nexusrpc.CompleteOperationOptions{}, serviceerror.NewInternalf("unknown update outcome for update ID: %s", updateID) +} + +// nexusCompleteOperationSuccess constructs a successful CompleteOperationOptions from the given payloads. +// Only the first payload is used since Nexus does not support multi-value returns. 
+func nexusCompleteOperationSuccess( + result *commonpb.Payloads, + startTime, closeTime time.Time, + links []nexus.Link, +) nexusrpc.CompleteOperationOptions { + var p *commonpb.Payload + if payloads := result.GetPayloads(); len(payloads) > 0 { + p = payloads[0] + } + return nexusrpc.CompleteOperationOptions{ + Result: p, + StartTime: startTime, + CloseTime: closeTime, + Links: links, + } +} + +// nexusCompleteOperationFailure constructs a failed CompleteOperationOptions from the given failure. +func nexusCompleteOperationFailure( + f *failurepb.Failure, + state nexus.OperationState, + message string, + startTime, closeTime time.Time, + links []nexus.Link, +) (nexusrpc.CompleteOperationOptions, error) { + nexusFailure, err := commonnexus.TemporalFailureToNexusFailure(f) + if err != nil { + return nexusrpc.CompleteOperationOptions{}, err + } + opErr := &nexus.OperationError{ + Message: message, + State: state, + Cause: &nexus.FailureError{Failure: nexusFailure}, + } + if err := nexusrpc.MarkAsWrapperError(nexusrpc.DefaultFailureConverter(), opErr); err != nil { + return nexusrpc.CompleteOperationOptions{}, err + } + return nexusrpc.CompleteOperationOptions{ + Error: opErr, + StartTime: startTime, + CloseTime: closeTime, + Links: links, + }, nil +} + // GetNexusCompletion converts a workflow completion event into a [nexus.OperationCompletion]. // Completions may be sent to arbitrary third parties, we intentionally do not include any termination reasons, and // expose only failure messages. 
@@ -781,118 +886,57 @@ func (ms *MutableStateImpl) GetNexusCompletion( } startLink := commonnexus.ConvertLinkWorkflowEventToNexusLink(link) + startTime := ms.executionState.GetStartTime().AsTime() + closeTime := ce.GetEventTime().AsTime() + links := []nexus.Link{startLink} + switch ce.GetEventType() { case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_COMPLETED: - payloads := ce.GetWorkflowExecutionCompletedEventAttributes().GetResult().GetPayloads() - var p *commonpb.Payload // default to nil, the payload serializer converts nil to Nexus nil Content. - if len(payloads) > 0 { - // All of our SDKs support returning a single value from workflows, we can safely ignore the - // rest of the payloads. Additionally, even if a workflow could return more than a single value, - // Nexus does not support it. - p = payloads[0] - } - return nexusrpc.CompleteOperationOptions{ - Result: p, - StartTime: ms.executionState.GetStartTime().AsTime(), - CloseTime: ce.GetEventTime().AsTime(), - Links: []nexus.Link{startLink}, - }, nil + return nexusCompleteOperationSuccess( + ce.GetWorkflowExecutionCompletedEventAttributes().GetResult(), + startTime, closeTime, links, + ), nil case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_FAILED: - f, err := commonnexus.TemporalFailureToNexusFailure(ce.GetWorkflowExecutionFailedEventAttributes().GetFailure()) - if err != nil { - return nexusrpc.CompleteOperationOptions{}, err - } - opErr := &nexus.OperationError{ - Message: "operation failed", - State: nexus.OperationStateFailed, - Cause: &nexus.FailureError{Failure: f}, - } - if err := nexusrpc.MarkAsWrapperError(nexusrpc.DefaultFailureConverter(), opErr); err != nil { - return nexusrpc.CompleteOperationOptions{}, err - } - return nexusrpc.CompleteOperationOptions{ - Error: opErr, - StartTime: ms.executionState.GetStartTime().AsTime(), - CloseTime: ce.GetEventTime().AsTime(), - Links: []nexus.Link{startLink}, - }, nil + return nexusCompleteOperationFailure( + 
ce.GetWorkflowExecutionFailedEventAttributes().GetFailure(), + nexus.OperationStateFailed, "operation failed", + startTime, closeTime, links, + ) case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_CANCELED: - f, err := commonnexus.TemporalFailureToNexusFailure(&failurepb.Failure{ - Message: "operation canceled", - FailureInfo: &failurepb.Failure_CanceledFailureInfo{ - CanceledFailureInfo: &failurepb.CanceledFailureInfo{ - Details: ce.GetWorkflowExecutionCanceledEventAttributes().GetDetails(), + return nexusCompleteOperationFailure( + &failurepb.Failure{ + Message: "operation canceled", + FailureInfo: &failurepb.Failure_CanceledFailureInfo{ + CanceledFailureInfo: &failurepb.CanceledFailureInfo{ + Details: ce.GetWorkflowExecutionCanceledEventAttributes().GetDetails(), + }, }, }, - }) - if err != nil { - return nexusrpc.CompleteOperationOptions{}, err - } - opErr := &nexus.OperationError{ - State: nexus.OperationStateCanceled, - Message: "operation canceled", - Cause: &nexus.FailureError{Failure: f}, - } - if err := nexusrpc.MarkAsWrapperError(nexusrpc.DefaultFailureConverter(), opErr); err != nil { - return nexusrpc.CompleteOperationOptions{}, err - } - return nexusrpc.CompleteOperationOptions{ - Error: opErr, - StartTime: ms.executionState.GetStartTime().AsTime(), - CloseTime: ce.GetEventTime().AsTime(), - Links: []nexus.Link{startLink}, - }, nil + nexus.OperationStateCanceled, "operation canceled", + startTime, closeTime, links, + ) case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_TERMINATED: - f, err := commonnexus.TemporalFailureToNexusFailure(&failurepb.Failure{ - Message: "operation terminated", - FailureInfo: &failurepb.Failure_TerminatedFailureInfo{ - TerminatedFailureInfo: &failurepb.TerminatedFailureInfo{}, + return nexusCompleteOperationFailure( + &failurepb.Failure{ + Message: "operation terminated", + FailureInfo: &failurepb.Failure_TerminatedFailureInfo{ + TerminatedFailureInfo: &failurepb.TerminatedFailureInfo{}, + }, }, - }) - if err != nil { - return 
nexusrpc.CompleteOperationOptions{}, err - } - opErr := &nexus.OperationError{ - State: nexus.OperationStateFailed, - Message: "operation failed", - Cause: &nexus.FailureError{Failure: f}, - } - if err := nexusrpc.MarkAsWrapperError(nexusrpc.DefaultFailureConverter(), opErr); err != nil { - return nexusrpc.CompleteOperationOptions{}, err - } - return nexusrpc.CompleteOperationOptions{ - Error: opErr, - StartTime: ms.executionState.GetStartTime().AsTime(), - CloseTime: ce.GetEventTime().AsTime(), - Links: []nexus.Link{startLink}, - }, nil + nexus.OperationStateFailed, "operation failed", + startTime, closeTime, links, + ) case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_TIMED_OUT: - f, err := commonnexus.TemporalFailureToNexusFailure(&failurepb.Failure{ - Message: "operation exceeded internal timeout", - FailureInfo: &failurepb.Failure_TimeoutFailureInfo{ - TimeoutFailureInfo: &failurepb.TimeoutFailureInfo{ - // Not filling in timeout type and other information, it's not particularly interesting to a Nexus - // caller. 
+ return nexusCompleteOperationFailure( + &failurepb.Failure{ + Message: "operation exceeded internal timeout", + FailureInfo: &failurepb.Failure_TimeoutFailureInfo{ + TimeoutFailureInfo: &failurepb.TimeoutFailureInfo{}, }, }, - }) - if err != nil { - return nexusrpc.CompleteOperationOptions{}, err - } - opErr := &nexus.OperationError{ - State: nexus.OperationStateFailed, - Message: "operation failed", - Cause: &nexus.FailureError{Failure: f}, - } - if err := nexusrpc.MarkAsWrapperError(nexusrpc.DefaultFailureConverter(), opErr); err != nil { - return nexusrpc.CompleteOperationOptions{}, err - } - return nexusrpc.CompleteOperationOptions{ - Error: opErr, - StartTime: ms.executionState.GetStartTime().AsTime(), - CloseTime: ce.GetEventTime().AsTime(), - Links: []nexus.Link{startLink}, - }, nil + nexus.OperationStateFailed, "operation failed", + startTime, closeTime, links, + ) } return nexusrpc.CompleteOperationOptions{}, serviceerror.NewInternalf("invalid workflow execution status: %v", ce.GetEventType()) } @@ -1467,6 +1511,17 @@ func (ms *MutableStateImpl) GetUpdateOutcome( ctx context.Context, updateID string, ) (*updatepb.Outcome, error) { + event, err := ms.getUpdateOutcomeEvent(ctx, updateID) + if err != nil { + return nil, err + } + return event.GetWorkflowExecutionUpdateCompletedEventAttributes().GetOutcome(), nil +} + +func (ms *MutableStateImpl) getUpdateOutcomeEvent( + ctx context.Context, + updateID string, +) (*historypb.HistoryEvent, error) { if ms.executionInfo.UpdateInfos == nil { return nil, serviceerror.NewNotFound("update not found") } @@ -1493,11 +1548,10 @@ func (ms *MutableStateImpl) GetUpdateOutcome( if err != nil { return nil, err } - attrs := event.GetWorkflowExecutionUpdateCompletedEventAttributes() - if attrs == nil { + if event.GetWorkflowExecutionUpdateCompletedEventAttributes() == nil { return nil, serviceerror.NewInternal("event pointer does not reference an update completed event") } - return attrs.GetOutcome(), nil + return event, nil 
} func (ms *MutableStateImpl) GetActivityScheduledEvent( @@ -3237,6 +3291,43 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionUnpausedEvent(event *historypb return ms.updatePauseInfoSearchAttribute() } +func (ms *MutableStateImpl) addUpdateCallbacks( + event *historypb.HistoryEvent, + updateID string, + requestID string, + updateCallbacks []*commonpb.Callback, +) error { + if len(updateCallbacks) == 0 { + return nil + } + if ms.chasmCallbacksEnabled() && ms.config.EnableWorkflowUpdateCallbacks(ms.GetNamespaceEntry().Name().String()) { + // Initialize chasm tree once for new workflows. + // Using context.Background() because this is done outside an actual request context and the + // chasmworkflow.NewWorkflow does not actually use it currently. + ms.EnsureChasmWorkflowComponent(context.Background()) + return ms.addUpdateCallbacksChasm(event, updateID, requestID, updateCallbacks) + } + + return nil +} + +func (ms *MutableStateImpl) addUpdateCallbacksChasm( + event *historypb.HistoryEvent, + updateID string, + requestID string, + updateCallbacks []*commonpb.Callback, +) error { + wf, ctx, err := ms.ChasmWorkflowComponent(context.Background()) + if err != nil { + return err + } + + nsName := ms.GetNamespaceEntry().Name().String() + maxCallbacksPerWorkflow := ms.config.MaxCallbacksPerWorkflow(nsName) + maxCallbacksPerUpdateID := ms.config.MaxCallbacksPerUpdateID(nsName) + return wf.AddUpdateCompletionCallbacks(ctx, event.EventTime, updateID, requestID, updateCallbacks, maxCallbacksPerWorkflow, maxCallbacksPerUpdateID) +} + func (ms *MutableStateImpl) addCompletionCallbacks( event *historypb.HistoryEvent, requestID string, @@ -4743,7 +4834,9 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionFailedEvent( if attrs.RetryState != enumspb.RETRY_STATE_IN_PROGRESS { return ms.processCloseCallbacks() } - return nil + // Workflow-level callbacks are inherited by the retry run, but update callbacks + // must fire now because the update was aborted on the old run. 
+ return ms.processUpdateCloseCallbacks() } func (ms *MutableStateImpl) AddTimeoutWorkflowEvent( @@ -4791,7 +4884,9 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionTimedoutEvent( if attrs.RetryState != enumspb.RETRY_STATE_IN_PROGRESS { return ms.processCloseCallbacks() } - return nil + // Workflow-level callbacks are inherited by the retry run, but update callbacks + // must fire now because the update was aborted on the old run. + return ms.processUpdateCloseCallbacks() } func (ms *MutableStateImpl) AddWorkflowExecutionCancelRequestedEvent( @@ -5452,22 +5547,44 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionUpdateAdmittedEvent(event *his ms.approximateSize += sizeDelta ms.updateInfoUpdated[updateID] = struct{}{} ms.writeEventToCache(event) - return nil + + // Store completion callbacks from the update request at admission time. + // This is needed for the reset/reapply case where the UpdateAccepted event + // may have a nil AcceptedRequest (because the UpdateAdmitted event already + // contains the request), causing callbacks to be lost at acceptance time. 
+ requestID := attrs.GetRequest().GetRequestId() + if requestID != "" { + ms.AttachRequestID(requestID, event.EventType, event.EventId) + } + return ms.addUpdateCallbacks( + event, + updateID, + requestID, + attrs.GetRequest().GetCompletionCallbacks(), + ) } func (ms *MutableStateImpl) AddWorkflowExecutionUpdateAcceptedEvent( - protocolInstanceID string, - acceptedRequestMessageId string, - acceptedRequestSequencingEventId int64, + updateID string, + acceptedRequestMessageID string, + acceptedRequestSequencingEventID int64, acceptedRequest *updatepb.Request, ) (*historypb.HistoryEvent, error) { if err := ms.checkMutability(tag.WorkflowActionUpdateAccepted); err != nil { return nil, err } - event := ms.hBuilder.AddWorkflowExecutionUpdateAcceptedEvent(protocolInstanceID, acceptedRequestMessageId, acceptedRequestSequencingEventId, acceptedRequest) + event := ms.hBuilder.AddWorkflowExecutionUpdateAcceptedEvent(updateID, acceptedRequestMessageID, acceptedRequestSequencingEventID, acceptedRequest) if err := ms.ApplyWorkflowExecutionUpdateAcceptedEvent(event); err != nil { return nil, err } + // Add links from Nexus callbacks to the event. + callbacksLinks := make([]*commonpb.Link, 0) + for _, cb := range acceptedRequest.GetCompletionCallbacks() { + if cb.GetNexus() != nil { + callbacksLinks = append(callbacksLinks, cb.GetLinks()...) + } + } + event.Links = callbacksLinks return event, nil } @@ -5505,6 +5622,27 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionUpdateAcceptedEvent( ms.approximateSize += sizeDelta ms.updateInfoUpdated[updateID] = struct{}{} ms.writeEventToCache(event) + // Add update completion callbacks. + // This is the primary path for registering callbacks — AcceptedRequest is + // present in the normal flow. The exception is the reset/reapply case where + // callbacks are registered at admission time instead (because the + // UpdateAccepted event has a nil AcceptedRequest after reset). 
In that case, + // addCallbacksToMap is a no-op since the requestID-indexed keys already + // exist from the admitted event. + if attrs.GetAcceptedRequest() != nil { + requestID := attrs.GetAcceptedRequest().GetRequestId() + if requestID != "" { + ms.AttachRequestID(requestID, event.EventType, event.EventId) + } + if err := ms.addUpdateCallbacks( + event, + updateID, + requestID, + attrs.GetAcceptedRequest().GetCompletionCallbacks(), + ); err != nil { + return err + } + } return nil } @@ -5551,13 +5689,60 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionUpdateCompletedEvent( sizeDelta = ui.Size() - sizeBefore ms.approximateSize += sizeDelta ms.updateInfoUpdated[updateID] = struct{}{} + if ms.ChasmEnabled() { + if err := ms.processUpdateCallbacks(updateID); err != nil { + return err + } + } ms.writeEventToCache(event) return nil } -func (ms *MutableStateImpl) RejectWorkflowExecutionUpdate(_ string, _ *updatepb.Rejection) error { - // TODO (alex-update): This method is noop because we don't currently write rejections to the history. - return nil +func (ms *MutableStateImpl) RejectWorkflowExecutionUpdate(updateID string, wfFailure *failurepb.Failure) error { + if !ms.chasmCallbacksEnabled() { + return nil + } + + wf, _, err := ms.ChasmWorkflowComponentReadOnly(context.Background()) + if err != nil { + return err + } + + // Return early if there are no CHASM update callbacks for this update. + if _, ok := wf.Updates[updateID]; !ok { + return nil + } + + // Store the rejection failure and fire the callbacks. + wf, ctx, err := ms.ChasmWorkflowComponent(context.Background()) + if err != nil { + return err + } + return wf.RejectUpdate(ctx, updateID, wfFailure) +} + +// processUpdateCallbacks triggers "UpdateFinished" callbacks using the CHASM implementation. 
+func (ms *MutableStateImpl) processUpdateCallbacks(updateID string) error { + wf, _, err := ms.ChasmWorkflowComponentReadOnly(context.Background()) + if err != nil { + return err + } + + // Return early if there are no chasm callbacks to process for this update ID. + if len(wf.Updates) == 0 { + return nil + } + if _, ok := wf.Updates[updateID]; !ok { + return nil + } + + // If there are callbacks to process, create a writable workflow component. + wf, ctx, err := ms.ChasmWorkflowComponent(context.Background()) + if err != nil { + return err + } + + return wf.ProcessUpdateCallbacks(ctx, updateID) } func (ms *MutableStateImpl) AddWorkflowExecutionOptionsUpdatedEvent( @@ -5569,6 +5754,7 @@ func (ms *MutableStateImpl) AddWorkflowExecutionOptionsUpdatedEvent( identity string, priority *commonpb.Priority, timeSkippingConfig *workflowpb.TimeSkippingConfig, + workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, ) (*historypb.HistoryEvent, error) { if err := ms.checkMutability(tag.WorkflowActionWorkflowOptionsUpdated); err != nil { return nil, err @@ -5582,6 +5768,7 @@ func (ms *MutableStateImpl) AddWorkflowExecutionOptionsUpdatedEvent( identity, priority, timeSkippingConfig, + workflowUpdateOptions, ) prevEffectiveVersioningBehavior := ms.GetEffectiveVersioningBehavior() prevEffectiveDeployment := ms.GetEffectiveDeployment() @@ -5634,6 +5821,33 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionOptionsUpdatedEvent(event *his return err } + // Add update callbacks + for _, updateOptions := range attributes.GetWorkflowUpdateOptions() { + updateID := updateOptions.GetUpdateId() + requestID := updateOptions.GetAttachedRequestId() + if requestID != "" { + ms.AttachRequestID(requestID, event.EventType, event.EventId) + } + if err := ms.addUpdateCallbacks( + event, + updateID, + requestID, + updateOptions.GetAttachedCompletionCallbacks(), + ); err != nil { + return err + } + // If the update is already completed, fire 
the callbacks immediately. + if ms.ChasmEnabled() { + if ui, ok := ms.executionInfo.UpdateInfos[updateID]; ok { + if _, isCompleted := ui.Value.(*persistencespb.UpdateInfo_Completion); isCompleted { + if err := ms.processUpdateCallbacks(updateID); err != nil { + return err + } + } + } + } + } + // Update priority. if attributes.GetPriority() != nil { if !proto.Equal(ms.executionInfo.Priority, attributes.GetPriority()) { @@ -6034,7 +6248,9 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionContinuedAsNewEvent( ms.executionInfo.CloseTime = continueAsNewEvent.GetEventTime() ms.ClearStickyTaskQueue() ms.writeEventToCache(continueAsNewEvent) - return nil + // Workflow-level callbacks are inherited by the new run, but update callbacks + // must fire now because the update was aborted on the old run. + return ms.processUpdateCloseCallbacks() } func (ms *MutableStateImpl) AddStartChildWorkflowExecutionInitiatedEvent( @@ -6851,6 +7067,35 @@ func (ms *MutableStateImpl) AddExternalPayloadCount(count int64) { ms.executionInfo.ExecutionStats.ExternalPayloadCount += count } +// processUpdateCloseCallbacks triggers only update-level callbacks, leaving workflow-level +// callbacks untouched. This is used when the workflow is continuing to a new run +// (ContinueAsNew, retry, cron): workflow-level callbacks are inherited by the new run, +// but update callbacks must fire now because the update was aborted on the old run. +// +// Note: unlike processCloseCallbacks, this does not need a WorkflowWasReset guard. +// Reset always terminates the old run (via terminateWorkflow), which goes through +// processCloseCallbacks — not through the retry/CAN paths that call this method. +func (ms *MutableStateImpl) processUpdateCloseCallbacks() error { + if !ms.ChasmEnabled() { + // Update callbacks are only supported in CHASM mode. 
+ return nil + } + + wf, _, err := ms.ChasmWorkflowComponentReadOnly(context.Background()) + if err != nil { + return err + } + if len(wf.Updates) == 0 { + return nil + } + + wf, ctx, err := ms.ChasmWorkflowComponent(context.Background()) + if err != nil { + return err + } + return wf.ProcessAllUpdateCloseCallbacks(ctx) +} + // processCloseCallbacks triggers "WorkflowClosed" callbacks, applying the state machine transition that schedules // callback tasks. func (ms *MutableStateImpl) processCloseCallbacks() error { @@ -6903,7 +7148,7 @@ func (ms *MutableStateImpl) processCloseCallbacksChasm() error { } // Return early if there are no chasm callbacks to process. - if len(wf.Callbacks) == 0 { + if len(wf.Callbacks) == 0 && len(wf.Updates) == 0 { return nil } @@ -6913,7 +7158,7 @@ func (ms *MutableStateImpl) processCloseCallbacksChasm() error { return err } - return callback.ScheduleStandbyCallbacks(ctx, wf.Callbacks) + return wf.ProcessCloseCallbacks(ctx) } func (ms *MutableStateImpl) AddTasks( diff --git a/service/history/workflow/mutable_state_impl_test.go b/service/history/workflow/mutable_state_impl_test.go index df62e74f7d2..8c65db2017b 100644 --- a/service/history/workflow/mutable_state_impl_test.go +++ b/service/history/workflow/mutable_state_impl_test.go @@ -1158,7 +1158,7 @@ func (s *mutableStateSuite) TestOverride_UnpinnedBase_SetPinnedAndUnsetWithEmpty s.createMutableStateWithVersioningBehavior(baseBehavior, deployment1, tq) // set pinned override - event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(pinnedOptions2.GetVersioningOverride(), false, "", nil, nil, id, nil, nil) + event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(pinnedOptions2.GetVersioningOverride(), false, "", nil, nil, id, nil, nil, nil) s.NoError(err) s.verifyEffectiveDeployment(deployment2, overrideBehavior) s.verifyWorkflowOptionsUpdatedEventAttr( @@ -1173,7 +1173,7 @@ func (s *mutableStateSuite) TestOverride_UnpinnedBase_SetPinnedAndUnsetWithEmpty // 
unset pinned override with boolean id = uuid.NewString() - event, err = s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, id, nil, nil) + event, err = s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, id, nil, nil, nil) s.NoError(err) s.verifyEffectiveDeployment(deployment1, baseBehavior) s.verifyWorkflowOptionsUpdatedEventAttr( @@ -1195,7 +1195,7 @@ func (s *mutableStateSuite) TestOverride_PinnedBase_SetUnpinnedAndUnsetWithEmpty s.createMutableStateWithVersioningBehavior(baseBehavior, deployment1, tq) // set unpinned override - event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(unpinnedOptions.GetVersioningOverride(), false, "", nil, nil, id, nil, nil) + event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(unpinnedOptions.GetVersioningOverride(), false, "", nil, nil, id, nil, nil, nil) s.NoError(err) s.verifyEffectiveDeployment(deployment1, overrideBehavior) s.verifyWorkflowOptionsUpdatedEventAttr( @@ -1210,7 +1210,7 @@ func (s *mutableStateSuite) TestOverride_PinnedBase_SetUnpinnedAndUnsetWithEmpty // unset pinned override with empty id = uuid.NewString() - event, err = s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, id, nil, nil) + event, err = s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, id, nil, nil, nil) s.NoError(err) s.verifyEffectiveDeployment(deployment1, baseBehavior) s.verifyWorkflowOptionsUpdatedEventAttr( @@ -1231,7 +1231,7 @@ func (s *mutableStateSuite) TestOverride_RedirectFails() { id := uuid.NewString() s.createMutableStateWithVersioningBehavior(baseBehavior, deployment1, tq) - event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(pinnedOptions3.GetVersioningOverride(), false, "", nil, nil, id, nil, nil) + event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(pinnedOptions3.GetVersioningOverride(), false, "", nil, nil, id, nil, nil, nil) s.NoError(err) 
s.verifyEffectiveDeployment(deployment3, overrideBehavior) s.verifyWorkflowOptionsUpdatedEventAttr( @@ -1258,7 +1258,7 @@ func (s *mutableStateSuite) TestOverride_BaseDeploymentUpdatedOnCompletion() { id := uuid.NewString() s.createMutableStateWithVersioningBehavior(baseBehavior, deployment1, tq) - event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(pinnedOptions3.GetVersioningOverride(), false, "", nil, nil, id, nil, nil) + event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(pinnedOptions3.GetVersioningOverride(), false, "", nil, nil, id, nil, nil, nil) s.NoError(err) s.verifyEffectiveDeployment(deployment3, overrideBehavior) s.verifyWorkflowOptionsUpdatedEventAttr( @@ -1312,7 +1312,7 @@ func (s *mutableStateSuite) TestOverride_BaseDeploymentUpdatedOnCompletion() { // now we unset the override and check that the base deployment/behavior is in effect id = uuid.NewString() - event, err = s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, id, nil, nil) + event, err = s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, id, nil, nil, nil) s.NoError(err) s.verifyEffectiveDeployment(deployment2, baseBehavior) s.verifyWorkflowOptionsUpdatedEventAttr( diff --git a/service/history/workflow/update/abort_reason.go b/service/history/workflow/update/abort_reason.go index 2b3737125f9..a7594a8a35b 100644 --- a/service/history/workflow/update/abort_reason.go +++ b/service/history/workflow/update/abort_reason.go @@ -58,10 +58,10 @@ var reasonStateMatrix = map[reasonState]failureError{ // There can be different types of Update failures coming from worker and a client must handle them anyway. // It is easier and less error-prone for a client to handle only Update failures instead of both failures and // not obvious NotFound errors in case if the Workflow completes before the Update completes. 
- reasonState{r: AbortReasonWorkflowCompleted, st: stateProvisionallyAccepted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, - reasonState{r: AbortReasonWorkflowCompleted, st: stateAccepted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, - reasonState{r: AbortReasonWorkflowCompleted, st: stateProvisionallyCompleted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, - reasonState{r: AbortReasonWorkflowCompleted, st: stateProvisionallyCompletedAfterAccepted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowCompleted, st: stateProvisionallyAccepted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowCompleted, st: stateAccepted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowCompleted, st: stateProvisionallyCompleted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowCompleted, st: stateProvisionallyCompletedAfterAccepted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, // Completed Updates can't be aborted. reasonState{r: AbortReasonWorkflowCompleted, st: stateCompleted}: {f: nil, err: nil}, reasonState{r: AbortReasonWorkflowCompleted, st: stateProvisionallyAborted}: {f: nil, err: nil}, @@ -74,10 +74,10 @@ var reasonStateMatrix = map[reasonState]failureError{ reasonState{r: AbortReasonWorkflowContinuing, st: stateAdmitted}: {f: nil, err: consts.ErrWorkflowClosing}, reasonState{r: AbortReasonWorkflowContinuing, st: stateSent}: {f: nil, err: consts.ErrWorkflowClosing}, // Accepted Update can't be applied to the new run, and must be failed same way as if Workflow is completed. 
- reasonState{r: AbortReasonWorkflowContinuing, st: stateProvisionallyAccepted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, - reasonState{r: AbortReasonWorkflowContinuing, st: stateAccepted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, - reasonState{r: AbortReasonWorkflowContinuing, st: stateProvisionallyCompleted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, - reasonState{r: AbortReasonWorkflowContinuing, st: stateProvisionallyCompletedAfterAccepted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowContinuing, st: stateProvisionallyAccepted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowContinuing, st: stateAccepted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowContinuing, st: stateProvisionallyCompleted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowContinuing, st: stateProvisionallyCompletedAfterAccepted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, // Completed Updates can't be aborted. 
reasonState{r: AbortReasonWorkflowContinuing, st: stateCompleted}: {f: nil, err: nil}, reasonState{r: AbortReasonWorkflowContinuing, st: stateProvisionallyAborted}: {f: nil, err: nil}, @@ -121,6 +121,8 @@ func (r AbortReason) String() string { return "WorkflowCompleted" case AbortReasonWorkflowContinuing: return "WorkflowContinuing" + case AbortReasonWorkflowTaskFailed: + return "WorkflowTaskFailed" case lastAbortReason: return fmt.Sprintf("invalid reason %d", r) } diff --git a/service/history/workflow/update/errors_failures.go b/service/history/workflow/update/errors_failures.go index 6e14883018e..c20c57aaf41 100644 --- a/service/history/workflow/update/errors_failures.go +++ b/service/history/workflow/update/errors_failures.go @@ -24,7 +24,7 @@ var ( }}, } - acceptedUpdateCompletedWorkflowFailure = &failurepb.Failure{ + AcceptedUpdateCompletedWorkflowFailure = &failurepb.Failure{ Message: "Workflow Update failed because the Workflow completed before the Update completed.", Source: "Server", FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{ diff --git a/service/history/workflow/update/export_test.go b/service/history/workflow/update/export_test.go index f9a78e89d72..fc56d39c748 100644 --- a/service/history/workflow/update/export_test.go +++ b/service/history/workflow/update/export_test.go @@ -8,9 +8,10 @@ var ( // while we *could* write the unit test code to walk an Update through a // series of message deliveries to get to the right state, it's much faster // just to instantiate directly into the desired state. 
+ NewAdmitted = newAdmitted NewAccepted = newAccepted NewCompleted = newCompleted - AbortFailure = acceptedUpdateCompletedWorkflowFailure + AbortFailure = AcceptedUpdateCompletedWorkflowFailure ) // ObserveCompletion exports withOnComplete to unit tests diff --git a/service/history/workflow/update/store.go b/service/history/workflow/update/store.go index c5520714f62..b0fc93fa7c7 100644 --- a/service/history/workflow/update/store.go +++ b/service/history/workflow/update/store.go @@ -3,8 +3,11 @@ package update import ( "context" + commonpb "go.temporal.io/api/common/v1" + failurepb "go.temporal.io/api/failure/v1" historypb "go.temporal.io/api/history/v1" updatepb "go.temporal.io/api/update/v1" + workflowpb "go.temporal.io/api/workflow/v1" persistencespb "go.temporal.io/server/api/persistence/v1" "go.temporal.io/server/common/effect" ) @@ -43,7 +46,34 @@ type ( resp *updatepb.Response, ) (*historypb.HistoryEvent, error) + // AddWorkflowExecutionOptionsUpdatedEvent writes a workflow execution + // options updated event. This is used to attach completion callbacks, + // request IDs, links, and per-update callback options to the workflow. + // The data may not be durable when this function returns. + AddWorkflowExecutionOptionsUpdatedEvent( + versioningOverride *workflowpb.VersioningOverride, + unsetVersioningOverride bool, + attachRequestID string, + attachCompletionCallbacks []*commonpb.Callback, + links []*commonpb.Link, + identity string, + priority *commonpb.Priority, + timeSkippingConfig *workflowpb.TimeSkippingConfig, + workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) + // CanAddEvent returns true if an event can be added to the EventStore. CanAddEvent() bool + + // RejectWorkflowExecutionUpdate notifies the store that an update was + // rejected by the worker's validator. 
The store uses this to fire any + // completion callbacks that were registered at admission time and to + // clean up the update's mutable-state entry. + RejectWorkflowExecutionUpdate(updateID string, rejectionFailure *failurepb.Failure) error + + // HasRequestID checks whether the given requestID has already been + // recorded for this workflow execution. Used by AttachCallbacks to deduplicate + // callback attachment when the same request is retried. + HasRequestID(requestID string) bool } ) diff --git a/service/history/workflow/update/store_mock_test.go b/service/history/workflow/update/store_mock_test.go index 2e5469af539..8fb08d5c69c 100644 --- a/service/history/workflow/update/store_mock_test.go +++ b/service/history/workflow/update/store_mock_test.go @@ -3,9 +3,12 @@ package update_test import ( "context" + commonpb "go.temporal.io/api/common/v1" + failurepb "go.temporal.io/api/failure/v1" historypb "go.temporal.io/api/history/v1" "go.temporal.io/api/serviceerror" updatepb "go.temporal.io/api/update/v1" + workflowpb "go.temporal.io/api/workflow/v1" persistencespb "go.temporal.io/server/api/persistence/v1" "go.temporal.io/server/common/effect" "go.temporal.io/server/service/history/workflow/update" @@ -72,7 +75,37 @@ type mockEventStore struct { resp *updatepb.Response, ) (*historypb.HistoryEvent, error) - CanAddEventFunc func() bool + AddWorkflowExecutionOptionsUpdatedEventFunc func( + versioningOverride *workflowpb.VersioningOverride, + unsetVersioningOverride bool, + attachRequestID string, + attachCompletionCallbacks []*commonpb.Callback, + links []*commonpb.Link, + identity string, + priority *commonpb.Priority, + timeSkippingConfig *workflowpb.TimeSkippingConfig, + workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) + + CanAddEventFunc func() bool + HasRequestIDFunc func(requestID string) bool +} + +func (m mockEventStore) 
AddWorkflowExecutionOptionsUpdatedEvent( + versioningOverride *workflowpb.VersioningOverride, + unsetVersioningOverride bool, + attachRequestID string, + attachCompletionCallbacks []*commonpb.Callback, + links []*commonpb.Link, + identity string, + priority *commonpb.Priority, + timeSkippingConfig *workflowpb.TimeSkippingConfig, + workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, +) (*historypb.HistoryEvent, error) { + if m.AddWorkflowExecutionOptionsUpdatedEventFunc != nil { + return m.AddWorkflowExecutionOptionsUpdatedEventFunc(versioningOverride, unsetVersioningOverride, attachRequestID, attachCompletionCallbacks, links, identity, priority, timeSkippingConfig, workflowUpdateOptions) + } + return &historypb.HistoryEvent{}, nil } func (m mockEventStore) AddWorkflowExecutionUpdateAcceptedEvent( @@ -103,3 +136,14 @@ func (m mockEventStore) CanAddEvent() bool { } return true } + +func (m mockEventStore) RejectWorkflowExecutionUpdate(_ string, _ *failurepb.Failure) error { + return nil +} + +func (m mockEventStore) HasRequestID(requestID string) bool { + if m.HasRequestIDFunc != nil { + return m.HasRequestIDFunc(requestID) + } + return false +} diff --git a/service/history/workflow/update/update.go b/service/history/workflow/update/update.go index 3d552f5ab46..f84b8e112ae 100644 --- a/service/history/workflow/update/update.go +++ b/service/history/workflow/update/update.go @@ -5,8 +5,10 @@ import ( "errors" "time" + commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" failurepb "go.temporal.io/api/failure/v1" + historypb "go.temporal.io/api/history/v1" protocolpb "go.temporal.io/api/protocol/v1" "go.temporal.io/api/serviceerror" updatepb "go.temporal.io/api/update/v1" @@ -18,6 +20,14 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) +// pendingCallback holds an AttachCallbacks request that arrived while the Update +// was in stateSent.
These are flushed to the event store on acceptance +// in onAcceptanceMsg. In-memory only; lost on registry clear/lock release. +type pendingCallback struct { + requestID string + completionCallbacks []*commonpb.Callback +} + type ( // Update docs are at /docs/architecture/workflow-update.md. Update struct { @@ -42,6 +52,10 @@ type ( checkLimits func(*updatepb.Request) error instrumentation *instrumentation admittedTime time.Time + // pendingCallbacks buffers AttachCallbacks requests that arrive while + // the Update is in stateSent. Flushed to the event store in onAcceptanceMsg. + // Cleared on rejection, abort, or rollback. In-memory only; lost on lock release. + pendingCallbacks []pendingCallback // These fields might be accessed while not holding the workflow lock. accepted future.Future[*failurepb.Failure] @@ -251,6 +265,11 @@ func (u *Update) abort( return } + // Clear any buffered AttachCallbacks callbacks defensively. Abort is called during + // cleanup (e.g., registry clear, workflow close) where a hard error would be + // worse than silently clearing. + u.pendingCallbacks = nil + u.instrumentation.countAborted(u.id, reason) prevState := u.setState(stateProvisionallyAborted) @@ -351,6 +370,134 @@ func (u *Update) Admit( return nil } +// AttachCallbacks attaches completion callbacks from a second caller to an update +// that has already progressed past admission. If the update is accepted, it writes +// a WorkflowExecutionOptionsUpdatedEvent with the caller's callbacks and request ID. +// If the update is in stateSent (sent to worker, not yet accepted), callbacks are +// buffered in memory and flushed when the update is accepted. If the update is +// already completed, returns true without attaching callbacks since the caller +// receives the result synchronously. 
+// +// Returns (true, nil) if the caller should proceed (callbacks attached or update already completed), +// (false, nil) if the update is in an early state where attachment does not apply, +// or (false, error) if the update is in a transient state where the caller should retry. +func (u *Update) AttachCallbacks( + req *updatepb.Request, + eventStore EventStore, +) (isCallbackAttached bool, err error) { + // Only attach callbacks if the request actually has something to attach. + // This preserves existing behavior for callers that don't set callbacks. + if len(req.GetCompletionCallbacks()) == 0 { + return false, nil + } + if req.GetRequestId() == "" { + return false, serviceerror.NewInvalidArgumentf("invalid %T: request_id is required when completion_callbacks are set", req) + } + + switch u.state { + case stateProvisionallyAccepted, + stateProvisionallyCompleted, + stateProvisionallyCompletedAfterAccepted, + stateProvisionallyAborted: + // Provisional states are transient — they exist only between an event write + // and its OnAfterCommit callback within a single workflow task completion + // transaction. In practice, AttachCallbacks should never see these states because + // a new UpdateWorkflowExecution API call must acquire the workflow lock, + // which means the previous transaction has already committed and provisional + // states have resolved. This guard is kept defensively in case future code + // paths call AttachCallbacks within the same transaction. + return false, serviceerror.NewResourceExhausted(enumspb.RESOURCE_EXHAUSTED_CAUSE_BUSY_WORKFLOW, "workflow update is not yet accepted, please retry") + + case stateSent: + // stateSent: the update has been sent to the worker but not yet accepted. + // Buffer the callbacks in memory; they will be flushed to the event store + // when the update is accepted in onAcceptanceMsg. 
+ // Returning (true, nil) is safe because: + // - The caller already holds the workflow lock + // - A workflow task already exists (the update was sent via one) + // - No new workflow task is needed — just buffer until acceptance + // - The event will be written atomically with acceptance + // If the Update struct is lost (registry cleared), the abort mechanism fires + // registryClearedErr on the caller's future, prompting an immediate retry. + if req.GetRequestId() != "" { + for _, pc := range u.pendingCallbacks { + if pc.requestID == req.GetRequestId() { + return true, nil + } + } + } + u.pendingCallbacks = append(u.pendingCallbacks, pendingCallback{ + requestID: req.GetRequestId(), + completionCallbacks: req.GetCompletionCallbacks(), + }) + return true, nil + + case stateAccepted: + // Persist immediately as its own event, u.persistCallback(...) will dedup against requestIDs + // that are already recorded on the workflow. + return u.persistCallback(eventStore, req.GetRequestId(), req.GetCompletionCallbacks()) + + case stateCompleted: + // If the update is already completed, the result is returned synchronously + // in the UpdateWorkflowExecution response — no callback needed. + return true, nil + + default: + // All other states are too early or not applicable for callback attachment. + return false, nil + } +} + +// persistPendingCallbacks writes one WorkflowExecutionOptionsUpdatedEvent per +// buffered AttachCallbacks callback, skipping any whose requestID is already persisted. +// Called from onAcceptanceMsg after the acceptance event has been written. +// +// NOTE: Each pending callback requires its own event because the API proto's +// WorkflowUpdateOptionsUpdate carries a singular AttachedRequestId, and the +// WorkflowUpdateOptions map is keyed by update ID (all entries here share u.id, +// so only one map entry is possible per event). 
Each requestID must be durably +// recorded in the event so that ApplyWorkflowExecutionOptionsUpdatedEvent can +// call AttachRequestID during replay for correct deduplication. +// +// In practice, the number of buffered callbacks is very small (1-2): it requires +// multiple concurrent callers to call AttachCallbacks while the update is in +// stateSent. The per-update callback limit (MaxCallbacksPerUpdateID) bounds the +// worst case. +func (u *Update) persistPendingCallbacks(eventStore EventStore) error { + for _, pc := range u.pendingCallbacks { + if _, err := u.persistCallback(eventStore, pc.requestID, pc.completionCallbacks); err != nil { + return err + } + } + u.pendingCallbacks = nil + return nil +} + +// persistCallback writes a single WorkflowExecutionOptionsUpdatedEvent to attach the +// given requestID and completion callbacks, and deduplicate on requestID if already attached. +func (u *Update) persistCallback( + eventStore EventStore, + requestID string, + completionCallbacks []*commonpb.Callback, +) (isCallbackAttached bool, err error) { + // Callback is already attached, no need to update. + if requestID != "" && eventStore.HasRequestID(requestID) { + return true, nil + } + _, err = eventStore.AddWorkflowExecutionOptionsUpdatedEvent( + nil, false, "", nil, nil, "", nil, nil, + []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate{{ + UpdateId: u.id, + AttachedRequestId: requestID, + AttachedCompletionCallbacks: completionCallbacks, + }}, + ) + if err != nil { + return false, err + } + return true, nil +} + // OnProtocolMessage delivers a message to the Update state machine. The Body field of // *protocolpb.Message parameter is expected to be one of *updatepb.Response, // *updatepb.Rejection, *updatepb.Acceptance. 
Writes to the EventStore @@ -503,6 +650,12 @@ func (u *Update) onAcceptanceMsg( } u.acceptedEventID = event.EventId + // Persist any callbacks that were buffered by AttachCallbacks while the Update was in stateSent. + // See persistPendingCallbacks for why this writes one event per pending entry. + if err := u.persistPendingCallbacks(eventStore); err != nil { + return err + } + prevState := u.setState(stateProvisionallyAccepted) eventStore.OnAfterCommit(func(context.Context) { if !u.state.Matches(stateSet(stateProvisionallyAccepted | stateProvisionallyCompleted | stateProvisionallyAborted)) { @@ -545,6 +698,7 @@ func (u *Update) onAcceptanceMsg( return } u.acceptedEventID = common.EmptyEventID + u.pendingCallbacks = nil u.setState(prevState) }) return nil @@ -556,7 +710,7 @@ func (u *Update) onAcceptanceMsg( // are both completed with the failurepb.Failure value from the updatepb.Rejection input message. func (u *Update) onRejectionMsg( rej *updatepb.Rejection, - effects effect.Controller, + eventStore EventStore, ) error { // See comment in onAcceptanceMsg about stateAdmitted. if err := u.checkStateSet(rej, stateSet(stateSent|stateAdmitted)); err != nil { @@ -566,7 +720,15 @@ func (u *Update) onRejectionMsg( return err } u.instrumentation.countRejectionMsg() - return u.reject(rej.Failure, effects) + // Notify the event store so it can fire any completion callbacks that were + // registered at admission time (e.g., after reset/reapply) and clean up + // the update's mutable-state entry. + if err := eventStore.RejectWorkflowExecutionUpdate(u.id, rej.Failure); err != nil { + return err + } + // Clear any buffered AttachCallbacks callbacks — they cannot be delivered for a rejected update. + u.pendingCallbacks = nil + return u.reject(rej.Failure, eventStore) } // rejects an Update with provided failure.
@@ -574,6 +736,14 @@ func (u *Update) reject( rejectionFailure *failurepb.Failure, effects effect.Controller, ) error { + if len(u.pendingCallbacks) > 0 { + // Invariant: buffer must be cleared before reject. If we reach here, + // there is a bug in the caller (onRejectionMsg should clear the buffer). + return serviceerror.NewInternalf( + "update %s: reject called with %d pending AttachCallbacks callbacks", + u.id, len(u.pendingCallbacks), + ) + } prevState := u.setState(stateProvisionallyCompleted) effects.OnAfterCommit(func(context.Context) { if u.state != stateProvisionallyCompleted { @@ -675,3 +845,7 @@ func (u *Update) GetSize() int { } return size } + +func (u *Update) AcceptedEventID() int64 { + return u.acceptedEventID +} diff --git a/service/history/workflow/update/update_test.go b/service/history/workflow/update/update_test.go index 89c441f1a2b..1a05b98aeca 100644 --- a/service/history/workflow/update/update_test.go +++ b/service/history/workflow/update/update_test.go @@ -6,12 +6,14 @@ import ( "time" "github.com/stretchr/testify/require" + commonpb "go.temporal.io/api/common/v1" . 
"go.temporal.io/api/enums/v1" failurepb "go.temporal.io/api/failure/v1" historypb "go.temporal.io/api/history/v1" protocolpb "go.temporal.io/api/protocol/v1" "go.temporal.io/api/serviceerror" updatepb "go.temporal.io/api/update/v1" + workflowpb "go.temporal.io/api/workflow/v1" "go.temporal.io/server/common/effect" "go.temporal.io/server/common/future" "go.temporal.io/server/common/payloads" @@ -1188,3 +1190,410 @@ func assertAborted(t *testing.T, upd *update.Update, expectedErr error) { } } } + +func TestAttachCallbacks(t *testing.T) { + tv := testvars.New(t) + testCallbacks := []*commonpb.Callback{ + { + Variant: &commonpb.Callback_Nexus_{ + Nexus: &commonpb.Callback_Nexus{ + Url: "http://localhost:1234/callback", + }, + }, + }, + } + testRequest := &updatepb.Request{ + Meta: &updatepb.Meta{UpdateId: tv.UpdateID()}, + Input: &updatepb.Input{Name: "not_empty"}, + RequestId: tv.RequestID(), + CompletionCallbacks: testCallbacks, + } + + capturingStore := func(effects *effect.Buffer) (mockEventStore, *[]*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate) { + var captured []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate + store := mockEventStore{ + Controller: effects, + AddWorkflowExecutionOptionsUpdatedEventFunc: func( + _ *workflowpb.VersioningOverride, _ bool, _ string, _ []*commonpb.Callback, _ []*commonpb.Link, _ string, _ *commonpb.Priority, + _ *workflowpb.TimeSkippingConfig, workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) { + captured = workflowUpdateOptions + return &historypb.HistoryEvent{}, nil + }, + } + return store, &captured + } + + trackingStore := func(effects *effect.Buffer) (mockEventStore, *bool) { + eventCreated := false + store := mockEventStore{ + Controller: effects, + AddWorkflowExecutionOptionsUpdatedEventFunc: func( + _ *workflowpb.VersioningOverride, _ bool, _ string, _ 
[]*commonpb.Callback, _ []*commonpb.Link, _ string, _ *commonpb.Priority, + _ *workflowpb.TimeSkippingConfig, _ []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) { + eventCreated = true + return &historypb.HistoryEvent{}, nil + }, + } + return store, &eventCreated + } + + countingOptionsStore := func(effects *effect.Buffer) (mockEventStore, *int) { + count := 0 + store := mockEventStore{ + Controller: effects, + AddWorkflowExecutionOptionsUpdatedEventFunc: func( + _ *workflowpb.VersioningOverride, _ bool, _ string, _ []*commonpb.Callback, _ []*commonpb.Link, _ string, _ *commonpb.Priority, + _ *workflowpb.TimeSkippingConfig, _ []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) { + count++ + return &historypb.HistoryEvent{}, nil + }, + } + return store, &count + } + + t.Run("on stateAccepted fires callbacks and returns true", func(t *testing.T) { + effects := &effect.Buffer{} + store, capturedOptions := capturingStore(effects) + upd := update.NewAccepted(tv.UpdateID(), testAcceptedEventID) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + require.Len(t, *capturedOptions, 1) + require.Equal(t, tv.UpdateID(), (*capturedOptions)[0].UpdateId) + require.Equal(t, tv.RequestID(), (*capturedOptions)[0].AttachedRequestId) + require.Equal(t, testCallbacks, (*capturedOptions)[0].AttachedCompletionCallbacks) + }) + + t.Run("on stateCompleted returns true without attaching callbacks", func(t *testing.T) { + effects := &effect.Buffer{} + store, eventCreated := trackingStore(effects) + upd := update.NewCompleted(tv.UpdateID(), future.NewReadyFuture[*updatepb.Outcome](successOutcome, nil)) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + require.False(t, *eventCreated, "should not attach callbacks when update is 
already completed") + }) + + t.Run("on stateCreated returns false without creating event", func(t *testing.T) { + effects := &effect.Buffer{} + store, eventCreated := trackingStore(effects) + upd := update.New(tv.UpdateID()) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.False(t, fired) + require.False(t, *eventCreated) + }) + + t.Run("on stateAdmitted returns false without creating event", func(t *testing.T) { + effects := &effect.Buffer{} + store, eventCreated := trackingStore(effects) + upd := update.NewAdmitted(tv.UpdateID(), nil) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.False(t, fired) + require.False(t, *eventCreated) + }) + + t.Run("on stateSent buffers callbacks and returns true", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + msg := send(t, upd, skipAlreadySent) + require.NotNil(t, msg) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + + // Accept the update — this should flush the buffered callbacks. + require.NoError(t, accept(t, store, upd)) + effects.Apply(context.Background()) + + require.Equal(t, 1, *optionsEventCount, "should flush one buffered callback on acceptance") + }) + + t.Run("on stateSent dedup by requestID buffers only once", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + // Call AttachCallbacks twice with the same requestID. 
+ fired1, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired1) + fired2, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired2) + + require.NoError(t, accept(t, store, upd)) + effects.Apply(context.Background()) + + require.Equal(t, 1, *optionsEventCount, "duplicate requestID should be deduped, only one event written") + }) + + t.Run("on stateSent multiple different requestIDs", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + req1 := &updatepb.Request{ + Meta: &updatepb.Meta{UpdateId: tv.UpdateID()}, + Input: &updatepb.Input{Name: "not_empty"}, + RequestId: "request-1", + CompletionCallbacks: testCallbacks, + } + req2 := &updatepb.Request{ + Meta: &updatepb.Meta{UpdateId: tv.UpdateID()}, + Input: &updatepb.Input{Name: "not_empty"}, + RequestId: "request-2", + CompletionCallbacks: testCallbacks, + } + fired1, err := upd.AttachCallbacks(req1, store) + require.NoError(t, err) + require.True(t, fired1) + fired2, err := upd.AttachCallbacks(req2, store) + require.NoError(t, err) + require.True(t, fired2) + + require.NoError(t, accept(t, store, upd)) + effects.Apply(context.Background()) + + require.Equal(t, 2, *optionsEventCount, "two different requestIDs should produce two events") + }) + + t.Run("on stateSent flush skips already-persisted requestID", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + store.HasRequestIDFunc = func(requestID string) bool { + return requestID == tv.RequestID() + } + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + 
require.True(t, fired) + + require.NoError(t, accept(t, store, upd)) + effects.Apply(context.Background()) + + require.Equal(t, 0, *optionsEventCount, "already-persisted requestID should be skipped during flush") + }) + + t.Run("on stateSent flush error fails acceptance", func(t *testing.T) { + effects := &effect.Buffer{} + store := mockEventStore{ + Controller: effects, + AddWorkflowExecutionOptionsUpdatedEventFunc: func( + _ *workflowpb.VersioningOverride, _ bool, _ string, _ []*commonpb.Callback, _ []*commonpb.Link, _ string, _ *commonpb.Priority, + _ *workflowpb.TimeSkippingConfig, _ []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) { + return nil, serviceerror.NewInternal("flush error") + }, + } + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + + err = accept(t, store, upd) + require.Error(t, err) + require.ErrorContains(t, err, "flush error") + }) + + t.Run("provisional states still return ResourceExhausted", func(t *testing.T) { + effects := &effect.Buffer{} + store := mockEventStore{Controller: effects} + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + // Accept but do NOT apply effects — update is in stateProvisionallyAccepted. 
+ require.NoError(t, accept(t, store, upd)) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.False(t, fired) + require.Error(t, err) + var resourceExhaustedErr *serviceerror.ResourceExhausted + require.ErrorAs(t, err, &resourceExhaustedErr) + }) + + t.Run("on stateSent rejection clears buffer", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + + err = reject(t, store, upd) + require.NoError(t, err) + effects.Apply(context.Background()) + + require.Equal(t, 0, *optionsEventCount, "rejected update should not flush buffered callbacks") + }) + + t.Run("buffered callbacks lost when Update struct is recreated", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + + // Simulate Update struct being lost — create a new one from mutable state. 
+ upd2 := update.NewAdmitted(tv.UpdateID(), nil) + require.NoError(t, accept(t, store, upd2)) + effects.Apply(context.Background()) + + require.Equal(t, 0, *optionsEventCount, + "callbacks buffered on the lost Update struct should NOT be flushed on the new struct's acceptance") + }) + + t.Run("same requestID can be re-buffered on new Update struct after loss", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + + // Simulate loss — new struct from mutable state. + upd2 := update.NewAdmitted(tv.UpdateID(), nil) + _ = send(t, upd2, skipAlreadySent) + + // Same requestID can buffer again on new struct. + fired2, err := upd2.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired2) + + require.NoError(t, accept(t, store, upd2)) + effects.Apply(context.Background()) + + require.Equal(t, 1, *optionsEventCount, + "re-buffered callbacks on new struct should be flushed on acceptance") + }) + + t.Run("re-buffered requestID deduped against persisted state after loss", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + store.HasRequestIDFunc = func(requestID string) bool { + return requestID == tv.RequestID() + } + upd := update.NewAccepted(tv.UpdateID(), testAcceptedEventID) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + require.Equal(t, 0, *optionsEventCount, + "already-persisted requestID should not write another event") + }) + + t.Run("with EventStore error returns error", func(t *testing.T) { + effects := &effect.Buffer{} + store := mockEventStore{ + Controller: effects, + AddWorkflowExecutionOptionsUpdatedEventFunc: 
func( + _ *workflowpb.VersioningOverride, _ bool, _ string, _ []*commonpb.Callback, _ []*commonpb.Link, _ string, _ *commonpb.Priority, + _ *workflowpb.TimeSkippingConfig, _ []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) { + return nil, serviceerror.NewInternal("store error") + }, + } + upd := update.NewAccepted(tv.UpdateID(), testAcceptedEventID) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.False(t, fired) + require.Error(t, err) + require.ErrorContains(t, err, "store error") + }) + + t.Run("skips event when request has no callbacks and no request ID", func(t *testing.T) { + effects := &effect.Buffer{} + store, eventCreated := trackingStore(effects) + upd := update.NewAccepted(tv.UpdateID(), testAcceptedEventID) + emptyRequest := &updatepb.Request{ + Meta: &updatepb.Meta{UpdateId: tv.UpdateID()}, + Input: &updatepb.Input{Name: "not_empty"}, + } + + fired, err := upd.AttachCallbacks(emptyRequest, store) + require.NoError(t, err) + require.False(t, fired, "should return false when no callbacks to attach — preserves existing caller behavior") + require.False(t, *eventCreated, "should not create event when no callbacks and no request ID") + }) + + t.Run("dedup by requestID on stateAccepted returns true without creating event", func(t *testing.T) { + effects := &effect.Buffer{} + eventCreated := false + store := mockEventStore{ + Controller: effects, + HasRequestIDFunc: func(requestID string) bool { + return requestID == tv.RequestID() + }, + AddWorkflowExecutionOptionsUpdatedEventFunc: func( + _ *workflowpb.VersioningOverride, _ bool, _ string, _ []*commonpb.Callback, _ []*commonpb.Link, _ string, _ *commonpb.Priority, + _ *workflowpb.TimeSkippingConfig, _ []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) { + eventCreated = true + return &historypb.HistoryEvent{}, nil + }, + } + upd := 
update.NewAccepted(tv.UpdateID(), testAcceptedEventID) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired, "should return true so caller can wait on existing update") + require.False(t, eventCreated, "should not create event for duplicate requestID") + }) + + t.Run("different requestID on stateAccepted creates event normally", func(t *testing.T) { + effects := &effect.Buffer{} + store, capturedOptions := capturingStore(effects) + store.HasRequestIDFunc = func(requestID string) bool { + return false // different requestID, not seen before + } + upd := update.NewAccepted(tv.UpdateID(), testAcceptedEventID) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + require.Len(t, *capturedOptions, 1) + require.Equal(t, tv.UpdateID(), (*capturedOptions)[0].UpdateId) + require.Equal(t, tv.RequestID(), (*capturedOptions)[0].AttachedRequestId) + }) +} diff --git a/service/history/workflow/update/validation.go b/service/history/workflow/update/validation.go index 0308e00e318..a59d4953b1d 100644 --- a/service/history/workflow/update/validation.go +++ b/service/history/workflow/update/validation.go @@ -44,6 +44,15 @@ func validateRequestMsg(updateID string, msg *updatepb.Request) error { return validateRequestMsgPrefix(updateID, "", msg) } +func callbacksRequireRequestID(msg *updatepb.Request) func() error { + return func() error { + if len(msg.GetCompletionCallbacks()) > 0 && msg.GetRequestId() == "" { + return serviceerror.NewInvalidArgumentf("invalid %T: request_id is required when completion_callbacks are set", msg) + } + return nil + } +} + func validateRequestMsgPrefix( updateID string, prefix string, @@ -56,6 +65,7 @@ func validateRequestMsgPrefix( eq(msg.GetMeta().GetUpdateId(), prefix+"meta.update_id", updateID, updateID, msg), notZero(msg.GetInput(), prefix+"input", msg), notZero(msg.GetInput().GetName(), prefix+"input.name", msg), + 
callbacksRequireRequestID(msg), ) } diff --git a/tests/nexus_workflow_update_test.go b/tests/nexus_workflow_update_test.go new file mode 100644 index 00000000000..6b95c4132c0 --- /dev/null +++ b/tests/nexus_workflow_update_test.go @@ -0,0 +1,1359 @@ +package tests + +import ( + "context" + "encoding/json" + "errors" + "sync/atomic" + "testing" + "time" + + "github.com/google/uuid" + "github.com/nexus-rpc/sdk-go/nexus" + "github.com/stretchr/testify/require" + commonpb "go.temporal.io/api/common/v1" + enumspb "go.temporal.io/api/enums/v1" + updatepb "go.temporal.io/api/update/v1" + "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/sdk/client" + "go.temporal.io/sdk/temporal" + "go.temporal.io/sdk/worker" + "go.temporal.io/sdk/workflow" + "go.temporal.io/server/common" + "go.temporal.io/server/common/dynamicconfig" + "go.temporal.io/server/common/nexus/nexustest" + "go.temporal.io/server/common/testing/await" + "go.temporal.io/server/common/testing/parallelsuite" + "go.temporal.io/server/tests/testcore" +) + +type NexusWorkflowUpdateTestSuite struct { + parallelsuite.Suite[*NexusWorkflowUpdateTestSuite] +} + +func TestNexusWorkflowUpdateTestSuite(t *testing.T) { + parallelsuite.Run(t, &NexusWorkflowUpdateTestSuite{}) +} + +// updateNexusTestConfig holds configuration for workflow update + nexus integration tests. +type updateNexusTestConfig struct { + taskQueue string + childWfID string + updateID string +} + +// newUpdateNexusTestConfig creates a config with randomized names to avoid collisions. +func newUpdateNexusTestConfig(t *testing.T) updateNexusTestConfig { + return updateNexusTestConfig{ + taskQueue: testcore.RandomizeStr(t.Name()), + childWfID: testcore.RandomizeStr("child-workflow-id"), + updateID: "update-id", + } +} + +// makeUpdateWithCallbackHandler creates a nexus handler that sends a workflow update with +// completion callbacks to the specified child workflow. onStart is an optional callback +// invoked at the start of each operation (e.g. 
for counting invocations). +// If the update is already completed (e.g., the workflow has finished), the handler returns +// the result synchronously instead of starting an async operation with callbacks. +func makeUpdateWithCallbackHandler( + env *NexusTestEnv, + t *testing.T, + cfg updateNexusTestConfig, + onStart func(), +) nexustest.Handler { + return nexustest.Handler{ + OnStartOperation: func( + ctx context.Context, + service, operation string, + input *nexus.LazyValue, + options nexus.StartOperationOptions, + ) (nexus.HandlerStartOperationResult[any], error) { + if onStart != nil { + onStart() + } + resp, err := env.FrontendClient().UpdateWorkflowExecution( + ctx, + &workflowservice.UpdateWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: cfg.childWfID, + }, + WaitPolicy: &updatepb.WaitPolicy{ + LifecycleStage: enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + }, + Request: &updatepb.Request{ + Meta: &updatepb.Meta{ + UpdateId: cfg.updateID, + }, + Input: &updatepb.Input{ + Name: "update", + Args: &commonpb.Payloads{ + Payloads: []*commonpb.Payload{testcore.MustToPayload(t, "test")}, + }, + }, + RequestId: uuid.NewString(), + CompletionCallbacks: []*commonpb.Callback{ + { + Variant: &commonpb.Callback_Nexus_{ + Nexus: &commonpb.Callback_Nexus{ + Url: options.CallbackURL, + Header: options.CallbackHeader, + }, + }, + }, + }, + }, + }, + ) + if err != nil { + return nil, nexus.NewHandlerErrorf(nexus.HandlerErrorTypeInternal, "update call failed: %v", err) + } + // Verify the response contains a link. + link := resp.GetLink() + require.NotNil(t, link, "update response should contain a link") + if workflowEvent := link.GetWorkflowEvent(); workflowEvent != nil { + // Accepted/completed update: link points to the accepted event. 
+ require.Equal(t, cfg.childWfID, workflowEvent.GetWorkflowId()) + require.Equal(t, enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ACCEPTED, workflowEvent.GetRequestIdRef().GetEventType()) + } else if wfLink := link.GetWorkflow(); wfLink != nil { + // Rejected update: link points to the workflow with a reason. + require.Equal(t, cfg.childWfID, wfLink.GetWorkflowId()) + require.Equal(t, "Update rejected", wfLink.GetReason()) + } else { + require.Fail(t, "link should be a workflow event or workflow link") + } + // If the update is already completed, return the result synchronously. + if outcome := resp.GetOutcome(); outcome != nil { + if failure := outcome.GetFailure(); failure != nil { + return nil, &nexus.OperationError{ + State: nexus.OperationStateFailed, + Message: failure.GetMessage(), + } + } + if success := outcome.GetSuccess(); success != nil && len(success.GetPayloads()) > 0 { + var result string + if jsonErr := json.Unmarshal(success.GetPayloads()[0].GetData(), &result); jsonErr == nil { + return &nexus.HandlerStartOperationResultSync[any]{Value: result}, nil + } + } + } + return &nexus.HandlerStartOperationResultAsync{ + OperationToken: "test", + }, nil + }, + } +} + +func enableUpdateCallbacksOpts() []testcore.TestOption { + return []testcore.TestOption{ + testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), + testcore.WithDynamicConfig(dynamicconfig.EnableCHASMCallbacks, true), + testcore.WithDynamicConfig(dynamicconfig.EnableWorkflowUpdateCallbacks, true), + } +} + +// newUpdateChildWorkflow returns a child workflow function that registers an "update" +// handler and waits for a "stop" signal. If blockOnSignal is true, the update handler +// blocks on a "complete-update" signal before returning, which is useful for ensuring +// the update goes through the async path. 
+func newUpdateChildWorkflow(blockOnSignal bool) func(workflow.Context, string) (string, error) { + return func(ctx workflow.Context, input string) (string, error) { + if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) { + if blockOnSignal { + signalCh := workflow.GetSignalChannel(ctx, "complete-update") + signalCh.Receive(ctx, nil) + } + return "updated: " + input, nil + }); err != nil { + return "", err + } + signalCh := workflow.GetSignalChannel(ctx, "stop") + signalCh.Receive(ctx, nil) + return "done: " + input, nil + } +} + +// getFirstWFTaskCompleteEventID scans the workflow history and returns the event ID +// of the first WorkflowTaskCompleted event. +func (s *NexusWorkflowUpdateTestSuite) getFirstWFTaskCompleteEventID(ctx context.Context, env *NexusTestEnv, workflowID, runID string) int64 { + hist := env.SdkClient().GetWorkflowHistory(ctx, workflowID, runID, false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) + for hist.HasNext() { + event, err := hist.Next() + s.NoError(err) + if event.EventType == enumspb.EVENT_TYPE_WORKFLOW_TASK_COMPLETED { + return event.EventId + } + } + s.FailNow("couldn't find a WorkflowTaskCompleted event", "workflowID=%s runID=%s", workflowID, runID) + return 0 +} + +// newSimpleCallerWF returns a caller workflow that executes a nexus operation targeting +// childWfID and returns the string result. 
+func (s *NexusWorkflowUpdateTestSuite) newSimpleCallerWF(endpointName, childWfID string) func(workflow.Context) (string, error) { + return func(ctx workflow.Context) (string, error) { + nexusClient := workflow.NewNexusClient(endpointName, "test") + fut := nexusClient.ExecuteOperation(ctx, "operation", childWfID, workflow.NexusOperationOptions{}) + var result string + err := fut.Get(ctx, &result) + return result, err + } +} + +// awaitUpdateAccepted polls the workflow history until a WorkflowExecutionUpdateAccepted +// event is found, failing the test if it does not appear within 10 seconds. +func (s *NexusWorkflowUpdateTestSuite) awaitUpdateAccepted(ctx context.Context, env *NexusTestEnv, workflowID, runID string) { + await.Require(env.Context(), s.T(), func(t *await.T) { + hist := env.SdkClient().GetWorkflowHistory(ctx, workflowID, runID, false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) + for hist.HasNext() { + event, err := hist.Next() + require.NoError(t, err) + if event.EventType == enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ACCEPTED { + return + } + } + require.Fail(t, "update not yet accepted") + }, 10*time.Second, 500*time.Millisecond) +} + +// startWorker creates a worker on the given task queue, registers wfs, starts it, +// and schedules cleanup. +func (s *NexusWorkflowUpdateTestSuite) startWorker(env *NexusTestEnv, taskQueue string, wfs ...any) { + w := worker.New(env.SdkClient(), taskQueue, worker.Options{}) + for _, wf := range wfs { + w.RegisterWorkflow(wf) + } + s.NoError(w.Start()) + s.T().Cleanup(w.Stop) +} + +// requireNexusOperationError asserts that err is a WorkflowExecutionError with an inner NexusOperationError, +// and returns the inner NexusOperationError. 
+func (s *NexusWorkflowUpdateTestSuite) requireNexusOperationError(err error) *temporal.NexusOperationError { + var wee *temporal.WorkflowExecutionError + s.ErrorAs(err, &wee) + var noe *temporal.NexusOperationError + s.ErrorAs(wee, &noe) + return noe +} + +// assertAcceptedUpdateCompletedWorkflowError asserts the full error chain: +// WorkflowExecutionError -> NexusOperationError -> ApplicationError{Type: "AcceptedUpdateCompletedWorkflow"}. +// Used to assert the correct error for completion callbacks that failed because the update didn't complete +// before the workflow finishes. +func (s *NexusWorkflowUpdateTestSuite) assertAcceptedUpdateCompletedWorkflowError(err error) { + noe := s.requireNexusOperationError(err) + var appErr *temporal.ApplicationError + s.ErrorAs(noe, &appErr) + s.Equal("AcceptedUpdateCompletedWorkflow", appErr.Type()) +} + +// assertReappliedUpdateInNewRun verifies that updateID appears as an UpdateAdmitted event +// in runID's history with completion callbacks preserved. +func (s *NexusWorkflowUpdateTestSuite) assertReappliedUpdateInNewRun(ctx context.Context, env *NexusTestEnv, workflowID, runID, updateID string) { + hist := env.SdkClient().GetWorkflowHistory(ctx, workflowID, runID, false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) + found := false + for hist.HasNext() { + event, err := hist.Next() + s.NoError(err) + if event.EventType == enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ADMITTED { + attrs := event.GetWorkflowExecutionUpdateAdmittedEventAttributes() + if attrs.GetRequest().GetMeta().GetUpdateId() == updateID { + found = true + s.NotEmpty(attrs.GetRequest().GetCompletionCallbacks(), "reapplied update should preserve completion callbacks") + } + } + } + s.True(found, "expected reapplied UpdateAdmitted event in new run") +} + +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateAsyncNexusOperation() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) 
+ ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + childWF := newUpdateChildWorkflow(false) + + callerWF := func(ctx workflow.Context) (string, error) { + cwf := workflow.ExecuteChildWorkflow( + workflow.WithWorkflowID(ctx, cfg.childWfID), + childWF, + "initial input", + ) + var childWE workflow.Execution + if err := cwf.GetChildWorkflowExecution().Get(ctx, &childWE); err != nil { + return "", err + } + nexusClient := workflow.NewNexusClient(endpointName, "test") + fut := nexusClient.ExecuteOperation(ctx, "operation", childWE.ID, workflow.NexusOperationOptions{}) + var result string + err := fut.Get(ctx, &result) + return result, err + } + + s.startWorker(env, cfg.taskQueue, callerWF, childWF) + + run, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + var result string + s.NoError(run.Get(ctx, &result)) + s.Equal("updated: test", result) + + // Verify the child workflow's history contains the update accepted event with callbacks. 
+ childHistory := env.SdkClient().GetWorkflowHistory(ctx, cfg.childWfID, "", false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) + foundUpdateAccepted := false + for childHistory.HasNext() { + event, err := childHistory.Next() + s.NoError(err) + if event.EventType == enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ACCEPTED { + foundUpdateAccepted = true + attrs := event.GetWorkflowExecutionUpdateAcceptedEventAttributes() + s.NotNil(attrs) + s.Equal(cfg.updateID, attrs.GetAcceptedRequest().GetMeta().GetUpdateId()) + s.NotEmpty(attrs.GetAcceptedRequest().GetCompletionCallbacks()) + break + } + } + s.True(foundUpdateAccepted, "expected to find WorkflowExecutionUpdateAccepted event in child workflow history") +} + +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateAsyncAttachedNexusOperation() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + childWF := newUpdateChildWorkflow(true) + + callerWF := func(ctx workflow.Context) (string, error) { + cwf := workflow.ExecuteChildWorkflow( + workflow.WithWorkflowID(ctx, cfg.childWfID), + childWF, + "initial input", + ) + var childWE workflow.Execution + if err := cwf.GetChildWorkflowExecution().Get(ctx, &childWE); err != nil { + return "", err + } + nexusClient := workflow.NewNexusClient(endpointName, "test") + fut := nexusClient.ExecuteOperation(ctx, "operation", childWE.ID, workflow.NexusOperationOptions{}) + var exec workflow.NexusOperationExecution + if err := fut.GetNexusOperationExecution().Get(ctx, &exec); err != nil { + return "", err + } + // Send a second update to verify attaching after starting works. 
+ afut := nexusClient.ExecuteOperation(ctx, "operation", childWE.ID, workflow.NexusOperationOptions{}) + var aexec workflow.NexusOperationExecution + if err := afut.GetNexusOperationExecution().Get(ctx, &aexec); err != nil { + return "", err + } + // Signal the child to complete the update now that both operations are attached. + if err := workflow.SignalExternalWorkflow(ctx, childWE.ID, "", "complete-update", nil).Get(ctx, nil); err != nil { + return "", err + } + var aresult string + if err := afut.Get(ctx, &aresult); err != nil { + return "", err + } + + var result string + err := fut.Get(ctx, &result) + return result, err + } + + s.startWorker(env, cfg.taskQueue, callerWF, childWF) + + run, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 10 * time.Second, + }, callerWF) + s.NoError(err) + var result string + s.NoError(run.Get(ctx, &result)) + s.Equal("updated: test", result) +} + +// TestWorkflowUpdateCallbackOnAlreadyCompletedUpdate verifies that when a second caller +// sends an update request with the same update ID after the update has already completed, +// the second request returns the result synchronously without attaching a new callback. +// The child workflow should only have one update callback (from the first request). +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateNoCallbackAttachedOnAlreadyCompletedUpdate() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + cfg.updateID = "already-completed-update-id" + + var operationCount atomic.Int32 + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, func() { operationCount.Add(1) }) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + childWF := newUpdateChildWorkflow(false) + + // Caller workflow sends two nexus operations targeting the same update. 
+ // The first one triggers the update, the second one arrives after it completes + // and should still get the result via AttachCallbacks. + callerWF := func(ctx workflow.Context) (string, error) { + cwf := workflow.ExecuteChildWorkflow( + workflow.WithWorkflowID(ctx, cfg.childWfID), + childWF, + "initial input", + ) + var childWE workflow.Execution + if err := cwf.GetChildWorkflowExecution().Get(ctx, &childWE); err != nil { + return "", err + } + nexusClient := workflow.NewNexusClient(endpointName, "test") + + // First nexus operation: triggers the update. + fut1 := nexusClient.ExecuteOperation(ctx, "operation", childWE.ID, workflow.NexusOperationOptions{}) + var result1 string + if err := fut1.Get(ctx, &result1); err != nil { + return "", err + } + + // Second nexus operation: targets the same already-completed update. + fut2 := nexusClient.ExecuteOperation(ctx, "operation", childWE.ID, workflow.NexusOperationOptions{}) + var result2 string + if err := fut2.Get(ctx, &result2); err != nil { + return "", err + } + + return result1 + " | " + result2, nil + } + + s.startWorker(env, cfg.taskQueue, callerWF, childWF) + + run, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + var result string + s.NoError(run.Get(ctx, &result)) + s.Equal("updated: test | updated: test", result) + s.Equal(int32(2), operationCount.Load(), "expected two nexus operations to be started") + + // Verify the child workflow has exactly one update callback (from the first request). + // The second request returns synchronously because the update is already completed, + // so no additional callback is attached. 
+ descResp, err := env.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + Execution: &commonpb.WorkflowExecution{ + WorkflowId: cfg.childWfID, + }, + }) + s.NoError(err) + updateCallbackCount := 0 + for _, cb := range descResp.GetCallbacks() { + if cb.GetTrigger().GetUpdateWorkflowExecutionCompleted() != nil { + updateCallbackCount++ + } + } + s.Equal(1, updateCallbackCount, "expected exactly one update callback on the child workflow") + + // Verify the child workflow has the correct request ID infos. + // Each nexus operation generates a unique request ID. If the second operation + // (targeting the already-completed update) had attached its request ID, we would + // see 3 entries instead of 2, or an OPTIONS_UPDATED entry. The count of 2 with + // only STARTED and UPDATE_ACCEPTED types proves the second request ID was not attached. + sdkDescResp, err := env.SdkClient().DescribeWorkflowExecution(ctx, cfg.childWfID, "") + s.NoError(err) + requestIDInfos := sdkDescResp.GetWorkflowExtendedInfo().GetRequestIdInfos() + s.NotNil(requestIDInfos) + s.Len(requestIDInfos, 2, "expected exactly 2 request ID infos: second operation should not attach") + cntStarted := 0 + cntAccepted := 0 + for _, info := range requestIDInfos { + s.False(info.Buffered) + s.GreaterOrEqual(info.EventId, common.FirstEventID) + s.NotEqual( + enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_OPTIONS_UPDATED, + info.EventType, + "second operation targeting completed update should not create an OPTIONS_UPDATED request ID", + ) + switch info.EventType { + case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_STARTED: + cntStarted++ + case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ACCEPTED: + cntAccepted++ + default: + s.Failf("unexpected event type in request ID info", "got %v", info.EventType) + } + } + s.Equal(1, cntStarted, "expected one STARTED request ID info") + s.Equal(1, cntAccepted, "expected one UPDATE_ACCEPTED request ID 
info from first update acceptance") +} + +// TestDescribeWorkflowShowsUpdateCallbacks verifies that DescribeWorkflowExecution +// returns update-level callbacks after an update with callbacks is sent. +func (s *NexusWorkflowUpdateTestSuite) TestDescribeWorkflowShowsUpdateCallbacks() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + taskQueue := testcore.RandomizeStr(s.T().Name()) + updateID := "describe-callback-update-id" + callbackURL := "http://localhost:9999/callback" + + wf := func(ctx workflow.Context) (string, error) { + if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) { + // Wait for a signal so update stays in-progress while we describe. + signalCh := workflow.GetSignalChannel(ctx, "complete-update") + signalCh.Receive(ctx, nil) + return "updated: " + input, nil + }); err != nil { + return "", err + } + signalCh := workflow.GetSignalChannel(ctx, "stop") + signalCh.Receive(ctx, nil) + return "done", nil + } + + s.startWorker(env, taskQueue, wf) + + run, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: taskQueue, + }, wf) + s.NoError(err) + + // Send update with completion callbacks (don't wait for completion). 
+ testPayload := testcore.MustToPayload(s.T(), "test") + updateDone := make(chan struct{}) + go func() { + defer close(updateDone) + _, _ = env.FrontendClient().UpdateWorkflowExecution(ctx, &workflowservice.UpdateWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: run.GetID(), + RunId: run.GetRunID(), + }, + WaitPolicy: &updatepb.WaitPolicy{ + LifecycleStage: enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + }, + Request: &updatepb.Request{ + Meta: &updatepb.Meta{ + UpdateId: updateID, + }, + Input: &updatepb.Input{ + Name: "update", + Args: &commonpb.Payloads{ + Payloads: []*commonpb.Payload{testPayload}, + }, + }, + RequestId: uuid.NewString(), + CompletionCallbacks: []*commonpb.Callback{ + { + Variant: &commonpb.Callback_Nexus_{ + Nexus: &commonpb.Callback_Nexus{ + Url: callbackURL, + }, + }, + }, + }, + }, + }) + }() + + // Wait until the update is accepted by checking DescribeWorkflowExecution. + await.Require(env.Context(), s.T(), func(t *await.T) { + desc, err := env.SdkClient().DescribeWorkflowExecution(ctx, run.GetID(), run.GetRunID()) + require.NoError(t, err) + require.NotNil(t, desc.GetCallbacks(), "callbacks should be present") + found := false + for _, cb := range desc.GetCallbacks() { + if cb.GetCallback().GetNexus().GetUrl() == callbackURL { + found = true + // Verify the trigger references the update. + trigger := cb.GetTrigger() + require.NotNil(t, trigger) + updateTrigger := trigger.GetUpdateWorkflowExecutionCompleted() + if updateTrigger != nil { + require.Equal(t, updateID, updateTrigger.GetUpdateId()) + } + } + } + require.True(t, found, "expected to find callback with URL %s", callbackURL) + }, 10*time.Second, 500*time.Millisecond) + + // Complete the update and stop the workflow. 
+ s.NoError(env.SdkClient().SignalWorkflow(ctx, run.GetID(), run.GetRunID(), "complete-update", nil)) + <-updateDone + s.NoError(env.SdkClient().SignalWorkflow(ctx, run.GetID(), run.GetRunID(), "stop", nil)) +} + +// TestWorkflowUpdateCallbackAfterResetInflightUpdate verifies that when a workflow is +// reset while an update with completion callbacks is in-flight (accepted but not completed), +// the update is reapplied in the new run and the callback fires when the update completes. +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackAfterResetInflightUpdate() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name()) + + // Target workflow: update handler blocks on "complete-update" signal so the update + // stays in-flight while we perform the reset. + targetWF := func(ctx workflow.Context, input string) (string, error) { + if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) { + signalCh := workflow.GetSignalChannel(ctx, "complete-update") + signalCh.Receive(ctx, nil) + return "updated: " + input, nil + }); err != nil { + return "", err + } + signalCh := workflow.GetSignalChannel(ctx, "stop") + signalCh.Receive(ctx, nil) + return "done: " + input, nil + } + + // Start target workflow independently (not as child) to avoid parent-child + // complications during reset. + s.startWorker(env, targetTaskQueue, targetWF) + + targetRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: cfg.childWfID, + TaskQueue: targetTaskQueue, + }, targetWF, "initial input") + s.NoError(err) + + // Caller workflow sends a nexus operation that triggers the update with callbacks. 
+ callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID) + + s.startWorker(env, cfg.taskQueue, callerWF) + + callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + + // Wait for the update to be accepted on the target workflow. + s.awaitUpdateAccepted(ctx, env, cfg.childWfID, targetRun.GetRunID()) + + // Reset the target workflow to the first WFT completed event (before the update). + resetResp, err := env.FrontendClient().ResetWorkflowExecution(ctx, &workflowservice.ResetWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: cfg.childWfID, + RunId: targetRun.GetRunID(), + }, + Reason: "test reset with inflight update", + RequestId: uuid.NewString(), + WorkflowTaskFinishEventId: s.getFirstWFTaskCompleteEventID(ctx, env, cfg.childWfID, targetRun.GetRunID()), + }) + s.NoError(err) + + // Verify the update was reapplied in the new run's history. + s.assertReappliedUpdateInNewRun(ctx, env, cfg.childWfID, resetResp.RunId, cfg.updateID) + + // Signal the new run to complete the update, which should trigger the callback. + s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, resetResp.RunId, "complete-update", nil)) + + // The callback fires -> nexus operation completes -> caller gets the result. + var result string + s.NoError(callerRun.Get(ctx, &result)) + s.Equal("updated: test", result) + + // Clean up: stop the new run of the target workflow. 
+ s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, resetResp.RunId, "stop", nil)) +} + +// TestWorkflowUpdateCallbackAfterResetRejectedUpdate verifies that when a workflow is +// reset while an update with completion callbacks is in-flight (accepted but not completed), +// and the new run's workflow code rejects the reapplied update via a validator, the +// completion callback fires with a failure and the caller's nexus operation fails. +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackAfterResetRejectedUpdate() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name()) + + // Use a shared flag to switch behavior between runs. In the first run the + // update is accepted (and blocks); after we flip the flag the validator + // rejects every update. + var shouldReject atomic.Bool + + // Single workflow function used for both runs. 
+ targetWF := func(ctx workflow.Context, input string) (string, error) { + err := workflow.SetUpdateHandlerWithOptions(ctx, "update", + func(ctx workflow.Context, input string) (string, error) { + signalCh := workflow.GetSignalChannel(ctx, "complete-update") + signalCh.Receive(ctx, nil) + return "updated: " + input, nil + }, + workflow.UpdateHandlerOptions{ + Validator: func(ctx workflow.Context, input string) error { + if shouldReject.Load() { + return errors.New("update rejected after reset") + } + return nil + }, + }, + ) + if err != nil { + return "", err + } + signalCh := workflow.GetSignalChannel(ctx, "stop") + signalCh.Receive(ctx, nil) + return "done: " + input, nil + } + + s.startWorker(env, targetTaskQueue, targetWF) + + targetRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: cfg.childWfID, + TaskQueue: targetTaskQueue, + }, targetWF, "initial input") + s.NoError(err) + + // Caller workflow sends a nexus operation that triggers the update with callbacks. + callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID) + + s.startWorker(env, cfg.taskQueue, callerWF) + + callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + + // Wait for the update to be accepted on the target workflow. + s.awaitUpdateAccepted(ctx, env, cfg.childWfID, targetRun.GetRunID()) + + // Flip the flag so the validator rejects updates in the new run. + shouldReject.Store(true) + + // Reset the target workflow to the first WFT completed event (before the update). 
+ resetResp, err := env.FrontendClient().ResetWorkflowExecution(ctx, &workflowservice.ResetWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: cfg.childWfID, + RunId: targetRun.GetRunID(), + }, + Reason: "test reset with inflight update expecting rejection", + RequestId: uuid.NewString(), + WorkflowTaskFinishEventId: s.getFirstWFTaskCompleteEventID(ctx, env, cfg.childWfID, targetRun.GetRunID()), + }) + s.NoError(err) + + // Verify the update was reapplied in the new run's history. + s.assertReappliedUpdateInNewRun(ctx, env, cfg.childWfID, resetResp.RunId, cfg.updateID) + + // The reapplied update is rejected by the validator -> callback fires with failure -> + // nexus operation fails -> caller workflow fails. + var result string + err = callerRun.Get(ctx, &result) + s.Error(err, "expected caller workflow to fail because the reapplied update was rejected") + + // Verify it's a NexusOperationError wrapping the rejection failure. + _ = s.requireNexusOperationError(err) + + // Clean up: stop the new run of the target workflow. + s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, resetResp.RunId, "stop", nil)) +} + +// TestWorkflowUpdateCallbackAfterResetCompletedUpdate verifies that when a workflow is +// reset after an update with callbacks has already completed, the update is reapplied in +// the new run, completes again, and a new nexus operation targeting the same update ID +// receives the result via the AttachCallbacks path. +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackAfterResetCompletedUpdate() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) 
+ ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + cfg.updateID = "reset-completed-update-id" + + var operationCount atomic.Int32 + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, func() { operationCount.Add(1) }) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name()) + + // Target workflow: update handler completes immediately. + targetWF := newUpdateChildWorkflow(false) + + s.startWorker(env, targetTaskQueue, targetWF) + + targetRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: cfg.childWfID, + TaskQueue: targetTaskQueue, + }, targetWF, "initial input") + s.NoError(err) + + // Caller workflow sends a single nexus operation. + callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID) + + s.startWorker(env, cfg.taskQueue, callerWF) + + // First caller: triggers the update, it completes, callback fires. + run1, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + var result1 string + s.NoError(run1.Get(ctx, &result1)) + s.Equal("updated: test", result1) + + // Reset the target workflow to before the update. + resetResp, err := env.FrontendClient().ResetWorkflowExecution(ctx, &workflowservice.ResetWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: cfg.childWfID, + RunId: targetRun.GetRunID(), + }, + Reason: "test reset with completed update", + RequestId: uuid.NewString(), + WorkflowTaskFinishEventId: s.getFirstWFTaskCompleteEventID(ctx, env, cfg.childWfID, targetRun.GetRunID()), + }) + s.NoError(err) + + // The update is reapplied and completes again in the new run. + // Wait for the update to complete in the new run before sending the second operation. 
+ await.Require(env.Context(), s.T(), func(t *await.T) { + hist := env.SdkClient().GetWorkflowHistory(ctx, cfg.childWfID, resetResp.RunId, false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) + for hist.HasNext() { + event, err := hist.Next() + require.NoError(t, err) + if event.EventType == enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_COMPLETED { + return + } + } + require.Fail(t, "update not yet completed in new run") + }, 10*time.Second, 500*time.Millisecond) + + // Second caller: sends a new nexus operation targeting the same update ID. + // Since the update is already completed in the new run, AttachCallbacks fires the callback. + run2, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + var result2 string + s.NoError(run2.Get(ctx, &result2)) + s.Equal("updated: test", result2) + + s.Equal(int32(2), operationCount.Load(), "expected two nexus operations to be started") + + // Clean up: stop the new run of the target workflow. + s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, resetResp.RunId, "stop", nil)) +} + +// TestWorkflowUpdateSyncReturnForCompletedWorkflow verifies that when a second nexus +// operation targets the same update ID on a workflow that has already completed, the +// handler detects the update is already completed and returns the result synchronously +// (instead of starting an async operation with callbacks). +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateSyncReturnForCompletedWorkflow() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) 
+ ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + cfg.updateID = "sync-return-completed-wf-update-id" + + var operationCount atomic.Int32 + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, func() { operationCount.Add(1) }) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name()) + + // Target workflow: update handler completes immediately. + targetWF := newUpdateChildWorkflow(false) + + s.startWorker(env, targetTaskQueue, targetWF) + + targetRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: cfg.childWfID, + TaskQueue: targetTaskQueue, + }, targetWF, "initial input") + s.NoError(err) + + // Caller workflow sends a single nexus operation. + callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID) + + s.startWorker(env, cfg.taskQueue, callerWF) + + // First caller: triggers the update, it completes, callback fires. + run1, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + var result1 string + s.NoError(run1.Get(ctx, &result1)) + s.Equal("updated: test", result1) + + // Complete the target workflow by sending the "stop" signal. + s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, targetRun.GetRunID(), "stop", nil)) + + // Wait for the target workflow to complete. + var targetResult string + s.NoError(targetRun.Get(ctx, &targetResult)) + + // Second caller: sends a new nexus operation targeting the same update ID. + // Since the workflow is completed and the update was already completed, + // UpdateWorkflowExecution returns the outcome directly -> handler returns sync. 
+	run2, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{
+		TaskQueue:                cfg.taskQueue,
+		WorkflowExecutionTimeout: 30 * time.Second,
+	}, callerWF)
+	s.NoError(err)
+	var result2 string
+	s.NoError(run2.Get(ctx, &result2))
+	s.Equal("updated: test", result2)
+
+	s.Equal(int32(2), operationCount.Load(), "expected two nexus operations to be started")
+}
+
+// TestWorkflowUpdateCallbackOnFailedUpdate verifies that when an update handler returns
+// an error (update completes with a failure outcome), the completion callback fires and
+// the caller's nexus operation completes with a failure.
+func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackOnFailedUpdate() {
+	env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...)
+	ctx := testcore.NewContext()
+	cfg := newUpdateNexusTestConfig(s.T())
+	cfg.updateID = "failed-update-id"
+
+	h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil)
+	endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h)
+
+	targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name())
+
+	// Target workflow: update handler returns an error after acceptance.
+	targetWF := func(ctx workflow.Context, input string) (string, error) {
+		if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) {
+			return "", temporal.NewApplicationError("update handler failed", "UpdateFailed", nil)
+		}); err != nil {
+			return "", err
+		}
+		signalCh := workflow.GetSignalChannel(ctx, "stop")
+		signalCh.Receive(ctx, nil)
+		return "done: " + input, nil
+	}
+
+	s.startWorker(env, targetTaskQueue, targetWF)
+
+	_, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{
+		ID:        cfg.childWfID,
+		TaskQueue: targetTaskQueue,
+	}, targetWF, "initial input")
+	s.NoError(err)
+
+	// Caller workflow sends a nexus operation targeting the child.
+	callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID)
+
+	s.startWorker(env, cfg.taskQueue, callerWF)
+
+	callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{
+		TaskQueue:                cfg.taskQueue,
+		WorkflowExecutionTimeout: 30 * time.Second,
+	}, callerWF)
+	s.NoError(err)
+
+	// The update is accepted but the handler returns an error -> update completes with
+	// failure -> callback fires -> nexus operation fails -> caller workflow fails.
+	var result string
+	err = callerRun.Get(ctx, &result)
+	s.Error(err, "expected caller workflow to fail because the update failed")
+
+	// Verify it's a NexusOperationError wrapping the update failure; the value returned by the helper is deliberately discarded.
+	_ = s.requireNexusOperationError(err)
+
+	// Clean up: stop the target workflow. The run ID argument is left empty, so the signal is addressed by workflow ID alone.
+	s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, "", "stop", nil))
+}
+
+// TestWorkflowUpdateCallbackOnWorkflowTerminate verifies that when a workflow is
+// terminated while an update with completion callbacks is in-flight (accepted, handler
+// blocking), the ProcessCloseCallbacks mechanism fires the callback and the caller's
+// nexus operation completes.
+func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackOnWorkflowTerminate() {
+	env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...)
+	ctx := testcore.NewContext()
+	cfg := newUpdateNexusTestConfig(s.T())
+	cfg.updateID = "terminate-update-id"
+
+	h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil)
+	endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h)
+
+	targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name())
+
+	// Target workflow: update handler blocks on a signal so it stays in-flight.
+	targetWF := func(ctx workflow.Context, input string) (string, error) {
+		if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) {
+			signalCh := workflow.GetSignalChannel(ctx, "complete-update")
+			signalCh.Receive(ctx, nil)
+			return "updated: " + input, nil
+		}); err != nil {
+			return "", err
+		}
+		signalCh := workflow.GetSignalChannel(ctx, "stop")
+		signalCh.Receive(ctx, nil)
+		return "done: " + input, nil
+	}
+
+	s.startWorker(env, targetTaskQueue, targetWF)
+
+	_, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{
+		ID:        cfg.childWfID,
+		TaskQueue: targetTaskQueue,
+	}, targetWF, "initial input")
+	s.NoError(err)
+
+	// Caller workflow sends a nexus operation targeting the child.
+	callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID)
+
+	s.startWorker(env, cfg.taskQueue, callerWF)
+
+	callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{
+		TaskQueue:                cfg.taskQueue,
+		WorkflowExecutionTimeout: 30 * time.Second,
+	}, callerWF)
+	s.NoError(err)
+
+	// Wait for the update to be accepted on the target.
+	s.awaitUpdateAccepted(ctx, env, cfg.childWfID, "")
+
+	// Terminate the target workflow while the update is in-flight.
+	// ProcessCloseCallbacks should fire the update-level callbacks.
+	s.NoError(env.SdkClient().TerminateWorkflow(ctx, cfg.childWfID, "", "testing terminate with inflight update callback"))
+
+	// The callback fires -> nexus operation completes -> caller workflow finishes.
+	// The caller should get an error (the nexus operation failed because the
+	// target was terminated).
+	var result string
+	err = callerRun.Get(ctx, &result)
+	s.Error(err, "expected caller workflow to fail because the target was terminated")
+	s.assertAcceptedUpdateCompletedWorkflowError(err)
+}
+
+// TestWorkflowUpdateCallbackOnWorkflowComplete verifies that when a workflow completes
+// normally while an update with completion callbacks is in-flight (accepted, handler
+// blocking), the ProcessCloseCallbacks mechanism fires the callback and the caller's
+// nexus operation completes with a failure (the run closes without completing the update).
+// This exercises mutable_state_impl.go processCloseCallbacksChasm -> wf.ProcessCloseCallbacks.
+func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackOnWorkflowComplete() {
+	env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...)
+	ctx := testcore.NewContext()
+	cfg := newUpdateNexusTestConfig(s.T())
+	cfg.updateID = "complete-wf-update-id"
+
+	h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil)
+	endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h)
+
+	targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name())
+
+	// Update handler blocks on "complete-update" signal so the update stays in-flight
+	// while the workflow itself completes via the "stop" signal.
+	targetWF := newUpdateChildWorkflow(true)
+
+	s.startWorker(env, targetTaskQueue, targetWF)
+
+	_, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{
+		ID:        cfg.childWfID,
+		TaskQueue: targetTaskQueue,
+	}, targetWF, "initial input")
+	s.NoError(err)
+
+	// Caller workflow sends a nexus operation targeting the child.
+	callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID)
+
+	s.startWorker(env, cfg.taskQueue, callerWF)
+
+	callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{
+		TaskQueue:                cfg.taskQueue,
+		WorkflowExecutionTimeout: 30 * time.Second,
+	}, callerWF)
+	s.NoError(err)
+
+	// Wait for the update to be accepted on the target.
+	s.awaitUpdateAccepted(ctx, env, cfg.childWfID, "")
+
+	// Complete the target workflow normally while the update is still in-flight.
+	// processCloseCallbacksChasm fires the update-level callbacks on workflow close.
+	s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, "", "stop", nil))
+
+	// The callback fires -> nexus operation completes with failure -> caller workflow fails.
+	var result string
+	err = callerRun.Get(ctx, &result)
+	s.Error(err, "expected caller workflow to fail because the target completed while update was in-flight")
+	s.assertAcceptedUpdateCompletedWorkflowError(err)
+}
+
+// TestWorkflowUpdateCallbackOnWorkflowContinueAsNew verifies that when a workflow
+// continues-as-new while an update with completion callbacks is in-flight (accepted,
+// handler blocking), the update callbacks are fired and the caller's nexus operation
+// completes with a failure (the old run is closed).
+func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackOnWorkflowContinueAsNew() {
+	env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...)
+	ctx := testcore.NewContext()
+	cfg := newUpdateNexusTestConfig(s.T())
+	cfg.updateID = "continue-as-new-update-id"
+
+	h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil)
+	endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h)
+
+	targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name())
+
+	// Target workflow: update handler blocks on a signal so it stays in-flight.
+	// When "continue-as-new" signal is received, the workflow continues as new. targetWF is declared before it is assigned so the function literal can pass itself to NewContinueAsNewError.
+	var targetWF func(ctx workflow.Context, input string) (string, error)
+	targetWF = func(ctx workflow.Context, input string) (string, error) {
+		if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) {
+			signalCh := workflow.GetSignalChannel(ctx, "complete-update")
+			signalCh.Receive(ctx, nil)
+			return "updated: " + input, nil
+		}); err != nil {
+			return "", err
+		}
+		signalCh := workflow.GetSignalChannel(ctx, "continue-as-new")
+		signalCh.Receive(ctx, nil)
+		return "", workflow.NewContinueAsNewError(ctx, targetWF, "continued")
+	}
+
+	s.startWorker(env, targetTaskQueue, targetWF)
+
+	_, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{
+		ID:        cfg.childWfID,
+		TaskQueue: targetTaskQueue,
+	}, targetWF, "initial input")
+	s.NoError(err)
+
+	// Caller workflow sends a nexus operation targeting the child.
+	callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID)
+
+	s.startWorker(env, cfg.taskQueue, callerWF)
+
+	callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{
+		TaskQueue:                cfg.taskQueue,
+		WorkflowExecutionTimeout: 30 * time.Second,
+	}, callerWF)
+	s.NoError(err)
+
+	// Wait for the update to be accepted on the target.
+	s.awaitUpdateAccepted(ctx, env, cfg.childWfID, "")
+
+	// Signal the target workflow to continue-as-new while the update is in-flight.
+	s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, "", "continue-as-new", nil))
+
+	// The callback fires -> nexus operation completes -> caller workflow finishes.
+	// The caller should get an error (the nexus operation failed because the
+	// target continued as new and the update was aborted).
+	var result string
+	err = callerRun.Get(ctx, &result)
+	s.Error(err, "expected caller workflow to fail because the target continued as new")
+	s.assertAcceptedUpdateCompletedWorkflowError(err)
+}
+
+// TestWorkflowUpdateCallbackOnWorkflowFailedWithRetry verifies that when a workflow
+// fails with a retry policy (RetryState=IN_PROGRESS) while an update with completion
+// callbacks is in-flight (accepted, handler blocking), the update callbacks are fired
+// and the caller's nexus operation completes with a failure (the old run is closed).
+func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackOnWorkflowFailedWithRetry() {
+	env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...)
+	ctx := testcore.NewContext()
+	cfg := newUpdateNexusTestConfig(s.T())
+	cfg.updateID = "failed-retry-update-id"
+
+	h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil)
+	endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h)
+
+	targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name())
+
+	// Target workflow: update handler blocks on a signal so it stays in-flight.
+	// When "fail" signal is received, the workflow returns an error (which will
+	// be retried due to the retry policy: MaximumAttempts is 3 below, so the first failure is not the last attempt).
+	targetWF := func(ctx workflow.Context, input string) (string, error) {
+		if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) {
+			signalCh := workflow.GetSignalChannel(ctx, "complete-update")
+			signalCh.Receive(ctx, nil)
+			return "updated: " + input, nil
+		}); err != nil {
+			return "", err
+		}
+		signalCh := workflow.GetSignalChannel(ctx, "fail")
+		signalCh.Receive(ctx, nil)
+		return "", errors.New("intentional failure for retry test")
+	}
+
+	s.startWorker(env, targetTaskQueue, targetWF)
+
+	_, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{
+		ID:        cfg.childWfID,
+		TaskQueue: targetTaskQueue,
+		RetryPolicy: &temporal.RetryPolicy{
+			InitialInterval:    1 * time.Second,
+			MaximumAttempts:    3,
+			BackoffCoefficient: 1,
+		},
+	}, targetWF, "initial input")
+	s.NoError(err)
+
+	// Caller workflow sends a nexus operation targeting the child.
+	callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID)
+
+	s.startWorker(env, cfg.taskQueue, callerWF)
+
+	callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{
+		TaskQueue:                cfg.taskQueue,
+		WorkflowExecutionTimeout: 30 * time.Second,
+	}, callerWF)
+	s.NoError(err)
+
+	// Wait for the update to be accepted on the target.
+	s.awaitUpdateAccepted(ctx, env, cfg.childWfID, "")
+
+	// Signal the target workflow to fail while the update is in-flight.
+	// The retry policy will cause a new run to be created.
+	s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, "", "fail", nil))
+
+	// The callback fires -> nexus operation completes -> caller workflow finishes.
+	// The caller should get an error (the nexus operation failed because the
+	// target failed and the update was aborted).
+	var result string
+	err = callerRun.Get(ctx, &result)
+	s.Error(err, "expected caller workflow to fail because the target workflow failed with retry")
+	s.assertAcceptedUpdateCompletedWorkflowError(err)
+}
+
+// TestWorkflowUpdateCallbackOnRejectedUpdate verifies that when an update is rejected
+// by the workflow's validator, the nexus handler detects the rejection (which is returned
+// as a completed update with a failure outcome) and returns a synchronous failure to the
+// caller. This tests the proper handling of rejection in the callback flow.
+func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackOnRejectedUpdate() {
+	env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...)
+	ctx := testcore.NewContext()
+	cfg := newUpdateNexusTestConfig(s.T())
+	cfg.updateID = "rejected-update-id"
+
+	h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil)
+	endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h)
+
+	targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name())
+
+	// Target workflow: validator rejects all updates.
+	targetWF := func(ctx workflow.Context, input string) (string, error) {
+		err := workflow.SetUpdateHandlerWithOptions(ctx, "update",
+			func(ctx workflow.Context, input string) (string, error) {
+				return "updated: " + input, nil
+			},
+			workflow.UpdateHandlerOptions{
+				Validator: func(ctx workflow.Context, input string) error {
+					return errors.New("update rejected by validator")
+				},
+			},
+		)
+		if err != nil {
+			return "", err
+		}
+		signalCh := workflow.GetSignalChannel(ctx, "stop")
+		signalCh.Receive(ctx, nil)
+		return "done: " + input, nil
+	}
+
+	s.startWorker(env, targetTaskQueue, targetWF)
+
+	_, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{
+		ID:        cfg.childWfID,
+		TaskQueue: targetTaskQueue,
+	}, targetWF, "initial input")
+	s.NoError(err)
+
+	// Caller workflow sends a nexus operation targeting the child.
+	callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID)
+
+	s.startWorker(env, cfg.taskQueue, callerWF)
+
+	callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{
+		TaskQueue:                cfg.taskQueue,
+		WorkflowExecutionTimeout: 30 * time.Second,
+	}, callerWF)
+	s.NoError(err)
+
+	// The update is rejected by the validator -> nexus handler detects rejection and
+	// returns sync failure -> nexus operation fails -> caller workflow fails.
+	var result string
+	err = callerRun.Get(ctx, &result)
+	s.Error(err, "expected caller workflow to fail because the update was rejected")
+
+	// Verify it's a NexusOperationError containing the rejection message.
+	noe := s.requireNexusOperationError(err)
+	s.Contains(noe.Error(), "update rejected by validator")
+
+	// Clean up: stop the target workflow. The run ID argument is left empty, so the signal is addressed by workflow ID alone.
+	s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, "", "stop", nil))
+}
+
+// TestWorkflowUpdateRequestIDInAcceptedEvent verifies that when an update request includes
+// a RequestId, it is preserved in the WorkflowExecutionUpdateAccepted event's AcceptedRequest.
+func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateRequestIDInAcceptedEvent() {
+	env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...)
+	ctx := testcore.NewContext()
+	taskQueue := testcore.RandomizeStr(s.T().Name())
+	updateID := "request-id-accepted-test"
+	requestID := uuid.NewString()
+
+	wf := newUpdateChildWorkflow(false)
+	s.startWorker(env, taskQueue, wf)
+
+	run, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{
+		TaskQueue: taskQueue,
+	}, wf, "initial input")
+	s.NoError(err)
+
+	// Send an update with a specific RequestId and wait for completion (the wait policy below asks for lifecycle stage COMPLETED).
+	_, err = env.FrontendClient().UpdateWorkflowExecution(ctx, &workflowservice.UpdateWorkflowExecutionRequest{
+		Namespace: env.Namespace().String(),
+		WorkflowExecution: &commonpb.WorkflowExecution{
+			WorkflowId: run.GetID(),
+			RunId:      run.GetRunID(),
+		},
+		WaitPolicy: &updatepb.WaitPolicy{
+			LifecycleStage: enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED,
+		},
+		Request: &updatepb.Request{
+			Meta: &updatepb.Meta{
+				UpdateId: updateID,
+			},
+			Input: &updatepb.Input{
+				Name: "update",
+				Args: &commonpb.Payloads{
+					Payloads: []*commonpb.Payload{testcore.MustToPayload(s.T(), "test")},
+				},
+			},
+			RequestId: requestID,
+		},
+	})
+	s.NoError(err)
+
+	// Verify the accepted event contains the request ID in the AcceptedRequest. Only the first accepted event found in the history is inspected.
+	hist := env.SdkClient().GetWorkflowHistory(ctx, run.GetID(), run.GetRunID(), false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT)
+	foundAccepted := false
+	for hist.HasNext() {
+		event, err := hist.Next()
+		s.NoError(err)
+		if event.EventType == enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ACCEPTED {
+			foundAccepted = true
+			attrs := event.GetWorkflowExecutionUpdateAcceptedEventAttributes()
+			s.NotNil(attrs)
+			s.Equal(updateID, attrs.GetAcceptedRequest().GetMeta().GetUpdateId())
+			s.Equal(requestID, attrs.GetAcceptedRequest().GetRequestId())
+			break
+		}
+	}
+	s.True(foundAccepted, "expected to find WorkflowExecutionUpdateAccepted event")
+
+	// Clean up.
+	s.NoError(env.SdkClient().SignalWorkflow(ctx, run.GetID(), run.GetRunID(), "stop", nil))
+}
diff --git a/tests/update_workflow_sdk_test.go b/tests/update_workflow_sdk_test.go
index 48ce8124797..8dc4902469d 100644
--- a/tests/update_workflow_sdk_test.go
+++ b/tests/update_workflow_sdk_test.go
@@ -6,14 +6,18 @@ import (
 	"testing"
 	"time"
 
+	"github.com/google/uuid"
 	"github.com/stretchr/testify/suite"
+	commonpb "go.temporal.io/api/common/v1"
 	enumspb "go.temporal.io/api/enums/v1"
 	"go.temporal.io/api/serviceerror"
 	updatepb "go.temporal.io/api/update/v1"
 	"go.temporal.io/api/workflowservice/v1"
 	sdkclient "go.temporal.io/sdk/client"
 	"go.temporal.io/sdk/temporal"
+	"go.temporal.io/sdk/worker"
 	"go.temporal.io/sdk/workflow"
+	"go.temporal.io/server/common/dynamicconfig"
 	"go.temporal.io/server/common/namespace"
 	"go.temporal.io/server/common/testing/testvars"
 	"go.temporal.io/server/tests/testcore"
@@ -398,3 +402,95 @@ func (s *UpdateWorkflowSdkSuite) pollUpdate(ctx context.Context, tv *testvars.Te
 		WaitPolicy: waitPolicy,
 	})
 }
+
+// TestUpdateSameRequestIDDeduplicatesCallbacks verifies requestID-based
+// deduplication in AttachCallbacks. The update blocks (stays in stateAccepted), then:
+// - A second request with the same requestID is deduped (no new callback).
+// - A third request with a different requestID creates an additional callback.
+//
+// The workflow should end up with exactly 2 update callbacks (from requestID1 and requestID2).
+func (s *UpdateWorkflowSdkSuite) TestUpdateSameRequestIDDeduplicatesCallbacks() { + s.OverrideDynamicConfig(dynamicconfig.EnableChasm, true) + s.OverrideDynamicConfig(dynamicconfig.EnableCHASMCallbacks, true) + s.OverrideDynamicConfig(dynamicconfig.EnableWorkflowUpdateCallbacks, true) + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + + taskQueue := testcore.RandomizeStr(s.T().Name()) + updateID := "dedup-callbacks-test" + requestID1 := uuid.NewString() + requestID2 := uuid.NewString() + + // Workflow where the update handler blocks until signaled. + wf := func(ctx workflow.Context, input string) (string, error) { + if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) { + signalCh := workflow.GetSignalChannel(ctx, "complete-update") + signalCh.Receive(ctx, nil) + return "updated: " + input, nil + }); err != nil { + return "", err + } + signalCh := workflow.GetSignalChannel(ctx, "stop") + signalCh.Receive(ctx, nil) + return "done: " + input, nil + } + + w := worker.New(s.SdkClient(), taskQueue, worker.Options{}) + w.RegisterWorkflow(wf) + s.NoError(w.Start()) + s.T().Cleanup(w.Stop) + + run, err := s.SdkClient().ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{ + ID: testcore.RandomizeStr("wf"), + TaskQueue: taskQueue, + }, wf, "input") + s.NoError(err) + + makeRequest := func(reqID string) *workflowservice.UpdateWorkflowExecutionRequest { + return &workflowservice.UpdateWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{WorkflowId: run.GetID()}, + WaitPolicy: &updatepb.WaitPolicy{LifecycleStage: enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED}, + Request: &updatepb.Request{ + Meta: &updatepb.Meta{UpdateId: updateID}, + Input: &updatepb.Input{Name: "update", Args: &commonpb.Payloads{Payloads: []*commonpb.Payload{testcore.MustToPayload(s.T(), "test")}}}, + RequestId: reqID, + CompletionCallbacks: 
[]*commonpb.Callback{{ + Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: "http://localhost:9999/callback"}}, + }}, + }, + } + } + + // First request: triggers the update, waits for acceptance (update blocks in handler). + _, err = s.FrontendClient().UpdateWorkflowExecution(ctx, makeRequest(requestID1)) + s.NoError(err) + + // Second request: same requestID → should be deduped by AttachCallbacks (no new callback). + _, err = s.FrontendClient().UpdateWorkflowExecution(ctx, makeRequest(requestID1)) + s.NoError(err) + + // Third request: different requestID → should create a new callback via AttachCallbacks. + _, err = s.FrontendClient().UpdateWorkflowExecution(ctx, makeRequest(requestID2)) + s.NoError(err) + + // Verify exactly 2 update callbacks: one from requestID1 (first request), + // one from requestID2 (third request). The second request was deduped. + descResp, err := s.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: run.GetID()}, + }) + s.NoError(err) + updateCallbackCount := 0 + for _, cb := range descResp.GetCallbacks() { + if cb.GetTrigger().GetUpdateWorkflowExecutionCompleted() != nil { + updateCallbackCount++ + } + } + s.Equal(2, updateCallbackCount, "expected 2 callbacks: requestID1 (original) + requestID2 (new), with duplicate requestID1 deduped") + + // Clean up. + s.NoError(s.SdkClient().SignalWorkflow(ctx, run.GetID(), run.GetRunID(), "complete-update", nil)) + s.NoError(s.SdkClient().SignalWorkflow(ctx, run.GetID(), run.GetRunID(), "stop", nil)) +}